From c5dd8b2d829836074117c42d9b052149d84f3c6f Mon Sep 17 00:00:00 2001 From: "jay.ho.park" Date: Thu, 22 Oct 2020 13:56:46 +0900 Subject: [PATCH] Convert nltk codes into version 3.4 Change-Id: I8fbc3822305a67ea3aed093100f7f4711b96d3db --- nlp_resource_data/nltk/VERSION | 2 +- nlp_resource_data/nltk/__init__.py | 93 +- .../nltk/__pycache__/__init__.cpython-37.pyc | Bin 4847 -> 4932 bytes .../nltk/__pycache__/book.cpython-37.pyc | Bin 3237 -> 3274 bytes .../nltk/__pycache__/cli.cpython-37.pyc | Bin 1650 -> 0 bytes .../nltk/__pycache__/collections.cpython-37.pyc | Bin 23447 -> 23629 bytes .../nltk/__pycache__/collocations.cpython-37.pyc | Bin 14902 -> 14942 bytes .../nltk/__pycache__/compat.cpython-37.pyc | Bin 1115 -> 10168 bytes .../nltk/__pycache__/data.cpython-37.pyc | Bin 38120 -> 41425 bytes .../nltk/__pycache__/decorators.cpython-37.pyc | Bin 6366 -> 5904 bytes .../nltk/__pycache__/downloader.cpython-37.pyc | Bin 61998 -> 63129 bytes .../nltk/__pycache__/featstruct.cpython-37.pyc | Bin 75128 -> 75418 bytes .../nltk/__pycache__/grammar.cpython-37.pyc | Bin 53693 -> 51439 bytes .../nltk/__pycache__/help.cpython-37.pyc | Bin 1607 -> 1644 bytes .../nltk/__pycache__/internals.cpython-37.pyc | Bin 28646 -> 28784 bytes .../nltk/__pycache__/jsontags.cpython-37.pyc | Bin 2237 -> 2225 bytes .../nltk/__pycache__/lazyimport.cpython-37.pyc | Bin 3743 -> 3780 bytes .../nltk/__pycache__/probability.cpython-37.pyc | Bin 87626 -> 86280 bytes .../nltk/__pycache__/text.cpython-37.pyc | Bin 27605 -> 25465 bytes .../nltk/__pycache__/tgrep.cpython-37.pyc | Bin 37563 -> 37710 bytes .../nltk/__pycache__/toolbox.cpython-37.pyc | Bin 15722 -> 15920 bytes .../nltk/__pycache__/tree.cpython-37.pyc | Bin 55897 -> 55512 bytes .../__pycache__/treeprettyprinter.cpython-37.pyc | Bin 20598 -> 20665 bytes .../nltk/__pycache__/treetransforms.cpython-37.pyc | Bin 9363 -> 9400 bytes .../nltk/__pycache__/util.cpython-37.pyc | Bin 21532 -> 21108 bytes .../nltk/__pycache__/wsd.cpython-37.pyc | Bin 1805 -> 1793 bytes nlp_resource_data/nltk/app/__init__.py | 4 +- .../nltk/app/__pycache__/__init__.cpython-37.pyc | Bin 1505 -> 1515 bytes .../app/__pycache__/chartparser_app.cpython-37.pyc | Bin 63496 -> 63569 bytes .../app/__pycache__/chunkparser_app.cpython-37.pyc | Bin 33249 -> 33322 bytes .../__pycache__/collocations_app.cpython-37.pyc | Bin 14621 -> 14694 bytes .../app/__pycache__/concordance_app.cpython-37.pyc | Bin 22666 -> 22735 bytes .../nltk/app/__pycache__/nemo_app.cpython-37.pyc | Bin 12220 -> 12218 bytes .../app/__pycache__/rdparser_app.cpython-37.pyc | Bin 25956 -> 26017 bytes .../app/__pycache__/srparser_app.cpython-37.pyc | Bin 21924 -> 21942 bytes .../app/__pycache__/wordfreq_app.cpython-37.pyc | Bin 1444 -> 1432 bytes .../app/__pycache__/wordnet_app.cpython-37.pyc | Bin 30729 -> 30860 bytes nlp_resource_data/nltk/app/chartparser_app.py | 787 ++--- nlp_resource_data/nltk/app/chunkparser_app.py | 763 ++--- nlp_resource_data/nltk/app/collocations_app.py | 174 +- nlp_resource_data/nltk/app/concordance_app.py | 311 +- nlp_resource_data/nltk/app/nemo_app.py | 10 +- nlp_resource_data/nltk/app/rdparser_app.py | 367 +-- nlp_resource_data/nltk/app/srparser_app.py | 342 +- nlp_resource_data/nltk/app/wordfreq_app.py | 8 +- nlp_resource_data/nltk/app/wordnet_app.py | 223 +- nlp_resource_data/nltk/book.py | 77 +- nlp_resource_data/nltk/ccg/__init__.py | 2 +- .../nltk/ccg/__pycache__/__init__.cpython-37.pyc | Bin 952 -> 940 bytes .../nltk/ccg/__pycache__/api.cpython-37.pyc | Bin 11772 -> 11956 bytes 
.../nltk/ccg/__pycache__/chart.cpython-37.pyc | Bin 13035 -> 13239 bytes .../nltk/ccg/__pycache__/combinator.cpython-37.pyc | Bin 9649 -> 9839 bytes .../nltk/ccg/__pycache__/lexicon.cpython-37.pyc | Bin 7779 -> 7887 bytes .../nltk/ccg/__pycache__/logic.cpython-37.pyc | Bin 1514 -> 1502 bytes nlp_resource_data/nltk/ccg/api.py | 58 +- nlp_resource_data/nltk/ccg/chart.py | 54 +- nlp_resource_data/nltk/ccg/combinator.py | 33 +- nlp_resource_data/nltk/ccg/lexicon.py | 49 +- nlp_resource_data/nltk/ccg/logic.py | 2 +- nlp_resource_data/nltk/chat/__init__.py | 21 +- .../nltk/chat/__pycache__/__init__.cpython-37.pyc | Bin 1397 -> 1434 bytes .../nltk/chat/__pycache__/eliza.cpython-37.pyc | Bin 5872 -> 5909 bytes .../nltk/chat/__pycache__/iesha.cpython-37.pyc | Bin 3204 -> 3241 bytes .../nltk/chat/__pycache__/rude.cpython-37.pyc | Bin 2358 -> 2395 bytes .../nltk/chat/__pycache__/suntsu.cpython-37.pyc | Bin 6119 -> 6156 bytes .../nltk/chat/__pycache__/util.cpython-37.pyc | Bin 3615 -> 3699 bytes .../nltk/chat/__pycache__/zen.cpython-37.pyc | Bin 6487 -> 6524 bytes nlp_resource_data/nltk/chat/eliza.py | 79 +- nlp_resource_data/nltk/chat/iesha.py | 39 +- nlp_resource_data/nltk/chat/rude.py | 33 +- nlp_resource_data/nltk/chat/suntsu.py | 29 +- nlp_resource_data/nltk/chat/util.py | 19 +- nlp_resource_data/nltk/chat/zen.py | 73 +- nlp_resource_data/nltk/chunk/__init__.py | 6 +- .../nltk/chunk/__pycache__/__init__.cpython-37.pyc | Bin 7286 -> 7274 bytes .../nltk/chunk/__pycache__/api.cpython-37.pyc | Bin 1723 -> 1711 bytes .../chunk/__pycache__/named_entity.cpython-37.pyc | Bin 9274 -> 9350 bytes .../nltk/chunk/__pycache__/regexp.cpython-37.pyc | Bin 48176 -> 48474 bytes .../nltk/chunk/__pycache__/util.cpython-37.pyc | Bin 17825 -> 17973 bytes nlp_resource_data/nltk/chunk/api.py | 2 +- nlp_resource_data/nltk/chunk/named_entity.py | 160 +- nlp_resource_data/nltk/chunk/regexp.py | 275 +- nlp_resource_data/nltk/chunk/util.py | 118 +- nlp_resource_data/nltk/classify/__init__.py | 4 +- .../classify/__pycache__/__init__.cpython-37.pyc | Bin 4672 -> 4659 bytes .../nltk/classify/__pycache__/api.cpython-37.pyc | Bin 5665 -> 5653 bytes .../__pycache__/decisiontree.cpython-37.pyc | Bin 9651 -> 9417 bytes .../classify/__pycache__/maxent.cpython-37.pyc | Bin 47410 -> 47587 bytes .../nltk/classify/__pycache__/megam.cpython-37.pyc | Bin 5155 -> 5267 bytes .../classify/__pycache__/naivebayes.cpython-37.pyc | Bin 7932 -> 7755 bytes .../__pycache__/positivenaivebayes.cpython-37.pyc | Bin 5310 -> 5283 bytes .../__pycache__/rte_classify.cpython-37.pyc | Bin 5794 -> 5831 bytes .../__pycache__/scikitlearn.cpython-37.pyc | Bin 6138 -> 6313 bytes .../nltk/classify/__pycache__/senna.cpython-37.pyc | Bin 5903 -> 6096 bytes .../nltk/classify/__pycache__/svm.cpython-37.pyc | Bin 679 -> 667 bytes .../nltk/classify/__pycache__/tadm.cpython-37.pyc | Bin 3280 -> 3384 bytes .../classify/__pycache__/textcat.cpython-37.pyc | Bin 4731 -> 4895 bytes .../nltk/classify/__pycache__/util.cpython-37.pyc | Bin 11203 -> 11259 bytes .../nltk/classify/__pycache__/weka.cpython-37.pyc | Bin 10005 -> 10106 bytes nlp_resource_data/nltk/classify/api.py | 2 +- nlp_resource_data/nltk/classify/decisiontree.py | 52 +- nlp_resource_data/nltk/classify/maxent.py | 270 +- nlp_resource_data/nltk/classify/megam.py | 51 +- nlp_resource_data/nltk/classify/naivebayes.py | 23 +- .../nltk/classify/positivenaivebayes.py | 14 +- nlp_resource_data/nltk/classify/rte_classify.py | 73 +- nlp_resource_data/nltk/classify/scikitlearn.py | 7 +- 
nlp_resource_data/nltk/classify/senna.py | 57 +- nlp_resource_data/nltk/classify/svm.py | 2 +- nlp_resource_data/nltk/classify/tadm.py | 31 +- nlp_resource_data/nltk/classify/textcat.py | 86 +- nlp_resource_data/nltk/classify/util.py | 111 +- nlp_resource_data/nltk/classify/weka.py | 156 +- nlp_resource_data/nltk/cli.py | 59 - nlp_resource_data/nltk/cluster/__init__.py | 2 +- .../cluster/__pycache__/__init__.cpython-37.pyc | Bin 4302 -> 4290 bytes .../nltk/cluster/__pycache__/api.cpython-37.pyc | Bin 2400 -> 2432 bytes .../nltk/cluster/__pycache__/em.cpython-37.pyc | Bin 6802 -> 6935 bytes .../nltk/cluster/__pycache__/gaac.cpython-37.pyc | Bin 4946 -> 5098 bytes .../nltk/cluster/__pycache__/kmeans.cpython-37.pyc | Bin 6656 -> 6808 bytes .../nltk/cluster/__pycache__/util.cpython-37.pyc | Bin 9797 -> 9949 bytes nlp_resource_data/nltk/cluster/api.py | 6 +- nlp_resource_data/nltk/cluster/em.py | 67 +- nlp_resource_data/nltk/cluster/gaac.py | 17 +- nlp_resource_data/nltk/cluster/kmeans.py | 33 +- nlp_resource_data/nltk/cluster/util.py | 27 +- nlp_resource_data/nltk/collections.py | 42 +- nlp_resource_data/nltk/collocations.py | 36 +- nlp_resource_data/nltk/compat.py | 348 +- nlp_resource_data/nltk/corpus/__init__.py | 376 +-- .../corpus/__pycache__/__init__.cpython-37.pyc | Bin 9476 -> 9437 bytes .../corpus/__pycache__/europarl_raw.cpython-37.pyc | Bin 986 -> 974 bytes .../nltk/corpus/__pycache__/util.cpython-37.pyc | Bin 4492 -> 4600 bytes nlp_resource_data/nltk/corpus/europarl_raw.py | 24 +- nlp_resource_data/nltk/corpus/reader/__init__.py | 4 +- .../reader/__pycache__/__init__.cpython-37.pyc | Bin 5877 -> 5789 bytes .../reader/__pycache__/aligned.cpython-37.pyc | Bin 4796 -> 4824 bytes .../corpus/reader/__pycache__/api.cpython-37.pyc | Bin 18148 -> 18299 bytes .../corpus/reader/__pycache__/bnc.cpython-37.pyc | Bin 9395 -> 9383 bytes .../__pycache__/bracket_parse.cpython-37.pyc | Bin 10801 -> 10774 bytes .../__pycache__/categorized_sents.cpython-37.pyc | Bin 7291 -> 7319 bytes .../reader/__pycache__/chasen.cpython-37.pyc | Bin 5736 -> 5813 bytes .../reader/__pycache__/childes.cpython-37.pyc | Bin 19254 -> 19355 bytes .../reader/__pycache__/chunked.cpython-37.pyc | Bin 9746 -> 9774 bytes .../reader/__pycache__/cmudict.cpython-37.pyc | Bin 4137 -> 4169 bytes .../__pycache__/comparative_sents.cpython-37.pyc | Bin 10896 -> 10924 bytes .../corpus/reader/__pycache__/conll.cpython-37.pyc | Bin 17623 -> 17751 bytes .../reader/__pycache__/crubadan.cpython-37.pyc | Bin 3944 -> 4164 bytes .../reader/__pycache__/dependency.cpython-37.pyc | Bin 4828 -> 4816 bytes .../reader/__pycache__/framenet.cpython-37.pyc | Bin 106499 -> 106745 bytes .../corpus/reader/__pycache__/ieer.cpython-37.pyc | Bin 4219 -> 4364 bytes .../reader/__pycache__/indian.cpython-37.pyc | Bin 4141 -> 4169 bytes .../reader/__pycache__/ipipan.cpython-37.pyc | Bin 11950 -> 11978 bytes .../corpus/reader/__pycache__/knbc.cpython-37.pyc | Bin 5955 -> 6032 bytes .../corpus/reader/__pycache__/lin.cpython-37.pyc | Bin 5920 -> 5957 bytes .../corpus/reader/__pycache__/mte.cpython-37.pyc | Bin 17310 -> 17338 bytes .../corpus/reader/__pycache__/nkjp.cpython-37.pyc | Bin 15282 -> 15310 bytes .../reader/__pycache__/nombank.cpython-37.pyc | Bin 14349 -> 14529 bytes .../reader/__pycache__/nps_chat.cpython-37.pyc | Bin 3999 -> 4038 bytes .../__pycache__/opinion_lexicon.cpython-37.pyc | Bin 4340 -> 4368 bytes .../reader/__pycache__/panlex_lite.cpython-37.pyc | Bin 5901 -> 5889 bytes .../__pycache__/panlex_swadesh.cpython-37.pyc | Bin 4159 -> 0 bytes 
.../reader/__pycache__/pl196x.cpython-37.pyc | Bin 9510 -> 9529 bytes .../reader/__pycache__/plaintext.cpython-37.pyc | Bin 10446 -> 10443 bytes .../reader/__pycache__/ppattach.cpython-37.pyc | Bin 3731 -> 3882 bytes .../reader/__pycache__/propbank.cpython-37.pyc | Bin 15944 -> 16100 bytes .../reader/__pycache__/pros_cons.cpython-37.pyc | Bin 5030 -> 5058 bytes .../reader/__pycache__/reviews.cpython-37.pyc | Bin 12809 -> 13058 bytes .../corpus/reader/__pycache__/rte.cpython-37.pyc | Bin 4843 -> 4994 bytes .../reader/__pycache__/semcor.cpython-37.pyc | Bin 9093 -> 9158 bytes .../reader/__pycache__/senseval.cpython-37.pyc | Bin 5844 -> 6020 bytes .../reader/__pycache__/sentiwordnet.cpython-37.pyc | Bin 5170 -> 5233 bytes .../__pycache__/sinica_treebank.cpython-37.pyc | Bin 3039 -> 3027 bytes .../__pycache__/string_category.cpython-37.pyc | Bin 2366 -> 2429 bytes .../reader/__pycache__/switchboard.cpython-37.pyc | Bin 6117 -> 6228 bytes .../reader/__pycache__/tagged.cpython-37.pyc | Bin 12503 -> 12531 bytes .../corpus/reader/__pycache__/timit.cpython-37.pyc | Bin 17032 -> 17208 bytes .../reader/__pycache__/toolbox.cpython-37.pyc | Bin 2640 -> 2637 bytes .../reader/__pycache__/twitter.cpython-37.pyc | Bin 5026 -> 5054 bytes .../corpus/reader/__pycache__/udhr.cpython-37.pyc | Bin 2235 -> 2300 bytes .../corpus/reader/__pycache__/util.cpython-37.pyc | Bin 23183 -> 23358 bytes .../reader/__pycache__/verbnet.cpython-37.pyc | Bin 21474 -> 21506 bytes .../reader/__pycache__/wordlist.cpython-37.pyc | Bin 6491 -> 6519 bytes .../reader/__pycache__/wordnet.cpython-37.pyc | Bin 63977 -> 64212 bytes .../reader/__pycache__/xmldocs.cpython-37.pyc | Bin 11153 -> 11335 bytes .../corpus/reader/__pycache__/ycoe.cpython-37.pyc | Bin 10663 -> 10691 bytes nlp_resource_data/nltk/corpus/reader/aligned.py | 12 +- nlp_resource_data/nltk/corpus/reader/api.py | 67 +- nlp_resource_data/nltk/corpus/reader/bnc.py | 66 +- .../nltk/corpus/reader/bracket_parse.py | 51 +- .../nltk/corpus/reader/categorized_sents.py | 13 +- nlp_resource_data/nltk/corpus/reader/chasen.py | 29 +- nlp_resource_data/nltk/corpus/reader/childes.py | 142 +- nlp_resource_data/nltk/corpus/reader/chunked.py | 14 +- nlp_resource_data/nltk/corpus/reader/cmudict.py | 7 +- .../nltk/corpus/reader/comparative_sents.py | 36 +- nlp_resource_data/nltk/corpus/reader/conll.py | 193 +- nlp_resource_data/nltk/corpus/reader/crubadan.py | 48 +- nlp_resource_data/nltk/corpus/reader/dependency.py | 14 +- nlp_resource_data/nltk/corpus/reader/framenet.py | 961 +++--- nlp_resource_data/nltk/corpus/reader/ieer.py | 38 +- nlp_resource_data/nltk/corpus/reader/indian.py | 10 +- nlp_resource_data/nltk/corpus/reader/ipipan.py | 118 +- nlp_resource_data/nltk/corpus/reader/knbc.py | 56 +- nlp_resource_data/nltk/corpus/reader/lin.py | 35 +- nlp_resource_data/nltk/corpus/reader/mte.py | 74 +- nlp_resource_data/nltk/corpus/reader/nkjp.py | 130 +- nlp_resource_data/nltk/corpus/reader/nombank.py | 118 +- nlp_resource_data/nltk/corpus/reader/nps_chat.py | 23 +- .../nltk/corpus/reader/opinion_lexicon.py | 9 +- .../nltk/corpus/reader/panlex_lite.py | 36 +- .../nltk/corpus/reader/panlex_swadesh.py | 91 - nlp_resource_data/nltk/corpus/reader/pl196x.py | 56 +- nlp_resource_data/nltk/corpus/reader/plaintext.py | 20 +- nlp_resource_data/nltk/corpus/reader/ppattach.py | 13 +- nlp_resource_data/nltk/corpus/reader/propbank.py | 139 +- nlp_resource_data/nltk/corpus/reader/pros_cons.py | 12 +- nlp_resource_data/nltk/corpus/reader/reviews.py | 28 +- nlp_resource_data/nltk/corpus/reader/rte.py | 16 +- 
nlp_resource_data/nltk/corpus/reader/semcor.py | 77 +- nlp_resource_data/nltk/corpus/reader/senseval.py | 85 +- .../nltk/corpus/reader/sentiwordnet.py | 20 +- .../nltk/corpus/reader/sinica_treebank.py | 14 +- .../nltk/corpus/reader/string_category.py | 11 +- .../nltk/corpus/reader/switchboard.py | 37 +- nlp_resource_data/nltk/corpus/reader/tagged.py | 26 +- nlp_resource_data/nltk/corpus/reader/timit.py | 98 +- nlp_resource_data/nltk/corpus/reader/toolbox.py | 20 +- nlp_resource_data/nltk/corpus/reader/twitter.py | 10 +- nlp_resource_data/nltk/corpus/reader/udhr.py | 91 +- nlp_resource_data/nltk/corpus/reader/util.py | 128 +- nlp_resource_data/nltk/corpus/reader/verbnet.py | 235 +- nlp_resource_data/nltk/corpus/reader/wordlist.py | 90 +- nlp_resource_data/nltk/corpus/reader/wordnet.py | 400 +-- nlp_resource_data/nltk/corpus/reader/xmldocs.py | 78 +- nlp_resource_data/nltk/corpus/reader/ycoe.py | 262 +- nlp_resource_data/nltk/corpus/util.py | 29 +- nlp_resource_data/nltk/data.py | 505 +-- nlp_resource_data/nltk/decorators.py | 77 +- nlp_resource_data/nltk/downloader.py | 827 ++--- nlp_resource_data/nltk/draw/__init__.py | 4 +- .../nltk/draw/__pycache__/__init__.cpython-37.pyc | Bin 862 -> 872 bytes .../nltk/draw/__pycache__/cfg.cpython-37.pyc | Bin 20605 -> 20643 bytes .../draw/__pycache__/dispersion.cpython-37.pyc | Bin 1686 -> 1703 bytes .../nltk/draw/__pycache__/table.cpython-37.pyc | Bin 38958 -> 38999 bytes .../nltk/draw/__pycache__/tree.cpython-37.pyc | Bin 28810 -> 28808 bytes .../nltk/draw/__pycache__/util.cpython-37.pyc | Bin 80421 -> 80467 bytes nlp_resource_data/nltk/draw/cfg.py | 337 +- nlp_resource_data/nltk/draw/dispersion.py | 13 +- nlp_resource_data/nltk/draw/table.py | 183 +- nlp_resource_data/nltk/draw/tree.py | 308 +- nlp_resource_data/nltk/draw/util.py | 598 ++-- nlp_resource_data/nltk/featstruct.py | 563 ++-- nlp_resource_data/nltk/grammar.py | 330 +- nlp_resource_data/nltk/help.py | 15 +- nlp_resource_data/nltk/inference/__init__.py | 2 +- .../inference/__pycache__/__init__.cpython-37.pyc | Bin 837 -> 825 bytes .../nltk/inference/__pycache__/api.cpython-37.pyc | Bin 21352 -> 21449 bytes .../inference/__pycache__/discourse.cpython-37.pyc | Bin 20914 -> 20987 bytes .../nltk/inference/__pycache__/mace.cpython-37.pyc | Bin 10629 -> 10666 bytes .../__pycache__/nonmonotonic.cpython-37.pyc | Bin 16548 -> 16681 bytes .../inference/__pycache__/prover9.cpython-37.pyc | Bin 12736 -> 12773 bytes .../__pycache__/resolution.cpython-37.pyc | Bin 21115 -> 21254 bytes .../inference/__pycache__/tableau.cpython-37.pyc | Bin 18785 -> 18849 bytes nlp_resource_data/nltk/inference/api.py | 32 +- nlp_resource_data/nltk/inference/discourse.py | 73 +- nlp_resource_data/nltk/inference/mace.py | 157 +- nlp_resource_data/nltk/inference/nonmonotonic.py | 189 +- nlp_resource_data/nltk/inference/prover9.py | 191 +- nlp_resource_data/nltk/inference/resolution.py | 140 +- nlp_resource_data/nltk/inference/tableau.py | 125 +- nlp_resource_data/nltk/internals.py | 234 +- nlp_resource_data/nltk/jsontags.py | 14 +- nlp_resource_data/nltk/lazyimport.py | 23 +- nlp_resource_data/nltk/lm/__init__.py | 7 +- .../nltk/lm/__pycache__/__init__.cpython-37.pyc | Bin 7666 -> 7659 bytes .../nltk/lm/__pycache__/api.cpython-37.pyc | Bin 8003 -> 8199 bytes .../nltk/lm/__pycache__/counter.cpython-37.pyc | Bin 5426 -> 5558 bytes .../nltk/lm/__pycache__/models.cpython-37.pyc | Bin 4023 -> 4133 bytes .../lm/__pycache__/preprocessing.cpython-37.pyc | Bin 1793 -> 1781 bytes .../nltk/lm/__pycache__/smoothing.cpython-37.pyc | Bin 
2417 -> 3456 bytes .../nltk/lm/__pycache__/util.cpython-37.pyc | Bin 488 -> 476 bytes .../nltk/lm/__pycache__/vocabulary.cpython-37.pyc | Bin 7847 -> 8256 bytes nlp_resource_data/nltk/lm/api.py | 69 +- nlp_resource_data/nltk/lm/counter.py | 15 +- nlp_resource_data/nltk/lm/models.py | 27 +- nlp_resource_data/nltk/lm/preprocessing.py | 3 +- nlp_resource_data/nltk/lm/smoothing.py | 58 +- nlp_resource_data/nltk/lm/util.py | 3 +- nlp_resource_data/nltk/lm/vocabulary.py | 39 +- nlp_resource_data/nltk/metrics/__init__.py | 4 +- .../metrics/__pycache__/__init__.cpython-37.pyc | Bin 1296 -> 1222 bytes .../metrics/__pycache__/agreement.cpython-37.pyc | Bin 16735 -> 16928 bytes .../nltk/metrics/__pycache__/aline.cpython-37.pyc | Bin 12486 -> 12525 bytes .../metrics/__pycache__/association.cpython-37.pyc | Bin 15584 -> 15651 bytes .../__pycache__/confusionmatrix.cpython-37.pyc | Bin 6861 -> 6994 bytes .../metrics/__pycache__/distance.cpython-37.pyc | Bin 14225 -> 12312 bytes .../nltk/metrics/__pycache__/paice.cpython-37.pyc | Bin 11308 -> 11296 bytes .../nltk/metrics/__pycache__/scores.cpython-37.pyc | Bin 7643 -> 7750 bytes .../__pycache__/segmentation.cpython-37.pyc | Bin 6766 -> 6791 bytes .../metrics/__pycache__/spearman.cpython-37.pyc | Bin 2242 -> 2226 bytes nlp_resource_data/nltk/metrics/agreement.py | 68 +- nlp_resource_data/nltk/metrics/aline.py | 2000 ++++++------ nlp_resource_data/nltk/metrics/association.py | 18 +- nlp_resource_data/nltk/metrics/confusionmatrix.py | 62 +- nlp_resource_data/nltk/metrics/distance.py | 81 +- nlp_resource_data/nltk/metrics/paice.py | 126 +- nlp_resource_data/nltk/metrics/scores.py | 65 +- nlp_resource_data/nltk/metrics/segmentation.py | 8 +- nlp_resource_data/nltk/metrics/spearman.py | 3 +- nlp_resource_data/nltk/misc/__init__.py | 2 +- .../nltk/misc/__pycache__/__init__.cpython-37.pyc | Bin 386 -> 374 bytes .../nltk/misc/__pycache__/babelfish.cpython-37.pyc | Bin 597 -> 639 bytes .../nltk/misc/__pycache__/chomsky.cpython-37.pyc | Bin 5074 -> 5148 bytes .../misc/__pycache__/minimalset.cpython-37.pyc | Bin 2931 -> 2919 bytes .../nltk/misc/__pycache__/sort.cpython-37.pyc | Bin 3425 -> 3486 bytes .../misc/__pycache__/wordfinder.cpython-37.pyc | Bin 4050 -> 4087 bytes nlp_resource_data/nltk/misc/babelfish.py | 1 + nlp_resource_data/nltk/misc/chomsky.py | 5 +- nlp_resource_data/nltk/misc/minimalset.py | 2 +- nlp_resource_data/nltk/misc/sort.py | 6 +- nlp_resource_data/nltk/misc/wordfinder.py | 11 +- nlp_resource_data/nltk/parse/__init__.py | 2 +- .../nltk/parse/__pycache__/__init__.cpython-37.pyc | Bin 3914 -> 3902 bytes .../nltk/parse/__pycache__/api.cpython-37.pyc | Bin 2595 -> 2583 bytes .../nltk/parse/__pycache__/bllip.cpython-37.pyc | Bin 7274 -> 7311 bytes .../nltk/parse/__pycache__/chart.cpython-37.pyc | Bin 54749 -> 54973 bytes .../nltk/parse/__pycache__/corenlp.cpython-37.pyc | Bin 23636 -> 23504 bytes .../__pycache__/dependencygraph.cpython-37.pyc | Bin 29333 -> 29506 bytes .../parse/__pycache__/earleychart.cpython-37.pyc | Bin 16636 -> 16695 bytes .../nltk/parse/__pycache__/evaluate.cpython-37.pyc | Bin 4034 -> 4065 bytes .../parse/__pycache__/featurechart.cpython-37.pyc | Bin 18937 -> 19046 bytes .../nltk/parse/__pycache__/generate.cpython-37.pyc | Bin 2157 -> 2194 bytes .../nltk/parse/__pycache__/malt.cpython-37.pyc | Bin 9916 -> 10017 bytes .../nonprojectivedependencyparser.cpython-37.pyc | Bin 21673 -> 21747 bytes .../nltk/parse/__pycache__/pchart.cpython-37.pyc | Bin 17367 -> 17500 bytes .../projectivedependencyparser.cpython-37.pyc | Bin 18765 -> 
18904 bytes .../__pycache__/recursivedescent.cpython-37.pyc | Bin 23691 -> 23802 bytes .../parse/__pycache__/shiftreduce.cpython-37.pyc | Bin 15367 -> 15478 bytes .../nltk/parse/__pycache__/stanford.cpython-37.pyc | Bin 18012 -> 18088 bytes .../__pycache__/transitionparser.cpython-37.pyc | Bin 21314 -> 21420 bytes .../nltk/parse/__pycache__/util.cpython-37.pyc | Bin 7312 -> 7349 bytes .../nltk/parse/__pycache__/viterbi.cpython-37.pyc | Bin 14556 -> 14689 bytes nlp_resource_data/nltk/parse/api.py | 2 +- nlp_resource_data/nltk/parse/bllip.py | 34 +- nlp_resource_data/nltk/parse/chart.py | 135 +- nlp_resource_data/nltk/parse/corenlp.py | 156 +- nlp_resource_data/nltk/parse/dependencygraph.py | 202 +- nlp_resource_data/nltk/parse/earleychart.py | 19 +- nlp_resource_data/nltk/parse/evaluate.py | 4 +- nlp_resource_data/nltk/parse/featurechart.py | 40 +- nlp_resource_data/nltk/parse/generate.py | 9 +- nlp_resource_data/nltk/parse/malt.py | 138 +- .../nltk/parse/nonprojectivedependencyparser.py | 149 +- nlp_resource_data/nltk/parse/pchart.py | 51 +- .../nltk/parse/projectivedependencyparser.py | 177 +- nlp_resource_data/nltk/parse/recursivedescent.py | 44 +- nlp_resource_data/nltk/parse/shiftreduce.py | 30 +- nlp_resource_data/nltk/parse/stanford.py | 132 +- nlp_resource_data/nltk/parse/transitionparser.py | 143 +- nlp_resource_data/nltk/parse/util.py | 23 +- nlp_resource_data/nltk/parse/viterbi.py | 67 +- nlp_resource_data/nltk/probability.py | 399 +-- nlp_resource_data/nltk/sem/__init__.py | 2 +- .../nltk/sem/__pycache__/__init__.cpython-37.pyc | Bin 2345 -> 2333 bytes .../nltk/sem/__pycache__/boxer.cpython-37.pyc | Bin 46713 -> 46858 bytes .../nltk/sem/__pycache__/chat80.cpython-37.pyc | Bin 21209 -> 21382 bytes .../sem/__pycache__/cooper_storage.cpython-37.pyc | Bin 3818 -> 3855 bytes .../nltk/sem/__pycache__/drt.cpython-37.pyc | Bin 45962 -> 46173 bytes .../sem/__pycache__/drt_glue_demo.cpython-37.pyc | Bin 13237 -> 13255 bytes .../nltk/sem/__pycache__/evaluate.cpython-37.pyc | Bin 21937 -> 22193 bytes .../nltk/sem/__pycache__/glue.cpython-37.pyc | Bin 19720 -> 19918 bytes .../nltk/sem/__pycache__/hole.cpython-37.pyc | Bin 11064 -> 11237 bytes .../nltk/sem/__pycache__/lfg.cpython-37.pyc | Bin 6018 -> 6178 bytes .../sem/__pycache__/linearlogic.cpython-37.pyc | Bin 17626 -> 17825 bytes .../nltk/sem/__pycache__/logic.cpython-37.pyc | Bin 66530 -> 66785 bytes .../nltk/sem/__pycache__/relextract.cpython-37.pyc | Bin 13578 -> 13634 bytes .../nltk/sem/__pycache__/skolemize.cpython-37.pyc | Bin 2326 -> 2314 bytes .../nltk/sem/__pycache__/util.cpython-37.pyc | Bin 7988 -> 8052 bytes nlp_resource_data/nltk/sem/boxer.py | 414 +-- nlp_resource_data/nltk/sem/chat80.py | 182 +- nlp_resource_data/nltk/sem/cooper_storage.py | 13 +- nlp_resource_data/nltk/sem/drt.py | 202 +- nlp_resource_data/nltk/sem/drt_glue_demo.py | 190 +- nlp_resource_data/nltk/sem/evaluate.py | 194 +- nlp_resource_data/nltk/sem/glue.py | 298 +- nlp_resource_data/nltk/sem/hole.py | 69 +- nlp_resource_data/nltk/sem/lfg.py | 133 +- nlp_resource_data/nltk/sem/linearlogic.py | 63 +- nlp_resource_data/nltk/sem/logic.py | 285 +- nlp_resource_data/nltk/sem/relextract.py | 160 +- nlp_resource_data/nltk/sem/skolemize.py | 6 +- nlp_resource_data/nltk/sem/util.py | 77 +- nlp_resource_data/nltk/sentiment/__init__.py | 2 +- .../sentiment/__pycache__/__init__.cpython-37.pyc | Bin 361 -> 349 bytes .../__pycache__/sentiment_analyzer.cpython-37.pyc | Bin 9709 -> 9417 bytes .../nltk/sentiment/__pycache__/util.cpython-37.pyc | Bin 22584 -> 23676 bytes 
.../sentiment/__pycache__/vader.cpython-37.pyc | Bin 13039 -> 12340 bytes .../nltk/sentiment/sentiment_analyzer.py | 27 +- nlp_resource_data/nltk/sentiment/util.py | 374 ++- nlp_resource_data/nltk/sentiment/vader.py | 582 ++-- nlp_resource_data/nltk/stem/__init__.py | 2 +- .../nltk/stem/__pycache__/__init__.cpython-37.pyc | Bin 1169 -> 1157 bytes .../nltk/stem/__pycache__/api.cpython-37.pyc | Bin 792 -> 824 bytes .../nltk/stem/__pycache__/arlstem.cpython-37.pyc | Bin 8223 -> 8262 bytes .../nltk/stem/__pycache__/cistem.cpython-37.pyc | Bin 5917 -> 6026 bytes .../nltk/stem/__pycache__/isri.cpython-37.pyc | Bin 9402 -> 9441 bytes .../nltk/stem/__pycache__/lancaster.cpython-37.pyc | Bin 6380 -> 6488 bytes .../nltk/stem/__pycache__/porter.cpython-37.pyc | Bin 21449 -> 21580 bytes .../nltk/stem/__pycache__/regexp.cpython-37.pyc | Bin 1704 -> 1812 bytes .../nltk/stem/__pycache__/rslp.cpython-37.pyc | Bin 3012 -> 3076 bytes .../nltk/stem/__pycache__/snowball.cpython-37.pyc | Bin 99002 -> 99216 bytes .../nltk/stem/__pycache__/util.cpython-37.pyc | Bin 670 -> 658 bytes .../nltk/stem/__pycache__/wordnet.cpython-37.pyc | Bin 1640 -> 1748 bytes nlp_resource_data/nltk/stem/api.py | 6 +- nlp_resource_data/nltk/stem/arlstem.py | 109 +- nlp_resource_data/nltk/stem/cistem.py | 10 +- nlp_resource_data/nltk/stem/isri.py | 265 +- nlp_resource_data/nltk/stem/lancaster.py | 11 +- nlp_resource_data/nltk/stem/porter.py | 208 +- nlp_resource_data/nltk/stem/regexp.py | 11 +- nlp_resource_data/nltk/stem/rslp.py | 6 +- nlp_resource_data/nltk/stem/snowball.py | 3440 ++++++++++---------- nlp_resource_data/nltk/stem/util.py | 2 +- nlp_resource_data/nltk/stem/wordnet.py | 7 +- nlp_resource_data/nltk/tag/__init__.py | 29 +- .../nltk/tag/__pycache__/__init__.cpython-37.pyc | Bin 6851 -> 6868 bytes .../nltk/tag/__pycache__/api.cpython-37.pyc | Bin 3428 -> 3460 bytes .../nltk/tag/__pycache__/brill.cpython-37.pyc | Bin 14263 -> 14319 bytes .../tag/__pycache__/brill_trainer.cpython-37.pyc | Bin 18773 -> 18803 bytes .../nltk/tag/__pycache__/crf.cpython-37.pyc | Bin 7541 -> 7618 bytes .../nltk/tag/__pycache__/hmm.cpython-37.pyc | Bin 41014 -> 41217 bytes .../nltk/tag/__pycache__/hunpos.cpython-37.pyc | Bin 4686 -> 4711 bytes .../nltk/tag/__pycache__/mapping.cpython-37.pyc | Bin 3234 -> 3317 bytes .../nltk/tag/__pycache__/perceptron.cpython-37.pyc | Bin 11476 -> 10289 bytes .../nltk/tag/__pycache__/senna.cpython-37.pyc | Bin 6104 -> 6173 bytes .../nltk/tag/__pycache__/sequential.cpython-37.pyc | Bin 26713 -> 27115 bytes .../nltk/tag/__pycache__/stanford.cpython-37.pyc | Bin 7624 -> 7649 bytes .../nltk/tag/__pycache__/tnt.cpython-37.pyc | Bin 12988 -> 13044 bytes .../nltk/tag/__pycache__/util.cpython-37.pyc | Bin 2422 -> 2410 bytes nlp_resource_data/nltk/tag/api.py | 8 +- nlp_resource_data/nltk/tag/brill.py | 34 +- nlp_resource_data/nltk/tag/brill_trainer.py | 52 +- nlp_resource_data/nltk/tag/crf.py | 41 +- nlp_resource_data/nltk/tag/hmm.py | 100 +- nlp_resource_data/nltk/tag/hunpos.py | 32 +- nlp_resource_data/nltk/tag/mapping.py | 93 +- nlp_resource_data/nltk/tag/perceptron.py | 182 +- nlp_resource_data/nltk/tag/senna.py | 37 +- nlp_resource_data/nltk/tag/sequential.py | 172 +- nlp_resource_data/nltk/tag/stanford.py | 84 +- nlp_resource_data/nltk/tag/tnt.py | 86 +- nlp_resource_data/nltk/tag/util.py | 10 +- nlp_resource_data/nltk/tbl/__init__.py | 2 +- .../nltk/tbl/__pycache__/__init__.cpython-37.pyc | Bin 505 -> 493 bytes .../nltk/tbl/__pycache__/api.cpython-37.pyc | Bin 153 -> 141 bytes 
.../nltk/tbl/__pycache__/demo.cpython-37.pyc | Bin 12481 -> 12563 bytes .../tbl/__pycache__/erroranalysis.cpython-37.pyc | Bin 1369 -> 1411 bytes .../nltk/tbl/__pycache__/feature.cpython-37.pyc | Bin 9740 -> 9867 bytes .../nltk/tbl/__pycache__/rule.cpython-37.pyc | Bin 10393 -> 10399 bytes .../nltk/tbl/__pycache__/template.cpython-37.pyc | Bin 12422 -> 12503 bytes nlp_resource_data/nltk/tbl/demo.py | 47 +- nlp_resource_data/nltk/tbl/erroranalysis.py | 21 +- nlp_resource_data/nltk/tbl/feature.py | 9 +- nlp_resource_data/nltk/tbl/rule.py | 56 +- nlp_resource_data/nltk/tbl/template.py | 7 +- nlp_resource_data/nltk/test/__init__.py | 2 +- .../nltk/test/__pycache__/__init__.cpython-37.pyc | Bin 438 -> 426 bytes .../nltk/test/__pycache__/all.cpython-37.pyc | Bin 1065 -> 1053 bytes .../test/__pycache__/childes_fixt.cpython-37.pyc | Bin 611 -> 654 bytes .../test/__pycache__/classify_fixt.cpython-37.pyc | Bin 417 -> 460 bytes .../test/__pycache__/compat_fixt.cpython-37.pyc | Bin 0 -> 444 bytes .../test/__pycache__/corpus_fixt.cpython-37.pyc | Bin 213 -> 251 bytes .../test/__pycache__/discourse_fixt.cpython-37.pyc | Bin 524 -> 567 bytes .../__pycache__/doctest_nose_plugin.cpython-37.pyc | Bin 0 -> 5254 bytes .../test/__pycache__/gensim_fixt.cpython-37.pyc | Bin 415 -> 458 bytes .../gluesemantics_malt_fixt.cpython-37.pyc | Bin 495 -> 538 bytes .../test/__pycache__/inference_fixt.cpython-37.pyc | Bin 527 -> 570 bytes .../__pycache__/nonmonotonic_fixt.cpython-37.pyc | Bin 533 -> 576 bytes .../__pycache__/portuguese_en_fixt.cpython-37.pyc | Bin 451 -> 648 bytes .../__pycache__/probability_fixt.cpython-37.pyc | Bin 423 -> 466 bytes .../nltk/test/__pycache__/runtests.cpython-37.pyc | Bin 1797 -> 1987 bytes .../__pycache__/segmentation_fixt.cpython-37.pyc | Bin 425 -> 468 bytes .../test/__pycache__/semantics_fixt.cpython-37.pyc | Bin 339 -> 382 bytes .../test/__pycache__/translate_fixt.cpython-37.pyc | Bin 216 -> 254 bytes .../test/__pycache__/wordnet_fixt.cpython-37.pyc | Bin 341 -> 384 bytes nlp_resource_data/nltk/test/all.py | 6 +- nlp_resource_data/nltk/test/bnc.doctest | 2 +- nlp_resource_data/nltk/test/ccg.doctest | 4 +- nlp_resource_data/nltk/test/ccg_semantics.doctest | 2 +- nlp_resource_data/nltk/test/chat80.doctest | 2 +- nlp_resource_data/nltk/test/childes_fixt.py | 3 +- nlp_resource_data/nltk/test/chunk.doctest | 2 +- nlp_resource_data/nltk/test/classify.doctest | 46 +- nlp_resource_data/nltk/test/classify_fixt.py | 1 + nlp_resource_data/nltk/test/collections.doctest | 2 +- nlp_resource_data/nltk/test/collocations.doctest | 33 +- nlp_resource_data/nltk/test/compat.doctest | 134 + nlp_resource_data/nltk/test/compat_fixt.py | 10 + nlp_resource_data/nltk/test/corpus.doctest | 57 +- nlp_resource_data/nltk/test/corpus_fixt.py | 1 + nlp_resource_data/nltk/test/crubadan.doctest | 2 +- nlp_resource_data/nltk/test/data.doctest | 5 +- nlp_resource_data/nltk/test/dependency.doctest | 2 +- nlp_resource_data/nltk/test/discourse.doctest | 2 +- nlp_resource_data/nltk/test/discourse_fixt.py | 3 +- nlp_resource_data/nltk/test/doctest_nose_plugin.py | 164 + nlp_resource_data/nltk/test/drt.doctest | 2 +- nlp_resource_data/nltk/test/featgram.doctest | 3 +- nlp_resource_data/nltk/test/featstruct.doctest | 3 +- nlp_resource_data/nltk/test/framenet.doctest | 2 +- nlp_resource_data/nltk/test/generate.doctest | 2 +- nlp_resource_data/nltk/test/gensim.doctest | 8 +- nlp_resource_data/nltk/test/gensim_fixt.py | 1 + nlp_resource_data/nltk/test/gluesemantics.doctest | 2 +- .../nltk/test/gluesemantics_malt.doctest | 2 +- 
.../nltk/test/gluesemantics_malt_fixt.py | 3 +- nlp_resource_data/nltk/test/grammar.doctest | 2 +- .../nltk/test/grammartestsuites.doctest | 2 +- nlp_resource_data/nltk/test/index.doctest | 2 +- nlp_resource_data/nltk/test/inference.doctest | 2 +- nlp_resource_data/nltk/test/inference_fixt.py | 3 +- nlp_resource_data/nltk/test/internals.doctest | 2 +- nlp_resource_data/nltk/test/japanese.doctest | 2 +- nlp_resource_data/nltk/test/lm.doctest | 2 +- nlp_resource_data/nltk/test/logic.doctest | 2 +- nlp_resource_data/nltk/test/meteor.doctest | 45 - nlp_resource_data/nltk/test/metrics.doctest | 26 +- nlp_resource_data/nltk/test/misc.doctest | 2 +- nlp_resource_data/nltk/test/nonmonotonic.doctest | 2 +- nlp_resource_data/nltk/test/nonmonotonic_fixt.py | 3 +- nlp_resource_data/nltk/test/parse.doctest | 2 +- nlp_resource_data/nltk/test/portuguese_en.doctest | 34 +- nlp_resource_data/nltk/test/portuguese_en_fixt.py | 8 + nlp_resource_data/nltk/test/probability.doctest | 2 +- nlp_resource_data/nltk/test/probability_fixt.py | 1 + nlp_resource_data/nltk/test/propbank.doctest | 2 +- nlp_resource_data/nltk/test/relextract.doctest | 60 +- nlp_resource_data/nltk/test/resolution.doctest | 2 +- nlp_resource_data/nltk/test/runtests.py | 28 +- nlp_resource_data/nltk/test/segmentation_fixt.py | 1 + nlp_resource_data/nltk/test/semantics.doctest | 2 +- nlp_resource_data/nltk/test/semantics_fixt.py | 1 + nlp_resource_data/nltk/test/sentiment.doctest | 2 +- nlp_resource_data/nltk/test/sentiwordnet.doctest | 2 +- nlp_resource_data/nltk/test/simple.doctest | 5 +- nlp_resource_data/nltk/test/stem.doctest | 3 +- nlp_resource_data/nltk/test/tag.doctest | 2 +- nlp_resource_data/nltk/test/tokenize.doctest | 13 +- nlp_resource_data/nltk/test/toolbox.doctest | 2 +- nlp_resource_data/nltk/test/translate.doctest | 2 +- nlp_resource_data/nltk/test/translate_fixt.py | 1 + nlp_resource_data/nltk/test/tree.doctest | 2 +- .../nltk/test/treeprettyprinter.doctest | 2 +- nlp_resource_data/nltk/test/treetransforms.doctest | 2 +- .../test/unit/__pycache__/__init__.cpython-37.pyc | Bin 164 -> 152 bytes .../unit/__pycache__/test_2x_compat.cpython-37.pyc | Bin 0 -> 2194 bytes .../unit/__pycache__/test_aline.cpython-37.pyc | Bin 1565 -> 1604 bytes .../unit/__pycache__/test_brill.cpython-37.pyc | Bin 1317 -> 1305 bytes .../__pycache__/test_cfd_mutation.cpython-37.pyc | Bin 1365 -> 0 bytes .../__pycache__/test_cfg2chomsky.cpython-37.pyc | Bin 1596 -> 0 bytes .../unit/__pycache__/test_chunk.cpython-37.pyc | Bin 2053 -> 2118 bytes .../unit/__pycache__/test_classify.cpython-37.pyc | Bin 1484 -> 1522 bytes .../__pycache__/test_collocations.cpython-37.pyc | Bin 3597 -> 3662 bytes .../__pycache__/test_concordance.cpython-37.pyc | Bin 5159 -> 5298 bytes .../unit/__pycache__/test_corenlp.cpython-37.pyc | Bin 10465 -> 10559 bytes .../unit/__pycache__/test_corpora.cpython-37.pyc | Bin 7459 -> 7587 bytes .../__pycache__/test_corpus_views.cpython-37.pyc | Bin 1624 -> 1689 bytes .../test/unit/__pycache__/test_data.cpython-37.pyc | Bin 1156 -> 1144 bytes .../__pycache__/test_disagreement.cpython-37.pyc | Bin 3795 -> 3860 bytes .../unit/__pycache__/test_freqdist.cpython-37.pyc | Bin 937 -> 0 bytes .../test/unit/__pycache__/test_hmm.cpython-37.pyc | Bin 2162 -> 2227 bytes .../test_json2csv_corpus.cpython-37.pyc | Bin 7489 -> 7517 bytes .../test_json_serialization.cpython-37.pyc | Bin 3449 -> 0 bytes .../__pycache__/test_naivebayes.cpython-37.pyc | Bin 900 -> 964 bytes .../unit/__pycache__/test_nombank.cpython-37.pyc | Bin 1328 -> 0 bytes 
.../unit/__pycache__/test_pl196x.cpython-37.pyc | Bin 787 -> 0 bytes .../unit/__pycache__/test_pos_tag.cpython-37.pyc | Bin 2831 -> 2870 bytes .../__pycache__/test_rte_classify.cpython-37.pyc | Bin 3029 -> 3093 bytes ...t_seekable_unicode_stream_reader.cpython-37.pyc | Bin 5078 -> 5143 bytes .../unit/__pycache__/test_senna.cpython-37.pyc | Bin 3421 -> 3460 bytes .../test/unit/__pycache__/test_stem.cpython-37.pyc | Bin 5103 -> 5219 bytes .../test/unit/__pycache__/test_tag.cpython-37.pyc | Bin 869 -> 939 bytes .../unit/__pycache__/test_tgrep.cpython-37.pyc | Bin 20614 -> 20700 bytes .../unit/__pycache__/test_tokenize.cpython-37.pyc | Bin 9688 -> 6867 bytes .../__pycache__/test_twitter_auth.cpython-37.pyc | Bin 6009 -> 5997 bytes .../unit/__pycache__/test_wordnet.cpython-37.pyc | Bin 7157 -> 6351 bytes .../test/unit/__pycache__/utils.cpython-37.pyc | Bin 1538 -> 1576 bytes .../unit/lm/__pycache__/__init__.cpython-37.pyc | Bin 167 -> 155 bytes .../lm/__pycache__/test_counter.cpython-37.pyc | Bin 5769 -> 5799 bytes .../unit/lm/__pycache__/test_models.cpython-37.pyc | Bin 11107 -> 11234 bytes .../__pycache__/test_preprocessing.cpython-37.pyc | Bin 1096 -> 1084 bytes .../lm/__pycache__/test_vocabulary.cpython-37.pyc | Bin 6227 -> 6241 bytes .../nltk/test/unit/lm/test_counter.py | 17 +- nlp_resource_data/nltk/test/unit/lm/test_models.py | 63 +- .../nltk/test/unit/lm/test_preprocessing.py | 3 +- .../nltk/test/unit/lm/test_vocabulary.py | 11 +- nlp_resource_data/nltk/test/unit/test_2x_compat.py | 65 + nlp_resource_data/nltk/test/unit/test_aline.py | 1 + .../nltk/test/unit/test_cfd_mutation.py | 39 - .../nltk/test/unit/test_cfg2chomsky.py | 49 - nlp_resource_data/nltk/test/unit/test_chunk.py | 1 + nlp_resource_data/nltk/test/unit/test_classify.py | 1 + .../nltk/test/unit/test_collocations.py | 1 + .../nltk/test/unit/test_concordance.py | 7 +- nlp_resource_data/nltk/test/unit/test_corenlp.py | 9 +- nlp_resource_data/nltk/test/unit/test_corpora.py | 2 + .../nltk/test/unit/test_corpus_views.py | 1 + .../nltk/test/unit/test_disagreement.py | 1 + nlp_resource_data/nltk/test/unit/test_freqdist.py | 16 - nlp_resource_data/nltk/test/unit/test_hmm.py | 1 + .../nltk/test/unit/test_json2csv_corpus.py | 6 +- .../nltk/test/unit/test_json_serialization.py | 87 - .../nltk/test/unit/test_naivebayes.py | 1 + nlp_resource_data/nltk/test/unit/test_nombank.py | 27 - nlp_resource_data/nltk/test/unit/test_pl196x.py | 14 - nlp_resource_data/nltk/test/unit/test_pos_tag.py | 1 + .../nltk/test/unit/test_rte_classify.py | 1 + .../unit/test_seekable_unicode_stream_reader.py | 1 + nlp_resource_data/nltk/test/unit/test_senna.py | 1 + nlp_resource_data/nltk/test/unit/test_stem.py | 4 + nlp_resource_data/nltk/test/unit/test_tag.py | 1 + nlp_resource_data/nltk/test/unit/test_tgrep.py | 15 +- nlp_resource_data/nltk/test/unit/test_tokenize.py | 73 +- nlp_resource_data/nltk/test/unit/test_wordnet.py | 25 +- .../translate/__pycache__/__init__.cpython-37.pyc | Bin 174 -> 162 bytes .../translate/__pycache__/test_bleu.cpython-37.pyc | Bin 7747 -> 7735 bytes .../translate/__pycache__/test_gdfa.cpython-37.pyc | Bin 3342 -> 3330 bytes .../translate/__pycache__/test_ibm1.cpython-37.pyc | Bin 2339 -> 2327 bytes .../translate/__pycache__/test_ibm2.cpython-37.pyc | Bin 3097 -> 3085 bytes .../translate/__pycache__/test_ibm3.cpython-37.pyc | Bin 3518 -> 3506 bytes .../translate/__pycache__/test_ibm4.cpython-37.pyc | Bin 3860 -> 3848 bytes .../translate/__pycache__/test_ibm5.cpython-37.pyc | Bin 5043 -> 5031 bytes 
.../__pycache__/test_ibm_model.cpython-37.pyc | Bin 9576 -> 9564 bytes .../translate/__pycache__/test_nist.cpython-37.pyc | Bin 1528 -> 1516 bytes .../__pycache__/test_stack_decoder.cpython-37.pyc | Bin 8916 -> 8904 bytes .../nltk/test/unit/translate/test_stack_decoder.py | 2 +- nlp_resource_data/nltk/test/unit/utils.py | 1 + nlp_resource_data/nltk/test/util.doctest | 3 +- nlp_resource_data/nltk/test/wordnet.doctest | 17 +- nlp_resource_data/nltk/test/wordnet_fixt.py | 1 + nlp_resource_data/nltk/test/wordnet_lch.doctest | 2 +- nlp_resource_data/nltk/test/wsd.doctest | 2 +- nlp_resource_data/nltk/text.py | 189 +- nlp_resource_data/nltk/tgrep.py | 337 +- nlp_resource_data/nltk/tokenize/__init__.py | 32 +- .../tokenize/__pycache__/__init__.cpython-37.pyc | Bin 5077 -> 5394 bytes .../nltk/tokenize/__pycache__/api.cpython-37.pyc | Bin 2853 -> 2757 bytes .../tokenize/__pycache__/casual.cpython-37.pyc | Bin 8105 -> 8211 bytes .../__pycache__/destructive.cpython-37.pyc | Bin 3128 -> 0 bytes .../nltk/tokenize/__pycache__/mwe.cpython-37.pyc | Bin 3637 -> 3625 bytes .../nltk/tokenize/__pycache__/nist.cpython-37.pyc | Bin 4932 -> 5042 bytes .../nltk/tokenize/__pycache__/punkt.cpython-37.pyc | Bin 38955 -> 39072 bytes .../tokenize/__pycache__/regexp.cpython-37.pyc | Bin 8009 -> 8117 bytes .../nltk/tokenize/__pycache__/repp.cpython-37.pyc | Bin 7913 -> 8014 bytes .../nltk/tokenize/__pycache__/sexpr.cpython-37.pyc | Bin 4937 -> 4925 bytes .../tokenize/__pycache__/simple.cpython-37.pyc | Bin 5479 -> 5518 bytes .../__pycache__/sonority_sequencing.cpython-37.pyc | Bin 5671 -> 0 bytes .../tokenize/__pycache__/stanford.cpython-37.pyc | Bin 3629 -> 3730 bytes .../__pycache__/stanford_segmenter.cpython-37.pyc | Bin 6731 -> 6867 bytes .../tokenize/__pycache__/texttiling.cpython-37.pyc | Bin 13639 -> 13627 bytes .../tokenize/__pycache__/toktok.cpython-37.pyc | Bin 4206 -> 4233 bytes .../tokenize/__pycache__/treebank.cpython-37.pyc | Bin 11489 -> 11933 bytes .../nltk/tokenize/__pycache__/util.cpython-37.pyc | Bin 8275 -> 8263 bytes nlp_resource_data/nltk/tokenize/api.py | 13 +- nlp_resource_data/nltk/tokenize/casual.py | 41 +- nlp_resource_data/nltk/tokenize/destructive.py | 141 - nlp_resource_data/nltk/tokenize/mwe.py | 4 +- nlp_resource_data/nltk/tokenize/nist.py | 55 +- nlp_resource_data/nltk/tokenize/punkt.py | 215 +- nlp_resource_data/nltk/tokenize/regexp.py | 15 +- nlp_resource_data/nltk/tokenize/repp.py | 30 +- nlp_resource_data/nltk/tokenize/sexpr.py | 12 +- nlp_resource_data/nltk/tokenize/simple.py | 26 +- .../nltk/tokenize/sonority_sequencing.py | 192 -- nlp_resource_data/nltk/tokenize/stanford.py | 34 +- .../nltk/tokenize/stanford_segmenter.py | 105 +- nlp_resource_data/nltk/tokenize/texttiling.py | 26 +- nlp_resource_data/nltk/tokenize/toktok.py | 103 +- nlp_resource_data/nltk/tokenize/treebank.py | 121 +- nlp_resource_data/nltk/tokenize/util.py | 2 +- nlp_resource_data/nltk/toolbox.py | 115 +- nlp_resource_data/nltk/translate/__init__.py | 3 +- .../translate/__pycache__/__init__.cpython-37.pyc | Bin 1030 -> 946 bytes .../nltk/translate/__pycache__/api.cpython-37.pyc | Bin 12932 -> 13071 bytes .../__pycache__/bleu_score.cpython-37.pyc | Bin 25162 -> 25401 bytes .../__pycache__/chrf_score.cpython-37.pyc | Bin 7861 -> 7892 bytes .../__pycache__/gale_church.cpython-37.pyc | Bin 7840 -> 7871 bytes .../nltk/translate/__pycache__/gdfa.cpython-37.pyc | Bin 5188 -> 5176 bytes .../__pycache__/gleu_score.cpython-37.pyc | Bin 7627 -> 7658 bytes .../nltk/translate/__pycache__/ibm1.cpython-37.pyc | Bin 8328 -> 8359 
bytes .../nltk/translate/__pycache__/ibm2.cpython-37.pyc | Bin 11446 -> 11477 bytes .../nltk/translate/__pycache__/ibm3.cpython-37.pyc | Bin 12902 -> 12933 bytes .../nltk/translate/__pycache__/ibm4.cpython-37.pyc | Bin 17212 -> 17243 bytes .../nltk/translate/__pycache__/ibm5.cpython-37.pyc | Bin 22936 -> 22967 bytes .../translate/__pycache__/ibm_model.cpython-37.pyc | Bin 16451 -> 16482 bytes .../__pycache__/meteor_score.cpython-37.pyc | Bin 15372 -> 0 bytes .../translate/__pycache__/metrics.cpython-37.pyc | Bin 1271 -> 1307 bytes .../__pycache__/nist_score.cpython-37.pyc | Bin 5607 -> 5638 bytes .../__pycache__/phrase_based.cpython-37.pyc | Bin 6239 -> 6229 bytes .../__pycache__/ribes_score.cpython-37.pyc | Bin 10821 -> 10852 bytes .../__pycache__/stack_decoder.cpython-37.pyc | Bin 17224 -> 17212 bytes nlp_resource_data/nltk/translate/api.py | 29 +- nlp_resource_data/nltk/translate/bleu_score.py | 49 +- nlp_resource_data/nltk/translate/chrf_score.py | 7 +- nlp_resource_data/nltk/translate/gale_church.py | 19 +- nlp_resource_data/nltk/translate/gdfa.py | 18 +- nlp_resource_data/nltk/translate/gleu_score.py | 4 +- nlp_resource_data/nltk/translate/ibm1.py | 3 +- nlp_resource_data/nltk/translate/ibm2.py | 8 +- nlp_resource_data/nltk/translate/ibm3.py | 12 +- nlp_resource_data/nltk/translate/ibm4.py | 16 +- nlp_resource_data/nltk/translate/ibm5.py | 20 +- nlp_resource_data/nltk/translate/ibm_model.py | 6 +- nlp_resource_data/nltk/translate/meteor_score.py | 434 --- nlp_resource_data/nltk/translate/metrics.py | 3 +- nlp_resource_data/nltk/translate/nist_score.py | 3 +- nlp_resource_data/nltk/translate/phrase_based.py | 22 +- nlp_resource_data/nltk/translate/ribes_score.py | 4 +- nlp_resource_data/nltk/translate/stack_decoder.py | 12 +- nlp_resource_data/nltk/tree.py | 286 +- nlp_resource_data/nltk/treeprettyprinter.py | 155 +- nlp_resource_data/nltk/treetransforms.py | 3 +- nlp_resource_data/nltk/twitter/__init__.py | 2 +- .../twitter/__pycache__/__init__.cpython-37.pyc | Bin 768 -> 756 bytes .../nltk/twitter/__pycache__/api.cpython-37.pyc | Bin 4202 -> 4252 bytes .../nltk/twitter/__pycache__/common.cpython-37.pyc | Bin 7863 -> 7805 bytes .../__pycache__/twitter_demo.cpython-37.pyc | Bin 7808 -> 7854 bytes .../__pycache__/twitterclient.cpython-37.pyc | Bin 16243 -> 16231 bytes .../nltk/twitter/__pycache__/util.cpython-37.pyc | Bin 4207 -> 4244 bytes nlp_resource_data/nltk/twitter/api.py | 17 +- nlp_resource_data/nltk/twitter/common.py | 46 +- nlp_resource_data/nltk/twitter/twitter_demo.py | 42 +- nlp_resource_data/nltk/twitter/twitterclient.py | 68 +- nlp_resource_data/nltk/twitter/util.py | 32 +- nlp_resource_data/nltk/util.py | 102 +- nlp_resource_data/nltk/wsd.py | 2 +- 737 files changed, 16642 insertions(+), 16567 deletions(-) delete mode 100644 nlp_resource_data/nltk/__pycache__/cli.cpython-37.pyc delete mode 100644 nlp_resource_data/nltk/cli.py delete mode 100644 nlp_resource_data/nltk/corpus/reader/__pycache__/panlex_swadesh.cpython-37.pyc delete mode 100644 nlp_resource_data/nltk/corpus/reader/panlex_swadesh.py create mode 100644 nlp_resource_data/nltk/test/__pycache__/compat_fixt.cpython-37.pyc create mode 100644 nlp_resource_data/nltk/test/__pycache__/doctest_nose_plugin.cpython-37.pyc create mode 100644 nlp_resource_data/nltk/test/compat.doctest create mode 100644 nlp_resource_data/nltk/test/compat_fixt.py create mode 100644 nlp_resource_data/nltk/test/doctest_nose_plugin.py delete mode 100644 nlp_resource_data/nltk/test/meteor.doctest create mode 100644 
nlp_resource_data/nltk/test/unit/__pycache__/test_2x_compat.cpython-37.pyc
 delete mode 100644 nlp_resource_data/nltk/test/unit/__pycache__/test_cfd_mutation.cpython-37.pyc
 delete mode 100644 nlp_resource_data/nltk/test/unit/__pycache__/test_cfg2chomsky.cpython-37.pyc
 delete mode 100644 nlp_resource_data/nltk/test/unit/__pycache__/test_freqdist.cpython-37.pyc
 delete mode 100644 nlp_resource_data/nltk/test/unit/__pycache__/test_json_serialization.cpython-37.pyc
 delete mode 100644 nlp_resource_data/nltk/test/unit/__pycache__/test_nombank.cpython-37.pyc
 delete mode 100644 nlp_resource_data/nltk/test/unit/__pycache__/test_pl196x.cpython-37.pyc
 create mode 100644 nlp_resource_data/nltk/test/unit/test_2x_compat.py
 delete mode 100644 nlp_resource_data/nltk/test/unit/test_cfd_mutation.py
 delete mode 100644 nlp_resource_data/nltk/test/unit/test_cfg2chomsky.py
 delete mode 100644 nlp_resource_data/nltk/test/unit/test_freqdist.py
 delete mode 100644 nlp_resource_data/nltk/test/unit/test_json_serialization.py
 delete mode 100644 nlp_resource_data/nltk/test/unit/test_nombank.py
 delete mode 100644 nlp_resource_data/nltk/test/unit/test_pl196x.py
 delete mode 100644 nlp_resource_data/nltk/tokenize/__pycache__/destructive.cpython-37.pyc
 delete mode 100644 nlp_resource_data/nltk/tokenize/__pycache__/sonority_sequencing.cpython-37.pyc
 delete mode 100644 nlp_resource_data/nltk/tokenize/destructive.py
 delete mode 100644 nlp_resource_data/nltk/tokenize/sonority_sequencing.py
 delete mode 100644 nlp_resource_data/nltk/translate/__pycache__/meteor_score.cpython-37.pyc
 delete mode 100644 nlp_resource_data/nltk/translate/meteor_score.py

diff --git a/nlp_resource_data/nltk/VERSION b/nlp_resource_data/nltk/VERSION
index 5a95802..2f4b607 100644
--- a/nlp_resource_data/nltk/VERSION
+++ b/nlp_resource_data/nltk/VERSION
@@ -1 +1 @@
-3.5
+3.4
diff --git a/nlp_resource_data/nltk/__init__.py b/nlp_resource_data/nltk/__init__.py
index 32833cb..cd14254 100644
--- a/nlp_resource_data/nltk/__init__.py
+++ b/nlp_resource_data/nltk/__init__.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit (NLTK)
 #
-# Copyright (C) 2001-2020 NLTK Project
+# Copyright (C) 2001-2019 NLTK Project
 # Authors: Steven Bird
 #          Edward Loper
 # URL:
@@ -15,6 +15,7 @@ Steven Bird, Ewan Klein, and Edward Loper (2009).
 Natural Language Processing with Python.  O'Reilly Media Inc.
 http://nltk.org/book
 """
+from __future__ import print_function, absolute_import
 
 import os
 
@@ -26,21 +27,21 @@ import os
 # in the file VERSION.
 try:
     # If a VERSION file exists, use it!
-    version_file = os.path.join(os.path.dirname(__file__), "VERSION")
-    with open(version_file, "r") as infile:
+    version_file = os.path.join(os.path.dirname(__file__), 'VERSION')
+    with open(version_file, 'r') as infile:
         __version__ = infile.read().strip()
 except NameError:
-    __version__ = "unknown (running code interactively?)"
+    __version__ = 'unknown (running code interactively?)'
 except IOError as ex:
     __version__ = "unknown (%s)" % ex
 
 if __doc__ is not None:  # fix for the ``python -OO``
-    __doc__ += "\n@version: " + __version__
+    __doc__ += '\n@version: ' + __version__
 
 
 # Copyright notice
 __copyright__ = """\
-Copyright (C) 2001-2020 NLTK Project.
+Copyright (C) 2001-2019 NLTK Project.
 
 Distributed and Licensed under the Apache License, Version 2.0,
 which is included by reference.
@@ -52,18 +53,18 @@ __longdescr__ = """\
 The Natural Language Toolkit (NLTK) is a Python package for
 natural language processing.  NLTK requires Python 2.6 or higher."""
 __keywords__ = [
-    "NLP",
-    "CL",
-    "natural language processing",
-    "computational linguistics",
-    "parsing",
-    "tagging",
-    "tokenizing",
-    "syntax",
-    "linguistics",
-    "language",
-    "natural language",
-    "text analytics",
+    'NLP',
+    'CL',
+    'natural language processing',
+    'computational linguistics',
+    'parsing',
+    'tagging',
+    'tokenizing',
+    'syntax',
+    'linguistics',
+    'language',
+    'natural language',
+    'text analytics',
 ]
 
 __url__ = "http://nltk.org/"
@@ -75,24 +76,24 @@ __author_email__ = __maintainer_email__
 
 # "Trove" classifiers for Python Package Index.
 __classifiers__ = [
-    "Development Status :: 5 - Production/Stable",
-    "Intended Audience :: Developers",
-    "Intended Audience :: Education",
-    "Intended Audience :: Information Technology",
-    "Intended Audience :: Science/Research",
-    "License :: OSI Approved :: Apache Software License",
-    "Operating System :: OS Independent",
-    "Programming Language :: Python :: 2.6",
-    "Programming Language :: Python :: 2.7",
-    "Topic :: Scientific/Engineering",
-    "Topic :: Scientific/Engineering :: Artificial Intelligence",
-    "Topic :: Scientific/Engineering :: Human Machine Interfaces",
-    "Topic :: Scientific/Engineering :: Information Analysis",
-    "Topic :: Text Processing",
-    "Topic :: Text Processing :: Filters",
-    "Topic :: Text Processing :: General",
-    "Topic :: Text Processing :: Indexing",
-    "Topic :: Text Processing :: Linguistic",
+    'Development Status :: 5 - Production/Stable',
+    'Intended Audience :: Developers',
+    'Intended Audience :: Education',
+    'Intended Audience :: Information Technology',
+    'Intended Audience :: Science/Research',
+    'License :: OSI Approved :: Apache Software License',
+    'Operating System :: OS Independent',
+    'Programming Language :: Python :: 2.6',
+    'Programming Language :: Python :: 2.7',
+    'Topic :: Scientific/Engineering',
+    'Topic :: Scientific/Engineering :: Artificial Intelligence',
+    'Topic :: Scientific/Engineering :: Human Machine Interfaces',
+    'Topic :: Scientific/Engineering :: Information Analysis',
+    'Topic :: Text Processing',
+    'Topic :: Text Processing :: Filters',
+    'Topic :: Text Processing :: General',
+    'Topic :: Text Processing :: Indexing',
+    'Topic :: Text Processing :: Linguistic',
 ]
 
 from nltk.internals import config_java
@@ -106,16 +107,16 @@ except ImportError:
 # Override missing methods on environments where it cannot be used like GAE.
 import subprocess
 
-if not hasattr(subprocess, "PIPE"):
+if not hasattr(subprocess, 'PIPE'):
 
     def _fake_PIPE(*args, **kwargs):
-        raise NotImplementedError("subprocess.PIPE is not supported.")
+        raise NotImplementedError('subprocess.PIPE is not supported.')
 
     subprocess.PIPE = _fake_PIPE
 
-if not hasattr(subprocess, "Popen"):
+if not hasattr(subprocess, 'Popen'):
 
     def _fake_Popen(*args, **kwargs):
-        raise NotImplementedError("subprocess.Popen is not supported.")
+        raise NotImplementedError('subprocess.Popen is not supported.')
 
     subprocess.Popen = _fake_Popen
@@ -157,11 +158,11 @@ from nltk.stem import *
 
 from nltk import lazyimport
 
-app = lazyimport.LazyModule("nltk.app", locals(), globals())
-chat = lazyimport.LazyModule("nltk.chat", locals(), globals())
-corpus = lazyimport.LazyModule("nltk.corpus", locals(), globals())
-draw = lazyimport.LazyModule("nltk.draw", locals(), globals())
-toolbox = lazyimport.LazyModule("nltk.toolbox", locals(), globals())
+app = lazyimport.LazyModule('nltk.app', locals(), globals())
+chat = lazyimport.LazyModule('nltk.chat', locals(), globals())
+corpus = lazyimport.LazyModule('nltk.corpus', locals(), globals())
+draw = lazyimport.LazyModule('nltk.draw', locals(), globals())
+toolbox = lazyimport.LazyModule('nltk.toolbox', locals(), globals())
 
 # Optional loading
 
@@ -175,7 +176,7 @@ else:
     from nltk.downloader import download, download_shell
 
 try:
-    import tkinter
+    from six.moves import tkinter
 except ImportError:
     pass
 else:
diff --git a/nlp_resource_data/nltk/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/__pycache__/__init__.cpython-37.pyc
index 625f133b0ceee04a0de3e57171bc955b466cf596..a6e7071e00b2c8e13bdb3338f0bb4fb90966c0b1 100644
Binary files a/nlp_resource_data/nltk/__pycache__/__init__.cpython-37.pyc and b/nlp_resource_data/nltk/__pycache__/__init__.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/__pycache__/book.cpython-37.pyc b/nlp_resource_data/nltk/__pycache__/book.cpython-37.pyc
index 7eb915b36d84187352723f5a595d0152bd385f17..1fa7da6ece13c18fbe36be670230b4b612354282 100644
Binary files a/nlp_resource_data/nltk/__pycache__/book.cpython-37.pyc and b/nlp_resource_data/nltk/__pycache__/book.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/__pycache__/cli.cpython-37.pyc b/nlp_resource_data/nltk/__pycache__/cli.cpython-37.pyc
deleted file mode 100644
index 08485376e00e78dbb5000b2341593e977b23c925..0000000000000000000000000000000000000000
Binary files a/nlp_resource_data/nltk/__pycache__/cli.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/__pycache__/collections.cpython-37.pyc b/nlp_resource_data/nltk/__pycache__/collections.cpython-37.pyc
index 54cc5327c04643519185c98e8b823103f78d8ce0..d7f7d1979bbb9f944e03ec9a94dfa590496f2f30 100644
Binary files a/nlp_resource_data/nltk/__pycache__/collections.cpython-37.pyc and b/nlp_resource_data/nltk/__pycache__/collections.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/__pycache__/compat.cpython-37.pyc b/nlp_resource_data/nltk/__pycache__/compat.cpython-37.pyc
index a8163a3faec7ca190ade96adfa30935ceebd4d6d..81821ed1f3adccfa36dcf174ba9d4f63ad862f46 100644
Binary files a/nlp_resource_data/nltk/__pycache__/compat.cpython-37.pyc and b/nlp_resource_data/nltk/__pycache__/compat.cpython-37.pyc differ
zjcxsFG$pr9Ok{p%i0NV6BU^>cgoopMjK z(z?dBOkKy#Xjn#dc?KgT+kmPhJ)e|EqvR|62v&RSb-|HEupZb?o~IE+b=f2Um}kg7 zjGKfFhuvX6)y!5h^pyihK^B@_8y= zpu(o&RTN3VUv2wcd4U=jEt-B8m*iBFr_u2Nu2d}+EOVk#E^;d*vea7^F^qB>0Qrwt z0!XD}L1G5`3|3FoAk5Qi||DFJ?I9+3^8=KT|n@FsLb z?Au-_Z0W}X8QPvrJ7PD(79&5SW!tlh(5{OfbTHJag5}AT;=I4;Jq{viqcbCjJwJ%; zwinv-zCBP-$LR9xb`+8WX7{`hzL3PEwH<0LwA;-$0M9hVv)ZfuVY-(N+q$V2Axi@niTuer`iOHyX1pKlBDM+v|y@5i%|9J zC%=1CJKCO92K}hcv7Nw!(4o)Za4#7s&24i3q2nvpe!OA-__W-(-n5cpCz3rcmSk8` zxqcUSBNf5&nhP!2A{c1tx|@I*`G8MHErrWfE}=#j6O#6|%a4}NSP_8%%&Lzif4XbQ zQ%|Kv26%gqaiM+jqF-c-$|pn-^<=Fv)V7E&8kq$$-y?+&kf@z2w1Za4UUPc4|Q5=t11c@gH__O z?F_nEoJp)1L?7#C-n)12&b_OA9ORq9|7v%AFCGmMKlGYe#hE+d?FI|w_XLm`9Tb=|+oQ81oLi3}{~Ve(TdxR?_E zh?~`)5-rF}xut!&3TFuJ66qm0K<3&BJV7IE>(M3LnRHjvHY{-WF>tp9&Ynq=3PRto zQeGWeat^Qsg4MN0wY55)is{osJZIxROUHeJmXC2g)EKTrc(C+LW^jKuf}`|Jt`o~p z9pvsn+aat%jK1hDMM0Pp-9S-W33WFJmy^<+`^>JA9Dd?swg`ZeJ-{^!sTQ?p2Ilvq z+!=H^EK3SJ)xNXmdHw-&DI0}mHkdC@=yh|U z=5tSVgGhPH_o?^+6~v|T6Dr6rrnHF{2W?7lL?SWWNF}9=F7metYM!7hqK*6=dKOfC zN{z*oLh?uakkUoY(L;f0BR#sWu-rmoRbr6@GiNdpb`d7$O7>U!7{|7)T(gX7fx^@3 zA(Z7RXhnbU2=$`Duua1$w-*QDvM-f$u;;Bp=enWqW^I%}SoFJnrxbe&NEIMc#J6xt zlyf2jC&e`Hh0AU~QZC@o39xD$E&E}xPLPE@R)}*UBt?*$HFCb$U2kmYacbK1ue99` zyO|h~VnWJA03%?zf-0#n*AcOIQG)I58|eNQT$$%C={aAVq@QGvD~u_hm{_G6O1 z#SKUvyZ{6!%SBm*tw`@nvdYp2*@zL0Zt6!VXwhId3}GDAe&0Z1)m%2@EIq%;eefs@ zZU-zu3WmZs?XLEy`Veif-PX*yHGTJ!%ij<+)~(ih&~FwJ6D`R>7o~gao!NKX2X|4u zcVEr`2tpAGA8rQXQZR7JT?i3+i3#fOBScd3gmC+7mtBhO{uRR&IT0EJrK%s(C-h0Z zte=7sm#b*aDMZIZRH94K@$3X|bVOf(Iz$5wyW8mI-{?#!eo*I7453z!Q+SGO?Qsn0 zfMCdJiZ1Te-I2hyH3`UjHAOJw3?v)2Tvoy>=TaLG2JQhjy%)+aKOSI^-gTC58F5*jMsx zx*r>)i7U=1{2p3ZZj(w#o@o~eo2HJl#S@)+ zixRR!-8DoJrAb-jW@#tMiiLT?L#R2qNUi{}61xHv!X@K-cpiq7ya%)wQw_6Ybdpl` z0`dlJhtSpZb$g%4?AV+PIQ7qM0K+|^_k(UK7c6nJh#krB%Ri&yFA3=69-uwA37Gvq zC&0g;A~XBkz#in!(fWNtjd~FJBFrF4!&9Q3|9?8uMwreG+@w_A;xLs{KIV-4U9=~S zc_h&kG{_>DiQck+;+N2o6F%Kyu2^`pKioy}BTTtx&{_T!rrS$dYGLv|n}UA?un+f8 zkaQGLka}_o0%C6{zz!qg@lCUTgzrtd2PeWwq(i;UD1oO+$eokuV6%K~yvv=f{vlvL zru9>2u&zsLY2(U&+=C}|2F0HIbBX1Ja?lmW`Wy&41z>=f_qO!-Rl2tgo@LF!mIlJ_ z!R}HrrxQT)(7C{46$)0EUSN~rwvp^q$Zy(F$L?g)sMcsgo~t9H-pH0+|K8=+>n-~O zA_P<_^@H-S;GP7U?uK!23T|z)(X1mX7BNqu9>DdARVys0X_YFL`9O-{+*Z zitU?GuLsm_1&;%T?z0PR(M0yj4|y(HJRBhL$z!qE3nO2Id;$FTBFxd3(L7Y*njM8G zIGH_9?gGX_R9mU|!4$SYjDf4*K=^o>NUO+H3GXaHHhnj`AQ{agf=&m(A$GQa-D5?+ zjkj9J8FR9^H;73krz-c|RJ+^o5j`ULnXLCrPJFu;EG!a{V)jPh+yreJPsB-;{6Nu{ z2&w;cKYI`Qf8?$&UwQqFivK5R`>iGz$exE#X0a~?vd*+9;AHtwT zeE_Ad9r<9H<+)~q5%?HJDA23e1=zO1JSWciLkTX(fx(v(7cXDFzymm{nwf2n7@g`p zW+6@uyFqDFPC1zy+R#*2;QvCaBfW&!VMrHw#Ymf?EVAbdZC^?XKo+URUt(5rh(dn_ z@6|2(fFv>T-HB6m^AmMyh`Vz3Lzddai%|pwBc@8mc6B43%Q+X)P454W<&ax=3`wv# zEYK&-Nd&M+s5eHUgfu#%g7BEM3(|STrzUo2qGUY#P6aQykjvXKxTAI18qf}ZG}0yI zx@GX5o38^=d|ZQ`xfY*iDixW=q z$d{>i45?A)71ycw0~Ahe zPi8_<`zjSbp@JebN$Gj{3lvVBKPH0XmWFHWqNWZH8B?|gtelyuJ4di{=1KWl`rXEL zN}Q??MFakB8R=-0M#HKb{1pbQOA#+Ri$>ilo0P1sBgbJG4Wq1A%|@+=n&9g%Qe86| j_^^)tie}v`*UOc1&48V6=vALzd!%sF8ZyO2W0o&3?pU({4$42vuN{} zR^BHNATWi{c$VUfg$zM(VF_C}2Uf^ML%8RV@By4*Xo;rq&LOnLitx!h0{0VJiRYbJATWpOdq)}frt1E$&5wr-k8luq(O88S6o^txj?OS*^g z$yjEwns&D;k8dw7yj_zxlCx~DL&-TKa6qk)8zF%)PK#v16I%H@g5#`!0lXfcVu=No zM1JZhqWgE>P)-S-S&3h;;3b}62ToZDB`cX=9r(aDXb4lLhGq5h)+Rbtvwde_>Fd$( z)lM|%z34?Rhvc5_Yfc)pJ(-K-So1WQ2V6OHzqTetGSfVh`G1MZyovT>A)@SLClYZH zZy3VY2mn1?#~ZkYF5X24)lG{1(`PhS-`d6M(kNb{n2VAgb)$}0SgGk@+1B&5Wf;2ehh+ujPV$=toF@HEA5YXGeSsS zvIOipZbE8%oSZaHTIuvOaoRMAqoj3CT|2)gX__{6lk~Kczmqm?;-vX=d)nANt^3_O zE3FVBX|+dp?!0&3efQmW-+lMpH$VEY_R8nA>dvaFu!6tg;FIw^?vvGviXmN5gkmT{ z6y#r#XzPmyUL1$Xv{7)7OToA 
zvGZk?7)|#pxh7pPmkx&IS5<|6T+?JUJ)q>Y^U7$Q=^t2T1_qXk3K71g8Bw&@iWVyd z!%wKP&THe*6bDv$4k-4XVQEP4%3vQ|tMQwZwindOu z#x`@Ks1pkxQOs?Jl$&ZBh($e-4+hcUSAcCC-!2*^RM9Z#}W=T;jxv7>ad^0OV(;ek3IcS%1%`#>+#d5P) z&ZSX?D{iWNK^j1=_csVo-28tMV6#}sj8`#iX1IE)6EPrSV%;OE(Jt1D4Vb>&Vxwrq zbB`Dlo5dE?>@^pf)i>26ir6Z)J)($h=DwT0o2s$Dh@$Oc2a0x>`^|=%x^Vy{_lP!> zw3+j7`iu^|?-aZ6zRNs-_fEXGi`{tNZSKeW!L(u=5*e{a>_tb1MON$+`+;?d1ENE8 zii6^iI4ruv5ph%;6UW89@2@nDh!f(Z=oY6$@w9l0I3v!A9&sOj=TLc68ZCXiRkjL()v8uXa(F>0H`QCo=JLPMA5{oo@~ITIS_p z;8R7l(j3Ypl4g9QA7WTTK$9x@HT==jkN<=Cw-x~ibzBje@I9gwlyP-Jp~W?!Vw8Hf ztEyq=WJlC-9$Of5t4OjDnHY+v(wPhd*IGBbdM2GSKdi`laC&3greRBNa-7hnV#S_5 zGiP2MlKZ$L4b{3bwsM+i$r=0bn-3Q9dW16f( zV^eX8*yn``C*Bf9hRAQIM=Hh z)q|7ot2z@{w-a2(f|wYyebc6{Zn=xz{ke=i*xobdLz!HOl2}k4bn5GNt(A@R=935T zkR1T7o=arSS>tu?mH)vt)S?|=%y)6q`tCQ>3$a>P@6G2k zav$*tSY|>d``wV46V^!D?w5zDMHkiiEOQ886srREX{YisEB8?CUIP1_w#Ccmkzj7H zWWe%>V=ivq;8sZ!^yw>Rd?;b}yMeS77il?5T6q*rzw3NzaZEir`Mt%jYb~ThvM0;T zq~sRrW0qERnvP|YZ!WD;*D$dvjXB6FNUw1#J!Y<*+`9a|>ROL6)4BnwOwTqIR`6Em z&83T+@QSt$nq~PQ0kRveoFNNKMFu7x0Km?#SQLhYDs)X#BNdUbe8Aavsd4hz6)&qN zrqwBXK;s6?t1wB+4IVpnSjxO~{j4L{FptC}uCqoRio;Q)?Rfm4^MzH9M~Ngh=fHW^ z&N|uVzN#1z`Me>R&i9*Ftzq`0R7|-o%|v!qH%xopS-0BQP9kF#>;&it4O%v9^De!T ziY_|OuTJixhAf1qF={M(0U!x?{}2^$>Fq;g@`UrinjKLeNZ18c1F_-l3d(?54<82) zr*)4tIxjC-=)APXzf*%{+LF1=Etd1kE-i!cIC#h7 zgHF60x1A24GF81_NX`TSw@o~x-5b6h|A_fy4maew56dKU_B-A zKK}W63SW#>U87pe%qn9qhti;;brAyI7PT+M!VmqVvR&LtU zpSGeP&W~8pRKmiB8>I#)12xlH(R?aOj-S|3X-ej^QF6o^$%(_8TBZm`o!?&{ETWkm zwfoIzGH=<`ZrH-E9L?sX8O@q_jJAeL-I`HhrV_)MDVd|>8ln(p>&ea%(WOhXhm28x zV%RcM!}u*`%#3^~aaK^I`o!9_ZihE8mmz;e&^c}hzwN{#X=8E>~-2bd(Yz@Kw(torD7 zAU2|af>KZmTESP)C;X~X@Q=ezux@-4T0x2Hw*Mw#g?K>FTaY6nJ+T4{jfLe&9f_?f zdfKAb_MwDKWLY}eW!j?&PtsA$SvE>8eY#~kML{ztv24n99-AzNJjDXjBg`nmoMb=j zCKv5BG0meido4xTvLV_cdkH2O0G;MVVzrE*7`kd1>1~;=@C5Msn4|M4v&ooO%uIW< zhwa6)Brb|#GKaH>#iB(r&Ri7Xq1lI7V9VPw-3|2*o7@F!%h@uYL7daa*5`4-0H%6E z=(sz&g=~jy9JUEVzKstYh8Rv_;>mikwm^c$MK*7=-oTc>i#S+VE2hUGInSLmgGt~8 z7@{lI`P~~R18zFU3M~_i(wU&+>7)5 zp;&SD>YeTf`xv^Aa9>JyEF^~Y6 z-tN^(Z8s6vOkgXK2L?xALDuRS?wtbz+8W-9POT6SrJ+`>QA3*ZYOKlm?JW`KU27_e zwJ5h0UYRPb*0?4V-s4zS?3g5u942rD;4V8=6@{3lU6S`1>Jb?bMe&8yv1PNB(mI2< zXv!pSi6EvYwK|A@i=0dWd(B{lTmv6M-h~h11&|DAwy%JlrVUL>@~Yi2ouzdBJRdBq z>m#*!A&sk(L!YG|x2hOt@{-^aBsc|KiWX8MbBaEb{5HwSA+qFaft0)i?r8GaINDR>LZ!uD?TSW^$Xp zr!8eMSa=q~!fD<|@ zLNUK-p@?u=s52y#YITxe*Hl}*sa(^puC-wfv`J2A*L($^wV|LDJ*zma6f`K+M{2%Y zdq2GeCa@064bD!FCMaz%_tqtASSM%10!y|bIv`>-aHxA3xErF~0-KBF26}OF9<<7X zJ;Encc>`phrqPmVDXOjp@WUYjSP0K*c#W}zcT5WP`|6wcOZuG8Zm%oegSG|D3iF_` z%MYlwM(eZzrGu(mO9tfGehxI_dTAYSOg;Aew684rWLmH7Wk<|rOCFhgIqko_9Qc)6 z+2<$*n9-_S~FXj!tc zPaP|GZIZ&v>h-isv+^aGyH4D9_NZqgG}oS?l>G=}ykq21n#y?e~JwY6=`x2<)%B%ybD<6U%TAKs^? 
zfDV?jpFnwRJS;j7eTRk>0ix)O)N0LnX~TvCbLiANt(M!Db@LdS{v9!)WghbDntaUp z-r`2*(8m8#S56*i{asaUcic@oH&GKz+#Y&NWrcD`NBOY`<)|kci`5K@u;z4ZPWvSp zhx4(`^~J+Lyi*(<)j2=GsSY=YeIcDo<=u+ISCi%tr4D5`NZb&`$CP7|f?807fMX{- zJgpTs5a(`7I&y=ha`_ITEs4rFi2YPbpMWer(xNA=kWGbn^}%`QAaGt z1z>kkF;#8XocC_;-@&AhQzM>|nQ4saNt|I-ji7APoE4~xfyEW%q zJ9_t*ZSM>ds-DYZ&2UsXVorwSSt2db8F{W>pRyoj z{Fs(v9gvXD8L#F@C=v?K3x#|kJ>-**JL}sT)!NC!ZC_K54m1=LQFRL;(&$-PlWAHz-smYOh7af3^~$v>yRJ}mQ@w$UJQ!Hrq}cVrK%kvOZF6c4 zOgd`E)`oL<9h-;k<}4>Mak5V5+T}{;ct_`IUSFOMkgKQ}70j3`aA{%pcVLNw4P^TR_Hr$Z@&Nh_Kcy<~f!_3rx97Uea0vZ{zdk^p%tPUpzoOe7OI zoG5AQa`hAv>2BqTe133v$jkqQosV}mv~c1Jpa|{ywSOH z>nYF}xHBlFmUDSrXyk`;cce6*0o_Sj9EKwfo{a?5u&)Vm1v1yqOn&j;V_F?;aLQQa z+KU%P*0;y@x2{9h*}2qp_`-3NdyBGcmNXVtklargkCffyA-qClx>&(UiZZmGBfmjp zv;)iQ00_1Z5Z>zej;v_BeJovsrt2kQqK1{Mu87HSH}(7~5hxOR+dqcbF_wpWPd1vSNa)f;LdJid}oNYwj_qcw#JJ4nHD zO53BX!XWE0$W<7mCSULd>0J#MDzGl2iGk2$_T;mEqz^c`l1L({fvg#&z!HN<6;K{A zhipeVmt!HfhYHVpqa$e~%Aj|wx-_K{M=2?Uo2X$dno^d5vwXyOIaD^?`UWxNur*nIcCS|XJZ4ZnP2f@I+SoCcDoS;`0o+K(eLbh*U8l~R?C3cwC6ym?+U{R=^n-+chybr1k{4HYNIdciFsdjw z(Mqzh$bU|Ro?pS;9kLmN=BUNX#Ci7qrT(3$Hk_~A-=TieiJafE?K*L*=s~c~o+mcn zTamv<#r`DeS3V2e8^d1^w#6Abzo&*5AN7T$lIVj%UO2yO+3hP>eiqeXAAHUGfhfD2 zU!8wO{p{q^5By3klIpsk8tO;>CBT?=vAG*pTdWjnC>ImhLx4Q6d=CLq1!DgLc;M2+ zN+ytG|0{a&^BtqBW@dSGH)QqaN8-s;-*Y5LEoha>kXl1Q0UdAYRHdo%6=&szkzJo* zPDz|ZW?oD2pePt~E4eDpTKN*OgCE63to-`q-(GlKE0SvYhK?`prp9uRTKXwxC*?Cl z!JEp1gi-X1IPd`B9RxZF%u*sv{MV?nY9LBAVi~PYtplvl*eH@zOn=VtOQWkzAf@B< zoOdcI)W7EZ=+Z--tYadlD-n@S;ws6lh{rkS9FI@&csB@UNn`}#ak9SuaNd@<++q+1 zJz?Y4<#xf52#F&?rkZ&Cf1JkNo|eC7TDpGuHKC8LJ5TpEpS?&O#|c~_P(Dx*6ZtIy z9Fvqgot`K8P1OF9R2Bk4!Cxp83dnELA<1b@9#n5l_9q|J;_oCT6z9t?6QJ0~4I19v z2B!l#beCVE%Ij2FNtfp488dY-XHO}pm1@gEctJbLmgvrjlyfOY)#OED6|R;V@|f}D zb3=}|zC$#;w7*N(b)qf>mB)a)elPnps}pLlMtAo2y}Y?J2S_=}5tPmrhql|zI7fKq zs^PjUk*agf_CLAn`>2`f>L01)9L?PjAA@n4*qAx+m%05_p=F5A0;)yfg>fT9irMOPE2i{g{lHmr z^$*n7oR_XHT*h-choT9jK}}exjcH&J2=dFzfP3&olrdf zhV$>E1I^d(&J#Dt!px92yfGH{zCW0oW%%m!h*piWsyoF%UdwU9_#oZ4%>9{KgVlT+1V|K$6wy<1b;oXywm2mhIf zLy}Gd-tzhv!r0*0tk~B6gUZ;VJg?^l@~IT=FWdk{bjdrC!NjB4@4R{a^)t+vc@h)1 zmTfFWCy(EFP-`Q(J-6-mxQ~>{f7>B-giWXNDn+3c32!=(GG`M zqCpsuyiE7;K8JPdhP`2sHM)C4NY;9nVim-YKUAW{9YBzn9NbcRZk^?rBuSa~Pk!U= zVU35-?I|`We})QAyF3sUimifWWWAIw*~pc8K7{3wVHyPKSymEOPZ)y+!pK*Bz>nRb z8Hf@Hsk*ODt;Xe7Wb&bRd_p_<1C-J|&lm8NUj%S{@G4%+>}QuHX}g&68IEwTU$Wa+N9Cjt%<>z%O<52{uf-H_F9+Oc4lxOsZ zPTiAT&G^h&ktE^_b(ZI@4;3A{^VTQ31fgEA{1Ra=0$|c&gs&s89-zCsJErw;W?t*= zLG|+}?CGLnRsJ>b*ctvb2fD}io%DGHKL63EHNrzh>Q2>Djl~Uw9wxxEcA78_%zg;W z^(A_f64r!4(W_2Zb>07X_(B2CJ)exN^Pi zhXinq7QwB14SkY_oL4oTb$HkD)9?%Vs&$R()mpfVx`6I%e0pyU(#Emctu=i$bu|}t XfB3R9_Vn)Hif}zn6)T+=p5E}k4IYY@ delta 9222 zcmb6;3vgV;k#FAH&(rSeV@Z}JTb9<=&+Cumul%=>z_Jat1eU)KDE8r>AH3;v4EOpI3|83kw4Z{MGc_8h!Vzhl(n54{M6T6+_{St4U@s z`l^OXU(L|qt0lFR$M7(h$7^_r<}-Zs^&5Wr>V{6=0;7Pw0V4okPcoPa8KG36QAp#x z$)Z%TQJg9<=1@Q9OO~d}jIva@QO=an5*Xr7&P`Pq^O$l-Nryd(SveRGUtc_FFU*;Zyyy&@P{*49wPb=B5wngL^OCcQ8M#S0!`1^cpM&g!{s)=Z2d%pCrMz^4 z@zOz!Fm>b0Tr9{c16lA7CiM?2A78=CCm8TtGI7GX@n*ob0M}fckubR_RK*!KNzkrf zop%+UGzZ>FqEwNnDUpqQK4BLStUkl~{AfVB=Xwg{rgi#D6lmlNNpKCpT7u!JNq9f6 z=M86>v646Prn8E%iZ9_2_^##&zKkyi%9@=DU%{KtD!kbY@s{(Nky{I>m3$STR+($f z#xovcomtCQpJ9Ms!`A|Styz3VJHw1tz^vn~fN3??ozaZ-&|l9tK!1a|7Wx|!im{Oo z@Qr*EOtpy*^38k;v^Mjtd>h}+Z{Tgbo!`iB;yZW;-^q795j3{s_-=kPzlHDNdwD1C z;h7KMFS8!0+J4;M)dDMEP-0;<(ukO56zj z81I38kEsvb1ehIwiE|DZP8=QFiCC6GBaeRoe(ru55?^MbS!1cm4Ss!m7Rgg zY8VpW=8!PsG27%>H5>+v5Bj}{lQI+7yyAI9lt3zosE*pa(qWu>1U}r}(w|A0Ed#O9 z{>)HJ47N1Ov4WQBz?Njfvg4W5(6(D(q@@DzDWV)chWu4v4Rd5o@B`+Rm7$|SlVX@{@Kuo%8F%8@hx>Tn1R75wT6-jJ1R^oGc4j}6im#@2a2oM 
zn*6iH`+Q5608`fAn@F0QT3R|fMFjiabkZK&v_Go>P80(s;umY=;);#+j%Of~NJliW z60l+w0LO#gx@@s1lOL;Cz#f&4S47z6{I@IWyc^~s^HKm=?O4mQ&JWqqNU7uL$z+nE z8AbRkGbZBwj&7#8HIlIV#X20(Dr>7NH!jEK3Ir|4tEV#}g^T3);=_U`gyWC(ScniC zk?t4fP%;)b#dG=tBZRP}rm7_}L+H6#4EzaM@UDQLvy}mZ6r2(mftER1)$A ze7dH4bd8*rU2`C3nc=={VyJm1fDQ=La3XElW{ihpRybjYV@WF$9=1#?91Hgh_x75i zIS%5c|Gbke3-`IHKvAS%L1bvAhwI$Ky=NJmN8WQvFXKM$KdT$QbBaeX{NVKhr=&ec zGEF`;eRjMpeEj%S@A&bsHQYlH9q!GDFy>R(>pclbd81)WB;ibwn<9+)EDVXjjDf1n zfj}7k4uEM_h=R0W#_irvQn#b=2<9q{AmnVI?wY5R7s= z#eA?eK4m%PX+Qj@G+zM?TN#JQ8e?PXm^S8_@M499F_x{{XWGL8=9%=Q$-{l}Ys*Wz zIwPt;akSH@i3tU@Cy=d(^U}USBA?n!Fw8S?@JXrZj(9@@4V8AU=)a@ifrA9#!<_ zyEc|HR-EUXE-XRHyH8c#53Zb|qZ4HAx1 zCNJ7$6_Y0~5XBAzcrc0;@=seT4{nDh9KUedW9=diAO-yiNe_|(o?(tJO4Cfb%JEO) zMFM#zRipNFT%MN6t?w@y0y=0-DVi)MKuiKi&oOz|wig%Od3`Pv`ZAL{w{K#*k?#sgDxPze1K~)?N98X-} zd;&L#K-1kVNRi-iHKj7P}3nMs(~>xb0N2=!5sH2^+MV^IPcU@G7&;hZ z8BZ`Mcj`lWNDmY&o;=2oC5}sof4B|>@E@(R=I39z@kgw6y>coz7UYEk^TrfjbRI6L z)Q4;9c#zNGrDtIVyu41Zc?!~a*{KjOmoM+AVsrD~-SMo?w-ls_CONz(zjX^703C1+ zr7{8!N;r~Ia2&x2VVUNpaKCL2Sx`}rjEpqnfz+H4eJz+qoBQolG9a7wu9>$Jgk_6C zOgB};a0DdMn?$j^b8q{y5}-})X;F%kBI5Fje^{`k&rF*qhs3rH`R?BA0Bzk?g*7rQJZ4d`9|0yHstd$2ViK`*pXq2x zGcCW-xqOXNY?~+T5fK}LVv$20fVD?+94{8$mcvrC&V~+2!A(Z1WpUSr)i(f-Q96A= zNoVY6Z)P}sRXKbL_KibrzrEt-k;$w8(ebi=*Y57 zk?qY(;r1e}eLIeAhOv|rXQVeYf;>a_3fDwRd24|HN{d9=cDzw6@!i78!U1#+dE?29 zWx`3k5>Yj>yn9jAm5br%!n8%gJYnvUo!u+gL;11p=hd9!K_7{SahOE#2!cludg*AjFroR!?i2sj`=3osgm@{ z(W0^9@wvPVd+s}tT`?GdrSgRmmHHEZe(cB>y!m{z zRSkX?%oa9+`{eIq7lWU{rt5$CPEW;6P0-KIIglPqXGYTD0Sh*(9qVhRg~@t{?cNm| zvVOckrdO;YkJ4&Ewx;CoE}C2#udk*)E*)&q37muw?lQ`Oct@^5P!xucMjI&9c3^*3 zw4-a^-nRYSj@AbOL!sxNLKR=Ww#Q@fep7rB$KH>CViC=7Jjo0kRTxZ0A!QY`0Zg#O zI5LhA52S^8w&NKzM=i32XA8+@!E4~OIs~)TGghQD;hXb7ey=Np7Y148yH0Cxjf&je(MHTKz z$7fpc7*wsVV8=`B4ol9;n2v7sXGWs&-oC9+23w``fLRIRlZpTf zjn9&#I9wrSJ|2*5Wk?DVDN4@iMsalNc*WQA#}Xf?IrK0+rO*q@lu2tyXfiWm3W51; z#x)LiEFAJ^4ZX97)!3r>rYEIXxE&{>5~BzT47Hq<11wRA7KbtPCx6yjDE~c~Y&ZyA z@Gw)%iMw&{1GGugS(teRn5LeECWo#4f5WP>A>z0_2`y z*=2{l281j+j4lT(hh{=>{YYIFd{45{%+Gy3g#r5zf6y_Y%vj1w%cb*5tnmdAte-Hvxc)uG%{!NZ2iiq=7i6_%;Fy z!J7zhs;RcWnOR>`Bj25enBlJThzj#bEtcT0Bfu?AH5`ke4`0=e#tJbUjz zJ?-LaUUnUL`{O1&ZJd*(=c4W#@wFKPC#NlXz#}i5JKadZ{GZoVLLli8k8Hc|q|1u= zHVC&20ncT8DqX*ioL35R=3q!(!TqKu@BG#6>Uz}M&E8&DZwgD=ogujY z#04xQr4XDOj(du=_Z69Zs5U~uU)X~`LQQgAb=mnZ81;@P|D}iSQ5#SwC^-jbyjHZH z076h6d*o*K(mluZ9>v2Y9E0QCO^Pmr$DZMoqrrA@H!}GKr*phF?Px!+Q~VCu$r{R8 z%dv}R6RZ*%VT3Xw(MQJ}Zrp%t21cW^RfpG7dZ!ov4V(`Btb|NHC|=`(t0upzXL(s% z0_Hz@^8C?&N+xu=j`fRw0)p#ZWO0>x2V0~g`DOAo6myK5L`Wa;d*nhti$5T^a#<-^ zHNxwjRS!hPr!$XM&WbcBpYp%?^egJ#?*X2{v!Zee));r?0fRhPb|FVj|?9m zV-Nw=Rum9F#V)2|@gE2w`0$N?p;oBcM5-OpevXNAmb9G2I-m?&vfID9qtl^5MM-i*x05> z!|yVp5Ri@^ess1Iu_Qe8(4&m_ILc`yl9@h{5Sg?aYJ-ShCV%ny)wyBiMjoH-c^iQ3Uip)Q2s+E4lZXhp;uhI&}9FuOja22<`^}xx8lpes>XAnp44> z6G?31>f(V2KfBC)>vw|!L&4Fvq%wRsX>JQc&-x_*kE+An4c}SaFG010mB58aWhHR4 zD}`NNf)^y8N1xA1Sq-aDi-17%Q6rLIH|JoZnjbkT<^hZ?4ElEg|h9+uKL<3YpN~Kiduqs*>&pL6~*k*QJ z)M`zHVop$?8qTP_R$Ndx^au!Xf&&~^2qA9agv5bkd2cr@W4&+Qd-HbYeQ)0Q_l3{r zoL5REi@+G@_dIjvo>Sld5-d(!!YOAV-D57JgcsZbSGdZx?SiYoiXN)28mPM(*SY=? 
zn< z@o}zz!{I0R1k7WB!>fF91lzi0uHPWd`qw``M{7rzc$211Uv#>$*B*3Rai`anf6--m zmaR}%daR+X%nu?lFF#=~)4Keb)g9z(gIooPev_v9%)YX#aymsgdsh_~Vsc+g$OAg0 zT?=$Q*AvR21W5I!GGxQTP#LPRk+1_g)RKZ|C)#~8DLkN*B#MYLOF=6pyH>)ILZT!p z7H~wkepkg};a)>Mwi9xVTL(;hm5|-y&;TqA&2AB9C$q+eW?TlVl4uDevpWuQ0%QWD znwW{f?YkxvrRZCf#FdBVDm@;Y@v+2n{SK zsd~!~2SH}#V1hq2E{Fc!JKSI20P7zRa%XBgh&$bOZ|-u~Yx!Zc4hqDqK@ecHXcK^Vf9Q9o#<7T8a8CG13Ts`>qX(B)|zT-!J91Afa6LXd} z-l9B)ESWI5&)U5Q->CrMJK316>1&JIJF8E)u_QVsxd|t`M!1g z%BHBoCt?a=8li!J2Q&?V`-*c2FCfe!%mJj@pwIm{5a;EV{YK+0a7ASRLW^)5{>mU4 z6&DX5+22t*DesitUR{QOrj}ZrsMC#NzuOA(!?IIO!D6ZeP=9d|6;S z;_dIqRp;ZA^Pr_l6ol>nZ(NLl^8<`7%OmH^8b&vnSVIrS=k3CBJ@e*$0{82Oq2~=D zCSm}DignqqOdQ@ETcC7Zep!6>hEH*yGz>KV3g!(e5r2#XL-8k%59u|OX zqq_gnYY--01pp>b2@1TQob)ltEVU){X4knQf2q#WZ{*|ZrPXJ_o9dmY75Y&mu#fY~ zF!4I-*AVhIkUNl%z$YZ-dy|i7Ri3LY(eI^Kt2VBK_vw?u%Anvb(USbEw#Yb<-`1)n z49l`>D)2Y-XKn7nPZ(1KAgqWqGpb?>>?Yl8Ru5i)+5y~wiIl3&!%pM)+}bc-2^L&vdi3&s`8HmZhV Jj2rWY^&g4(R4o7i delta 2035 zcmY*ZO>Epm6!wgN*ZY$+e{Gbsbs|u^YMP`?)t@vCv<-i0n^uI1va~Ypj*~dqwU@EC zZ6Yr#NI0QFizN=65^;hPRXA`hLL9gtaY3>WaLo;=NJy0syf;g?U~9gadGlu8y!XC& z_eOs?QaG2-n-YBcZeFqHtowyM%}-&*q>{u)i7?7!rYy=O%2cLRl%L>z&_CD>G zN-EQtu_2*sMp`GXY%4-fD`~DOW*KH~NF|+RnF77R_OKkwZ;(=k6_^YXlXb98=(B7W z>w-SVhPoMrL7w%nUg!&Mp7pWa8*qH7gK6_pvH!vM&yYl=$JHIYcygMa8ch|y60LkL z6=_OG$5T?{8Y~HT{KD8$P<6+aoz-Md7c;GE+gKphWCb zx1^ZFbV_>L0(tX4)7K@YZE7*uBH*6B_LFJB_hP^;?nXQ)2i2Mrc{ja^7p;QG2?IYI zv2ME-3;Y9-RS#XuvBJplnZubi?=AXHROjxfIOY{Uj9iBaebkJWT+roS(_gd}UEk$S zb3(7|R4S_@)&dWzV55CB8ZX05qLyXu!C_z?E>{h1wQMkV z!Kqgw>y}fgyJ73Z9Srf#-|#&q&5_6C498fn(Gjbtr!w3sRl^#1nPyzSUKLkM)pAhx zqg3^L=H7vIxOy#BmIKc(DrqKs;%q5j@xrJ*O?9!&g%cs=PW34rcm_=e#a(pD ztL-g=AP;i@lGaDMNP!H}PQv?P-ev~U1T_RRzDoY0gH35ui6x{%+$P9%8OhL4A~mM# zqyZ%0v#|>K1=xxz^o%yW8I217J-hP)4T@_1@O^|36ZtEF7*Nv8DiqlHB`!bQ*zYi>DJFuAGb)M~EJ z#AcD0R9|vJCyF>9NWRezli}od-OKKUv-v)RzJwVEj^cd|K?{!oNc;8>6VhuC1$t~I zH58L*#Go3{C^JRB_>1@l&X$^5f9)69^}h)43rPYucT}$$el>z$jHhx3ZAj(|WWIVfsi) zviadj)G1Pa7CMev;(dTXjD=J@69cjc6~MOdf{rMbQu0y9?(KK-M+liozAT(4)5#x& z%P^klm>9T^TEag)2N9+aPXl`JZ+83RjxS{J?@V{|#6>iC0RS%F_K8oSR0Q>3zrNz?2EJ#z`3(FNJ4EZeTh)tAV!topemhd8J3D6mi#pkmm>G_%r zCKr2;ksp(HdwU12g0%g>u^eH030gRwJm{UUigd1&twLS%)?Ax;We5^(5jQO=d?GRW zMw>sQzNj5ECtOCi`8ALh$y~8hu%eW=Z5EUl0$z0@+fH@c7NLd0w;HAjl=!7m?()SM zd;X1i`^wqZUgqef;CU-b=po0%@fif+HTuM_s)*N;@A?kyr;3h*E+EAzgulZ$Yvznz LLovFHF~j@^dYI-~P0V*0vAp(!Q?G_I-Tkdw!WLL(AZ=Z;$5T?=&rqztgpJ{?5=c_&Za}L_xc&I=t2o-9Dp^4hW&?IdV*Y*S_ho)## zIGq;shNfy$Ih`Jy7An$;RAq+}p6F1F>22xa&#HpIdWJT}DAp$JQo^>K=uKvDMrfus zGgP9LaNDe4X=s)*2UrzZcx|i}^&aa^SRDBxdr*VEI<%{&`l%LM| zODSKh&!GGa&aa~UOudBiC7i#E@}>GL%Fp8bYRbni(PtB4HY3&$Vvb%$`7+L5PWid| zJj&1G{94M-*UKqi&ONN4`~rO;*6TvQR9|(y8rQDSFVk03;RZdbU#_pE!i{=`UP+iudX>J8zH9VqeLa0|*00bv z(DxRj;;^D^HMX|aYPI@CeG}p8^csCLeb?(-^sV&0&DgHjw%Lx^#Xo5%>w-H%JGGrO zlsl>AE@M}#rq}D+^zC|lhrUzab;L3^`IWIsGr)?t6XW*_G&((!r0roPY)kaHGkajk&N$j zX@;*t*|S({F#N5xMgvu7q>oM?gFa1qj|CcnM#9W0v+wYdn-WC;Qvs#{6a&lvm$;1Yn@01FA2wzf{wP6B*Q5z8t6@v^xcQ88Dxjb5d`A@3jEp*p2Ecbx8OQhJnD z$89QUM6D@xo9WcWa5Ux*HyUCLsJTGR+3pun!*uy$)WZgmhk^nEr8d*v6$+a6Hsc`k zvA_znaW{!bhzbkj=W;jC@^Wpl1SQT$ti=$i^<6$+Fy`}3lcUDe7ISr#tEblU_ z7d~IHym5?oLLpJCp+90GB?bfe?3jg>E5VP(g>>5(GDzvVxE%E=04f2Xok0MibJt#` zM07F%k4G-eo1_*bw&(3st%1Rqn4ZR#Ku{M3Sw_3RvCZFXM8zVsUQD3WY02&dVj}>L z<5HvsI5IPcIA1#R8+b_a<<@tnJP6$oOKd3MAG6R0FIBfEL6R0H0!fSbUYn2g|v~ z<*=paW~O=UVuQ?_aQCv|$PWn2DOUV8YD^wbiP}KLCbr2}ia-CftXC76Y8N6RACNs<%pp3a>2&;nF^Bj>M!{j+r>`;+X6tvbwx&T9rIE zvCHa$HbNJ|a_gko7e{>;k!q9A7gfo>Oo|kPaIy;{G3k&uPo8seOm;(F>77v}%cguf zCOz``DHqA)N@8+QUNf^wHhcGfs-7H{|LZMVeNiL6hnQRg+j3CCqdIJ24_nv0Q(He3 
z%Ufjmv{@I&axZbZX$Z@`%<}bV!B55VxHO6`xw!H25zE`;lO4vWzPK*=iQ7q;Gi$QUDK3@GGh)?j-|(wuT4h(G(aynu zotApj9WtU(^84gT*<)}`gK7@vHqOYRnU`E#&l`x;y+eB5KqIp4++)M6DFfz=c8!P5B`*R2*J_2>{*Es4pq%5AEd*gCsNod;c- zIZ;C#Focg5EThZUA%bEg3EoSbo^!%BWfNLuG)96!gX6Vmx4k&JTudP}El6t;+vd$w zz1O0WvooN_T1xZ9e$H{2XWD~CScK)t`K78ZzdOHb)>JAKUak>r?V#bF2gFzl5$QBU zX|6Tu0lBn%ry7*U%e~phQS53LMpK|mOqI`;j~%-l%@|+>F&$X9N6GW$PnNWEz6Blot#VsNr zdf$jG#8p(ZL*BV~->QLlrWe(%Rg;3$a8tw=A?wgnWHU3!Vv=RP#G)lz)Y%wj0i&~` zqpwiPbjCW`gNB*y(<7bXV8pLeEKg(npgg;D!gS0#4v_>FZZj(oZWR3$tx##F{BY@n zCEVKwP#eZ+I-4Sb!sgRJKwMKbQp6MABy?D=T6SF}8>Yh&sQ@iL3xEMnF06Gxr06os z7w!o8lJVw|gmd||>YT@{Mr<2sG{l%-bswj#Y&OD1SG!pCC7D>UEzZ0i1pS8r&H|v< zrajsbGSduVLK_>}+_3&xJV?gk3%Cja*HfBd%>azOH9a4pl&J>9qx7w14i=Rg2uNo- zY4;wC`7J%6e@S^Es`v&0nYeG|4{SwGQ1ZN!fNAS&;F%myRujqwJook*`MJwh#SxoW zQepmy69D@Fb_2u+nD+KaJI@<22e>kT`2gi8bs1eW`cWZ)@iRskp~wNlbcWjWfUx#o z2pmK{05)c(BhpT>zdhu)cZp)Tef8X}tV=c?EdQp*2Q9sl~dW5DNnl>-si?5maSnBOh>dO(wQ!|SG2jAy;>=s zy}v-dQ}N)WLXetZrt!Ww5(!2{kTBv_fc^4;%EINaam#R67rY>{g>|4{ECJJropL*E zsrzQ1r3$Z{_=oXJ_MsM(#baYlTuiUhOiM|F=G*>`fDscgI`L(>rE0;PH<5&nOjlz| zBtVmq?aS(^Fj0#BiC+`=qkOpP+(Nb(jC*ns?cDEYNoQlw@C)%B(CFI^cqyIAHM?FA&b^*88B~-dtBH zL_~-WfS(Rf3;=sDvqJhJUyISD2WV#>v(8*zK`C@Y2Rc#A$0w0?bozsBd@_?%0Z(Lf zR`WCkWJk^D2{X`=2V-DFeowe-ROW2 z*?KM?Xl1I6sck_02l9XFv?0|0HUjk@$*%fKGX<5Jj#z-!p{RVWevBHHZ`2pZU1-Fo z8gRs>yP4*HBoY(#TwM<|#yIK`{Sk&ZhZY+F;G#?yt?)M)0CrhlLdMek6-_e6Wz~wxa=e=`31}>@0Z3{ase<{JrXx^LDMO?q|gs5}QGYY4q8W z7YGf2yE9Z~(@n>B2&Zfg`PijA8vY-7S%jf%!U=sW?C_iW1}ZU8j!QPsw3AC(rux)hpyn{wps< z4UQ%(v*;TIEKMn7&C%SKw%8_(7z; z3Gf)eHvkd;QC`z;Ome zOd@vBI@gBk#b}h$6?ei(yXLF`I!Y2e#%^qzZ;tA!U)(PW66k3O=Gf@vUQB;pQL(O$` zm&h&qrzbvY{+W6Sc$sRjs-6V<0;RVwG3WJ#2FPRbHwu=wf$n4P9)| z1&}uu@piP62^wW|NN^M+j{)##=tv!hHCZeB5v05TQvp~ZpF`?SfV%(&^fOd@q|+r- zu4Qvr+P^ClccW1dpqBvceFD1XY!CZsEoC00z(Qr%;hjO9d?t~zKcade$OvAEuA7x# zB<>Nr)paZ`_p1o}FrzH50{!z=W_TKArVm>TUt=WP6lnfkVomHBb=Dhb^C5xm(Oz$j z(dn%S2s+G(h=Z%VWT-HsKC6kk0^OSlfd%X0WE>H@I`~PH8YIS06*E*Y; zCp)`$(p^GeYm!BZ-+Z5WjR03|bTtMJ=NvYWQgLZ20x4r~;1*lutOQ zI4IwYKeS>ks_<<3JK;@TXe_OrcTtL*frfg!0hV*+hDa(AcJyXr>wVO9?+f?J*QMrCyb$uPjY*cXz_(7H- z6KVypwFF+0kKI^DN0;e0m1W(+gv6B~eW$Fvsak`DcW2e{k*aq+qRZtxIlIRsgOP!` zwwxFkXbN>}v?g>2@_?qRnF4K`h%^N0wv+nXggQIr?{3;$%mcFqTnAtMK{nH~GUyLA z=>An7$laf*4l-eSexe8#Qsw?@?pmWs7||BfNdaO@jLnKTaOE~shDOi71bVJ$N4U+v zx&;DaPqI|tCQs}lP&!9oaTQf^)XmHE^@K6geKCK74-PPDI+2cA+k!MuO!L*Rx!hN^ zxu#xRfyx;ayx|NP^>|e>-PVZIiP5M(3I#!fFQIV=Lb2TgH^rmNP7b%-#{~$^}FcG1miDc5ET`oFWo&hrAc7PHoj#kCtJpVm`sZ4-cb$jZ_&3iXSn%To>j>@h; zj6F)XYi&#f=X9BlD%!x)X!mn69f*>|x53Hk2Mb_&4)}#T0Ns-;tB%#ItRn_=Di9Gd zU&BEX>Ti~8>_P?{DLS+!DGyB~=2IeHI<|E-2#7Bb_?L~VEp1dDpWkvP>W&pz#E4yz zmf_cRTEsYP64z3xmIX}Ipa&CvTDi!&c%2K90yd!7YT%DH`e|Ue1DSMZ(Gdz;<|(p4 zdXxOc@fFi*slMs-H$*yOrh|40?UsGItTmG^`>Kf1)SZ_}}Co|Md1t@+GQf)zM98!avHgK%QdP*f7 z?Qo9zW-8w!C){!LiVn2FbfrPZg{8O=0Jo(y+>lH2@N~hOk`pL84v-=tEVx3a4-XZh9_>cH$N(qdNpbvRB9I&t?-1ZD1$$nFU$3bI6 z_eDmG-W$=eDeV}oII^!lun5!E*pR9l2zVM}0yW5EC;vhb$CP_kXla(K@1$bOn>*OC zk7Ufrf#B}JGmE2WnPPp13HMN<@NT=xTrTyWL(P630yn8vFx-XI{~xpKiNv?$)Azh= zGgRrlXZco-xDQl$o5!3+q;KiWbQ%X}Co^pP)J>FLkYjtx)vFTKy+_rIuc9Yk2e?YU z^W_cda#?ooB<~g=>Ihu8U?BSVg>1Wb`8X?p5&bK+C}%O{Q}<5TV7WSLWi@o4h;;HP zilr&G=1pKRh&!@h0)bDEVm;oERJHV;o;l$QR6?5wnkjS*$P2Q72clJf-|2Dd@1dmT z@P&d6n)Bd+fYF)UN+wVTWK0K%5VZ#P7?2p@Wal`lf>K|T-#-2HX6^<;VWRRah``$% zp3f{fAA>xGa!cQun`q;r&N{#yYCC40RL|t8A1B`a$^>;Gd;_bASc70P&_leLl!n{0 zNKm!gEz9m(ww`68O1OkuLEmO>{|#kd&`iECMOf}UOZ-i+<*8U9AHFZo#h&6(`ThH5 zjJrS&Sk)AF!u{18TQ-7LfxI|JQ2M^VxXhpgNqq}SDv?&j8xSis5)^L|c8a|D{#u$c zZ{7dJ__xra3gB@9bQQ}BWxJr2lAcwF%TdBkvI+c}0U7`d0F8j@?u*o{?elivcK{&f 
z7rT(!2e1cV;6iC9WhuzhtZS8!U$pT(A+6LE8+{d(8`tdO&Lu7883<<&&>rM88oK>W zbahAf&gjmCMgz5sm0KS8?rIDWIg^wj;8n8ElX*b_1D)z!o+sQIx&6V%=djV)M2HZY zv$KV64T&V?mqza+5TsIsX;5HJVV+nnmp@cEIdu+}x(ACOqYuqma){a|dk6NWt)ZDt zs|eVdkZ8K#T>)%+oqYYF+bh}Z*k}m_dmq}rj>wURcG6_+(7NS$MA#T zIOP*-(L5I^m~JxQ!6Zl}`bEOECe&|kQ}?o%tUBIM$I<*FaI%Nz`sse* z91vH52Jc_J$PRdx&r{ZP)0lpMXUp`uSdk|@qg>CwYPEG$1rTr+ng$nrEp z{FxM(RguyL<=4xTkH|Pz=Lv|ANy}uLr>jSL6dV+5KyU>C+6hr`O2K#Xpj&XHd7hdi z?SR>Q7Y$N)Z6n+*Zd$Ezkf5mPYL1#?&s06m?)-{~eCW}ta+YB%RZG(1Q$SZ+C!bBG zkVCz@5;Gnf?Z~@^=xKB**k$N&OOz8$a>J98Rw7BG*gtlrqq=hA7CtfkS6b;M(of0L zPkt0<$FmL+xy*^tKvx-F2t>UMlrH25X3Pi62oVb`WO9`cONm$hu|hXPrD} zf_gOPn#(tDtJj?M>+3gGT1``WcZ$e;&z$&AkN$|~P}~4Px{+E+siZ#cB!uZ{7JfZIVS(vt3DC=ZTtR$=D&b^Ur%q57^=Ntd zEQm}8k#8yTo~QRMycM+iqUFBc<57xDF!m z1j>0jJD9ab8=i!82}jHONd z6adqSobeKc+z1P7+PdbOwuAFETUR-Kmy6-%37Y;bQSe-;>VS`t51gr4(CABT>ug2u*$oXu?w6r5M`f!iMQTZU z$$CDh6RUuw*Bx}W%FD2s?+Zo30on=sVRtl4OYF=*7QCE&aQ z;7KY;^T;24=gb;bHPd(d8|meTFa%^^mkIYsk13)?vT@4jqoa%Dqc6N|+)q^L5niOR zjV}1D=bJw3()L#%wvvG9>gx>77L%^_*C<6}@h(!o0pJ!XoX}>a{KtR%eiOIgKx?4c z+x>0X(KCT}2$ys~2r*JEcN~>h$OSJ}R#u^O9Y8ezSP7I4YQfU5=6+5EE9fAs?*+2Q zBY+W4B%Xfpr?!iW+P2B>e1FaS|3dTM5vUTF9eg_1Jvur5US0|eeX8L3`%9{UCqs52 z5bYJn;LBf#WAFs4fX#>lKps^#K^{yfLp%pCFnus{{&gfMo<|ig^f6o%$Ts|*GMkF` zp-xIb=mQQ>p+IzEJsP`3p8Y|=eDs2r3^vC%QGv&&f1W0djrEPX_41!T=uN!@SdWIh zxVM1()c|pTy8+mL@`S=krQkVt0xfO_kN|f8yol060FMAX3IIPW4g#<#R3cTQQgk73 z_#Yz$R0417Js~Ik7{FS{qj$r)yF40>C){Rye{! zOJ_GAbsfMLnLol0KJOA(=mnN%eu!qe=qF;cnfFta{TLu+F8_&e z_hAarM^T*VYNnd4ia*H@U)`SJME#cm7R$}AO`ZQbk}2)qCrQ4E_Bhm2RF`U_K*vFl zsJt(GUwbhADq69CytQehEZJK*ePMq}?^>Qm1r9t?XfLFKXF;2eJV>@&qRh_>8|}!` zpB(yR&<8Z5L7SrV*u<=u(yE@a>1xd0>Nw@>kw5(LP4QmkkTaHbSUKeCTG`{I-`p)c zD z^usDwT6rh~q+0WP)F!njT~~TCblW%uE$+76rX0%bW0c8^7BeGCtI9ET|Kc5hxjU4u zU&O}sc$nUgue2S?IHU~F+}_j2n#pAyTp1hRlclE}wjC`wl-*-LHNJ;>cr^FWsMv&4 z1wEsBvU_d#wXLpfRce);LsH>+4keb+mVV48a^Q%TCH}0U3BZ@7t!o!5=J6i0{v~JmO+f3Ki2$#AibCl`Di&HO$ z< z=?jOJ$w*_k0IQbDe+h#Gb(hjT#d_&V*B}%e%VYPBT`-vn#IpcPFnO=1RO!TlS0;4K zEq;K~IRJcTJ5N6Ilj-pWM;_aXthMQZ7Y1%mj!K>I;FAdED$@^NHoj`iyFBhY^yRc-O^ChO$vFN8DQba%=U zKnEth?hCR&jE8je`afVa(oX@OLOE*F?>bw=Ym}p(jz?o?KzA0R5s6fX0eA2{=&*(o z3b~4jYPwCXczx6|yNBFII{nSGO;)q**=jm{a_KMS)O>rcI^Jee$E(u`ld0y~W)sYz z7UI>!*4J0Mb8xoS?J17V<$ts!f9cJNA_R?^ySvG{s7moKk5geunsmH1t7rxq+sMj_ zDPg^XoQ6~pz*PWVx%I7aYMKnZRTvr=J=XdAD2GF{9;7@;DU*-)2Zr$jl)~3qXYogn z`U>ie_Tl%6IO9wHro(!v{#Oi+3pL~)-x?psSec#%ivIYD9(|>)0X*BJDnUn?CZ3xQvDl`q3w8jIL0fX*5!c75SN;FH1|a z5xGL5A+x`cpZL|!&!}{O)lSAWQWU*NefyeHlc&xI?fp}9Pd_$TM` zRKn3ulI^4gi{lEk!SJ`rFTEWe^mukI3g!W91~?0V@f4UCB)$iI$sjGmwE49OD<9n; zo*7iiF+Huo^n|&wE%{z$5Biitl@!-RGG=pki}DMkA`b{sH4@xR1_W;(yNh>AY|g?XUi zQRQ%x-$A{-0lLVfT8h)5=JSN7Sb;)@MEP%b*|RWy-HtLMP$PaR?>b*>so>v?t)+VI zZU5LlDfhki*?2z2gtf+f;~ouk@V}v?KLNn`TH`&mzk{ry-w?!qQ03ArOA*YVUmId9 z_(2oAUu$0t&&WE-=+g;XO(trw>F^E_X2oMQ*ocIUuqWi#f4|;lv&q+r#>lkyr&Q-d zT1@H>!!kcKL@c6e(7yiyN>wajoJoQmtJQuZIMJ@OY`oRnt1HhLUKBV>m_yHOKngCh=cms)fAlPnN}Q6sSYgRv4=eJJg7@J`Wtsc0L2CkV1psWjRfID=^t_e-GYBz}3Mksae+(ha zbi8`7UcQQSu)`8zrgN!}Pi!XxTgtmb7$H5;?SBTXxv*N(`ZEcFMb!<->ox`u_mt)^mvf delta 17659 zcmbt+34D~*x%a%YX9Yr7!a9fnCXj@E4VVB4ge8O|Y%kz2nfE1`WR`GdLKe}nu2n&l zl8@x7uY~l%<#iChMi>7GSkfJL5X;rQKX;W?d zX;7k5)&A$Eah%W7VH!`+3GNTidwY860vRv3pt^)!c*0$+%GqD zcDPtA4o^#{)40GInyyY?W6|=o{7WoazCPn(o9d@<1S*)o1+%GOqBe>0 zlQ=(z@{=_W3FjBBu_Uya+AJ!V#RZG0L8(?o`7)Mp3FXVR*_5Bn`K6Sfqs^uKT+T1k=4taU zv8WZ=0&O9EF4w}^B5m;{in>CJXiK!ElsZRSrd3c~r54pzXy;H#l~$=$QOQberIuJl ziB;Nat(rbpYc<*$`mENMTx?Nm^qRIc>KbjWR!e1TwRPHh`mEJ9XdCHsoxWbHYqwrw z6aP+QyD79Gyiwh#SoDq5vreyT+oaWNo3#e5QESq+9CA<%%C>6TwC&msEn8Eyorj!? 
zCBenJwB6ddTx9soy^zTXz_L=XW1LFTcU_& zx|jek4q!aMB!I~P9)KwTX8}wFI2)iCpp<}NZSOK{dUyODinpgmv;CAa(2O4%CBT&L1#iaU}>`-P>L)U zr&1M`eG|4>;*Px*)LgBQ1WKLPX=Ddu!ALCbj|6nX7K;l(?#W^}h*(TFoc?%R1e?W3 zD)U+mTX#5Q*xL0TX3hlj(cHCHM8i~Bs8}p9_&d2_pqEP`#e;#c zPCC*=1=_6uI0pc#9RMH#i}(yBVi+&C%kE^KW_q?hn7t_1?Y42-05$9TfzB%FFSgKP zNqR9SBLXQ}38lrZ8iyjkYR3XXgSRfgjgjq zIl5U<)0bo6LbT$5|{#^Nf~Ie6@Ub z>~_!ThnNRut*k66Rx5#`3ScF`DgX#CRs(#Cp#=N1>X?@3cG#`i1-V%?Y}N9oMYk=f z1eOf+{S0%8^}UJOZ3Y%ACEQlANxnI4v3zmdp-C`ku^FJhU*8bzkUiteD=Sf(L4;-K z@6k`Go*HijB|9bDirp&enU-h5cZxwKHHS`1a*upw!t666*+6J_4sf8zisZ%eowG{L zOs?%j@`NY}ZPOfX^`r;X9Mp7k` zR}Uhok}8+T4~yHsm6SKh1JjDnOqDx{l&_JAAQ`RoxxXJL17;gx#{? z3hS{LtrO&eXH&i4mggeX?>*)t3y<=B`P=C;&y0^<)a_RJil;{Q%s609_dP&21onI7 zNbig@!|o$gcgfDPYh==U*u>s^8tfiG8+mr zhR1ZVUl%@F&h&0yrwEDDaqxC>TiFro$MffmWr`d>{Z0FqB3uJ4sNqNHFtZthWhM)Kd!e#MNp4X)L^E240|xLH!7|`180ZO_XfMgIN338 z)TqJ3W!SOWyX7tOemtX<8}IjrI`tUw&qg%3wSdH{<$?LfN)V)yrLCh6!7ST4+FLSh zVFT5!lQjz#%=$jsF#!ifo#E5rKTgfo%DWa^HwP(VB9n~xHz3O33S{bJXyN?QJ~HBV zv;Po67|w81(?hWh@`Z()OSnf1kRCg24-uoPdbxJdo)!I3&@E`53RAP8Y+sa2N6X;N z$RTq|*KJR}yQp3X!UhB^(u}EYqmb_3vbc|m zTja+}E~;irwwr8@M2|}VU8SOxCjTCBd|y0C5k%y#SD!_8XmHBTFYDx*&5Oz#ceb_66LD*h9dZ zV;VB-!JNbjvlBD-w-JGFOiQsXfgp!?9mR|LD<56Tep}bT~+0q zS(j`)hzbn19?@c5!FVfgBn6gjGTF{ZZ?MBh6IGb22o_pzzNttUO}`?=74q_`1=CoU zPU-|k*tQ1Sg!m!#x>LSYwMbEt<5q68DuuFj_3o8$j$}$#1Lt8g{UUuBF4JYrrMjcY zyV+y<;!)GqFbIY{)*9`yN>_DzHpm-Fb#KvX`C|1LDXZ_FSV%?QA|q>WXCx4hMnf?H zXDKcQ2+G|x;}bA*({@-Lyi&4}g;8-W0mDI4FxX+Z)05KF2HYLbIy1CJP%L`VoSmr={M_d2Cc zN&lM1=d&Tf9MUU(AO6f4=?=zxw6hKA>ty}f&5Qdi@LzV!rH;JMrzK9?C|_F}tYlMl z1VXxBh$lciwS{A?3?6l(fSZAIyffwt_yetaee$B(iPm{3*_adbBrNObgWY?2sxdN_ z84kW#zO;URB8W!p6xb72Bg?a%r^P{Jd1@U(YN%m6WlZE7?Zf_G6q5nvJk^kC$LwFSOby;}>? zu|ZsTQ1L7PbYnPzF-+4W()KQYsGZN5(kkHj4PjOH-k_YeX++Uv^yI-{z+(T2%DQF! zrVs0Xk6KL9Iuh<}WOzJyWO-0vuwJ{V-@e8}uH!h`xHWmO{-m`Wl*lGV0$+uhHWk;A z$q8)Zr!v%dQTjju?Zn3b89YO)Psx!@t!eCRJ%g|ZGW!7g)AHUXbrAMXPJ{g!>D{v2 z(?+$1JszaxsU69@+Pc-%+kH*78>=^MX`+blvMt348&E6;fVVbWE#wWN(YUBX&aMRm zaSoS63z{4QcnF{d08Y$s(n204BkP+7SRvx%&P}E9bmWnMeT%mUc(cqwm_X@s%u$M= zZZp8c1XT0TiJ}<8v9D9_)Wv#KWvCGm(>=-H*44^%Bn8ABG-bIxtb{BLq4az5m2Dr* z=eCFdO_A!IHbs?h_?l--qke`mSrB+=VJ@{bhOJHouexY#)kAdXD>@@Xvmy-YuvPv&ur={3O5sp>rl_P; z=E=#QQKkc23BV4J-O52^v6U4FpiOhera{~R5CixDz>n_F?S4;N=8S}ZkJjBFTGo&KUP9AH0O_>Q=h7zi2 zBE}@8#dTcJD&S42EP=8M0N#^#wE4Zv39be>78Aci%@F`Ez%W$s$Tt)Dc+ChD77##K z?2knw=`-?MQRbra?65zg8Fs&DiE-$ksXe;?w1krHx8JBJN9Bd#%ETxtAZBg-%!=y? 
z7Zos*%x)nHvQ1{#jP9WVbF0GaBZ@ScHd#d$?57voNkqf25l<#d;MELD=2=JDvTDv~d91^o=mCOWl;wClJ3YHS#WDIP&O`A5 z6z6+9wVED@2lobbvBDz?2ybfa6<)=^Azp7m9bLi%BQ2hI)YI8P=BJm@Da7SK`CR}W ziA6|V!8+mDc`5SjlgA;&%D5V-!vNO+^y_A@=18Bjsb0vWkV8xN(UxTLXZxax2aKG?BaNk2FDL6`JC!xeF3U9m^e~%DkHQNo zhx9|Q9=SejN_>H6WN)zL_2i%9PbwuZqW60QdPjIXb$XX)bx_cGQB?G-@Q{(hgi;nI zjs2^XF>+E@U1AC`V`^=v8GyyaTsS^Lia7;WMmFh8#LW#Iv%Z^32hIdFJEC{_v=Dqh zttynpIoAnbVgHbF5;TVe-RN81m7jG*5|yZ~0$2&aPM6ujoDnch0Tl~qP%!7{I{;z7 z>w!;ehL;#lnB0on=61Th?n!QsJIkFdP#d6%z8@?rmB}}{*I1P-Ipe&Yv~9fPyqbjJ z+CUqm2(B8dI_auTsLsaDW{Bio)v5>D>3~2GDvNlO068_*@pSMs1HcQwt`8IAR4G6< z0LanQt+l+<2hGs*IK07j!7YxlPBL;`PHT64Nbtig@8)y zU@*|$BLv|;MMGn1?Ua*JgC&))vSP?sb&7EO&hjstn+QFu3fXXsgVOXYPb&cz{{XX$u4gLRsBS^T;50# z)Z>?zARbXd0>`->cGhk-VAp|$RBRj{43s)xmTm zlHoE(q*3If{V-I7biOdhnTe>CyRR%$YURNz!&4FEa4vEKhyi&dmu4>RQ(^sR6wY82?Y5zKue@;++gZ!`S53j?0G0##O+w+k= z0I;6`jVkz3-FyLn=P%zy6)r3@IfS9^lDFURv&1cwqVvXJ3$`lW4afe)g^Y7NO{y8vV{d2U;YbD2z zYp5Nmkw=po4W)Ycq6?FeTSlZt6I1o{qd5>Iw;T$ki|Ae{e|hw8OK*mV7y}L;=uH(J zr!Xhn_bB*tn_*eMB<5&H<$Hx*b{i=HFxZAQ(446ZaCEa=@x8CCV=c1s_);~?^x9!i z#s0>yvyVQVfu>c?g22g&OVKmK=6k5@_T+DFvneSuiJS~F;&yc7&^Ch9{~x_xLU95{ zge{mwDO5c4XQf@>aAh3*^^TE;k3FF5l;<2@zUfwwgQO-4rzLDEk`{&iSQdsFxDLYf zn0)^DTqTzL$MM5T&T(M910W`Ex_!MHTJb8zNco$GMovwK8ymUMG_!s$fpFtxIpvO} z2{U35eM_{M28ORDK&Tv1LB8(P4b(a~3q?gK$7e3Dy5HW1>~xo7>k+4av2Iu-I!|NRTcyB490N2MIN~G(RD1gmkJ5j zkH8)q3%dfA>I#(IhkA3QR2Nae-5*~&541>m{tt?jkCQhNoDYw}8l}|;xYDD7SoE9g zG)Cp`PWh+1mIRm`YJ@AjIrM21^xb!+0%~%Y8GN$z%q$-R@hnE{OsefW!wNK~H%KSs zg?CRI^9{XpQHRf+$)DUk+?tR0mVJYGfKqbu^9#!m&yf%pqGlP=ig*!RW(JMo6)G*1 z=iJvoyM`n89ZlSi9u)vTBtUn+ymzFtq!x-AV`4FC_;lAt9mO7iT>$3-G!Zadsi4l> zY1gBCGXO*!a3!@3W!nK}0yGeycu+MjXTtP8lrJGEWL&+$x4L>m)eaUeZ9zXGWU4Jl z+m-L3ql=A?F1jJkH;o8t$(GLhpQ*%nk+aGe5nfqSE|Sj&Fzy3w1y~=UvV}7D#G|Eb zepV5n0@cygN>`GE5;Ie!*BXdt=@eY27@i_PiXc8YF@93!?4?C>01MLlKuKPV`lqGa zBKJHnx{%#Kzb}AMSI8?LxUQOwj!l+|VCVyI0n5LVNt=Q6Eq%BK798KAShi%%vqx}n{n#x7=&6-*~dv5n^`Vl`y^>(l%kSe28S|2!vFx)M|uLUV$ zFr2MH`Vs&xAP~{vTv#JwB zMfus2ze_v=O!Q|FPb2jU0x7BkP@F_PPg^^aR%!1;!9+|vhd$sVW#;tyL8mu56!T;- z!RO8yOgv1TmA7QwQ?Dn!K^xI*@K(~FsjqFILw^?WDnJ`0Y1^M(Z+!kxaKBE6udEg} zdN8Kin@MT3S*24F@t?rJ28HLKdCT%zD0UiV{I9~dGR*czBi@@BUpqhWU|27bmxqrhoB|0^A~UGi0|knUzCne@=_DmXI|jG4E%Em7|xV1I9g2m)8C?& zBhUXq>Qw;lkwFQ4=E%pN`CudW;8?1^+bey2*_$(h|E99E_d(c@=?`jOZzG^+QNNkt1nF5kAyBCIM@mJ#FaKv@I8qaeHmsSD-47skzj5VTzIgn9+VJUo4K zG;M2asAyX&pLpSTq7p5u0C<#EA;k;$xj-KW=md}e?6!^|g>y;~K;tU`4gy>Sa0uYX zsEq;K18^?@e6WZDuq7-*YPCWaO9E#=Vl4npU(CZjK5-Gqeg%Zs$O~++MGrtPKnDQ! zx~cu?4@56x2PW*=O0Q#wGvos|Voea*VzQ8@ILDk2wy= zLoZ)G{eW^@QBZqtJnv!)ef7>lUtF`-_2!zlIJnKEYtgat|Ni$>4?S$Okx={|fKDgH zF9`2|r(<6E{%_B&>l4MurdKw?AE4KCAwCHr#}C=WTNpfuW>|ao4tZJu#XJe|CdjY< zXPnELZM7@nxn#qu6|UhpA?S4%$IAI1adz7uS5L(PsJeRhnrH4A%6gD0|K^sCH%gXG zMrSJtG>HUYF4#(VvCBi9n}?& zcKGP|uK8S(cf^C0XXG?@(zokKuXJbhs^XTHyt%^pDM%D1pMCSEN}>>G&nM7pr8`?O zhEg=neExwI!hDSIF{S98mp>SyP<|WLf9gU+hI+U`J>?TQe+c1sg^0s#(Ez{6_ zI=~D79?x2&?gcPM78ghuSrXzCNR&eg#=}IuS!caN7$W=`)$$uu@;CI7Gti#Co1ZNs z?-p6VP?A@?J5d>N1-0(AmhLAW-$su-`RIG|oh;2`^3(SgH!$sf&X}pI!4JpfrmHX? z@R|LAWUxNziFs^)gzl>L)7Ma=k!pF~`xC8SDal*jAFi0z{J+5WEC5T-b`9N{Pwqaa z6veDOWlcMyyJN^p_bhym922L|7?VK7+cD@Pg9q^u@wGjR#x$2&SsZp{G>vJt;vh%( zk&-O=KRaxB7_we_86l_>ugYsaDmK;dug25VG>hCfo`>Z2|2>$nkc&2b6AQ><^-K8< zH1#e3oUS>>gM}Vo0T_Jo7c_p7Qr=KxPbRO;p+a&5=?yhK$2wi;XXK|F(J>jl zExQ|<$AV>1gw2{pxFQQ&SfM3epZm?GPn$+=AEK7d;136&v72Z<_+)%JVLEAt>rY3GL^G=0i(2!$ z828J+e!5)wgIxIM8A??8{yab7h5)$$XiaL25-a3@?gU580KW&gAK(uJXnh^3RB7_! 
zpLy}T+x$xGr|83zkF5nOpZWZLE>izNwT5$jthF;942jp!_ODcKxLWo7cIEYpO)tIRyr&PQo}e<>3l zhTcC%C-@3ApWdht@iL*)riR`#4@Y5cs*r3ZD$JX_0(AR~fZ>cbx6$FC`4qOAqDQ(! z2dD?&u(%N^4vV)U)gL}{qo7)_boT$g|iRSuq`P~`Gi{|_DqXqr~es@I* zqecFrXtBRITH-H>mikMh{rvs7%^m3fzM}Xao0K{FU0Z+8}K()eP5$XysHhLhICqY8CrMmw%)-O!HDzm3EyrTpK}1qqLD) z71fN^MrotzIY!%~jnRD6;?u@zDpW{aqISWm=7P1yxMdW@)qOIZdn8=FoGx zHdm{o=jGa!+B|yB(B^CP^qi?J&=%6OMq8vUrsoyf5^X6xXK8!2W!iFLJzHzgR!~i? zwo+R~&pBG7)-0MPN>=GS9^HCv1J!Pz+WA~t z&$WJSBh_x~tzE#iS81E5c2h5fh1%6xfEX^)-Q2!e3sQT~)D#(QF>&3Zg{ZD8q%S5( zmQaPJ=~SWf`%-<0)~rpWzm{8U{$=`NZL1d6uDQkPU(UX=O^Xn2q+9h3)H|xhs3wLQ z{|fDTtxelbnpmmbph+!GrB&KKEupnjsZnpzc5JgAu*x%fQ6e`rH~S-0ZU%1#L1MO9YzRW1&R2HD(mG$HJjjO%FuE z30(#wal@^JcZB2A*5DIHL83Jgj09Sxrc=KbpVi2x?s|(Z1Bu;jdfdp1C(vNNsYuDS z69{KW+wR2H)>vRlpoe;>HQE+TgttU=Bd@1U>TNRheo0%dC|2t$Y8=u<6I&%Kb~$HG zYLClFs&d#gM=90ar#22dSyo9U$zV8@3`by_zB~S^n(zI`kt3+WC*;Upj5L}nup=00 z*W+&}N9Erygy;;A0;2I#7IonAvec~M$|^>xmq!jAn!z_u-`-9dF{%D%%2Rc%m@o#6 zeGtNG3Nbc|B+fuU-yR5jt{xp#?*q|`AjkATKnUd>ea)3+AXg0FC1AJ%fmkp~&4w+a z$1Xx9k_ZIsB6V!^G$Hyct8a1@3z}V9M0sm4zBLfYPp$XOw$*~2tOh_QSw+CGhBd<$ z)HFF7RcX9@LN_TIc1X~N?lfT7 z$m@I#CT_Uko`mjq0h0_NX9C6EjA8~mbPlHOoAfI&c?ytRfo7@e9lB|)>`HK&C^w{T zpWG@&pxb$Tix68o^w=$_k(YIf#@^vbbdj?FGE`ItT6R-KbI=aKeH#? z#3(NnyKQXPDZI30;?5_82h_ajizo3sFkR5FNA+lH2C9)42!KMUwQYAG@R)jH`XeJC z_W8ya+8v67Lr>f+VU@WA^v#K1(@2MZp z`r!z~5yQPD9MdSW#WQ)Xpw_>s*4cLq<9M@@UNccvQJuxAM%3Qq%@&h#b_8WONU<<( zx2l(Fy`oZmQY)u2TRXMIjhyzlu2GDkqC2MV2n{6VTa<%Kq-&e6lvrij}9f;_Gz?jtDdCk^o z>XKz^0u-Z40@<7vq(Oo>JkPJ8oNj5ak zuJNdw05B0?GQe~I4$2%n@&gMRSNQ820`-lJD;njMXq^XuStjQLECN^punYixB{5bB zYs*y@va)PK5n5;EN+?c3Gco`G@s0dIAQTD4;~_l~A$9bsktUcINXT3q$m449!lj}( zb;rV&#UPGMp0&tR^-{JBcP}4I7&=kJTjc8Yf`nB1U6179|gAM|Cv- zr+k02?uSsp_Kg#P!4_ z$A;}wvPo_-!C^%Z4PLRwdHg;X}&aXr$EL$WvoC>mpwr*gR}R!9ME&5%27@9dc1bEXOd3nRichU3- zCKA$cA~?k~({3IRa}hA?SgXn+yypV2BO_YMTM76)rbUq<QVQTr7-J+_f)7fY2w=rMg)o2*^03b(Fu?j>Z| zteUsZpUN@TD(|5hLqup3l^YH2YK9hQwQkZGPHksAvm)Vm0z0GHPWATI=qQt++0h;e z$Mg)Q@?Prp9ko5YX7YVhA_wZzL;jGf3@0%=i;w(1H5^c1gqL(>>2=ub<)DO|kZIw# z(;XD+;}knAPK&RfIe8A!o4f-6tPLA6lQ-ct?GKK~HSI);XD@p}67An5u+VgX!FXo} zC_U*w4Uu?1=^TFnhV+=Rj0Rqa23;V0_9-%u<>V#2R30Us!~`8rYAH+2jD7_j}|}BmEVsW@mDII^CFY zLCz&Kn!m_8=17L4bT5HC_4l2Z&-CII>B?~GyAl*tdvZPZ_%>RhfF2Uu8@}gJ(OqlC zV?>67_BwP<1C5?1)xb|n)yKPLnUg8&vY!38OfA{HDr-s`O(X`T{&}}sOwR3e>=WHt zX7xIY=Ipd-u0wg9_QRZgD95-PWDVhW*vC}GeH1J|>L~BZ&lXoChm3RtLGZ{-9!03* z4AQ+O=*=LUHstrvJ{KUvQ!o)L?c^y?n%Z~mbe%OhNS-fP^obMjvOa-<^^C>U`7WH3~3p;#P&p-Jgl~LPR`|s*-5V^mOLn} zgw$3B#jPU64N~p?Z%sz!*57)(1_u|NrHLw&~skNwqi47jJ1(EdeL?& zw1}^z8lxyvGWjgHv$CKkb4q#6FXB0qtDHJLg`OGq1l^C-pKiZGJgZ_)3{rE{)qI8^ ze?Sdq#GDR$RlF+B5!z=rtWu{%Qu`KOxJg)%cHB&sRfLFai0;)aGvaCd990)ONI{U_ zLi55-|I0+4kZ1{6QA_{uBsE%8^^*=&dGN`r*+(6wkHS{b)*X5uSDjC|Y^#ep?Rvr- z2m#n>=?H_IcD@Fy#f0(xdu_uYBxg_3Q6I@PM>$?-EYk`WsE7&NW%-%!IWFoaIsR{5LMqsa%|6f^^e8m0k_ zR_;6}(lPrWXr)!kQ<7)QvuN};{F|(V+f&p+9}gxc!?Tb=re|wQhUDtnS;g<|7qN-UKONG=Cagx~34jy;mX5w$`8vwK1>nKFfl_)znB<0?zD#X3zacoVshr|> zHsj1}S07>7yN?t0F~|u5mLZt>9$|OpscRlsDUPR}cwmfk)Lv@w70YAvFjqEZC~*`- zcT+&AavmAi1*J(CgTfQCk&drr&AS85!Ei)EC-P+gWF~nM;57oiygrgnmZwmEzLEJI z%@`UzDC|u(A5-L;=t9ab?luMw&7jK5)U%J=Xh%uaT~?+-kDgp`BXBwO+9j)wS#cQH z-%X}3l~<_lL7I8y5u&3+6d*Caf~1kUBNY|RY|I(x$rfeu+2$Vx-jgAi@18s(5B~ zujPMDxS6hUFV+2DWqWKEoh8hDY*O%6;8YW!Q(Dv3BC-o ziGloIzNL;mzD+c%8NaAm%JZq0AgPUM!G#Q`)Z^`RvXepcG`0N~Xu{_$B&E}8C%W=9 zFGzm-i-sldQhkSg+=L0WM6Z|jMTNx__!|h@LD)(pPf{LxaKv}%jSgAFjtof*_pC@T zxQkK3Of*+5hC}P4PNBW z;Z-EQ9{XJ=;%p&9E10@gSC3X`#nw+aMTwQCw0e7bnR@h>)iu5DY3f2`UE^W^d7wn9 zApO#s&nn+bl|L!OWCRp4cRJ){2(gLUOVQPeu5P(0)$mk*F(-*`wzLPbjY*^Vbk^NW 
zoAd$D&!NiL>QEs@UK^j$*e4EdY?t6y$6(+17wW* z`&74H9eVcqF79PFdtnu3%GE@ri}m+IBup+R2?Eh+dXrK{f=C8CkXr|IHW+O_oWg1A z7}?amMICzXCUe~(=!mC%OFYrT_I*EN5YxBtGxr@Kfrx4RXS&Zkku}QaG6iSj{Er z?RoXf*JgV!G7+9tgI^!FB%7CASYH^?gBkil9j|tO`1+G8*v}rtXgMX?>3S9AJ$4DK zc(Mjw=<3%XiWp>ZU`OB)dDa3U>7v(f?-Wuke&gd&x1+lP;L_gSS7=-c0(1&-P;Gj1 zocX03YZ`Nxa~n!&>vR0rPId38hu@5N|B9a3Lo`Tys%E`)wfAr5N9v@eFVrvIihKWl zek6$SH?`bYS_zuxVTTwb-0E?o)m+AqN-R3O=yYQEsN>^ikC^P}TqL*-0m;AFd^U(K`UA09FEIjP`d_H$=^NyZZbesN8L` zwT%vrcbf+=FqC9p;9w7R^||x{*(C&#pxlQW^JSlbDNeyp2U5Z9v|Bv|d5%2w{@b+B zqL?vNjX%9~0*h`19UDLch}ltMIt-W?SdCUcI6b;1V-~UuY2lzl;2O8_;AT+fA)DkcpEwZ(}jV^)Fr;zRg|15$UBAQLs ziE8419IEH&W|KbwIa@RwDM7bWRq4C4GOhLkD7x?=HyqJr z4h{7p#-QuxmT}Mq=R2I)Iwk5jM9?%zTbnNTnlXc>9n6mvibehS&SG`?-8tgz)Ue;b zU_0x|UAD=!f%;sfa^Kf4iix=uQm#K#s(*R^(k53kVK-ltF8npjbW?w59NVAXq!$40 zbDn?#pN^#CdPzv2v+&^{Vk3Sb; zfV%O6FU*5^?q7N6VbBqij*-9}g|hry^a!inO+bLjg0s^muoLTJ!PvLB+6t z#&EV%ryZ%gKF+n*xzJnC7{`#)bOMq}=E6wp;hZy1-+!p&j2 zf?FsNjLex{ijnkFMPH7oWWMx^3_3O=K)Z+p31n_)N`U{6+VJJ{+HA3k2aLdBc+zY# z)i;EQzLq-v<#$B|#@&%eaW>}NLI-f$XdQS69EO8=joNqSvRsJJtqJwanf^`=OS$UQ z8LvpHzn`g?lOYGUbKqp9I7xQ8A7fgf60{kxTUHY3PgDNCx@>j`qKbEuSRr@Ae@f;P^2}k_l$P(A5Q~HNG=UXP-E#F>(TQ>KTlL>2fqHpDjNU_^i>Qz=Ha^)oU8-0}XY{ zIYO79?K%ilMWrq|ipd7*nwl0bY(NMf4TLcSdMaURqJaV^nY9-FOjgp6L`M^_TyNibkeehK|e}1M#`dlnmy$ViGs=TCcB| z9N+KdQ;8BYn?OCTOFl-QT5HQt$;%m}xSmcX6No{8W7{wq_m2IUHgd!eOW13 zTeoaYePB^t!~A9SjZLQe-wFC~{Jzqrv#>$e2xIhf5xQh0>Bkpz?Fa+)=XKZe&p9$x z1wju&RlG74Madg&;(^>-z=&GNi;^{VFtra{+u}dG9l0wq2D~Q;VVIk0oxp-`7D`RD4H|Yora@nSzXg2!@hdM z%6jwC)L?YQ`hahRvDA_U9SUw7%&SqF2!N=~L0e8j34vNx5TNUAI5&|oGd*J_OC(L% zg^VQ;f+a$u8De?#nE7%F04B1;oRpYU5)(qs2Y~BJIGTjnCCnwEZ^_oG>>V?gc;Ex5 zt{to7EmUyZoFds;Bm!c~5Vba@k?5*Q|bxf;|kR-;r4$J-*o z-TVnK9HO7fwZ$v;PfjKms)#ktt zb$V)u0{X6CN(+2qfp3lZ>l9CbRw>vX5i=_-7bzzbsQ(Idjap|B+ z`UXKazp+c^yX5|YSR}D+B7T7?X{j2QSS#=WQ+G>Tk51&=WqxtbMKhK^G9Lgr%b&|6 zLS6iWR9T36q#rYWYA8hju)Z{JjPai1VZ3s5U4hb3fK>ob12h8s3Sc$BvjEQltOt0J zfZ?R?K;%I3H>f$m(s{w-Ls;tT6;vVh115N7Q5(27uD#(RIRnhS|rL{MeaP|?jm>SvNX_T zA)9Alp77vpVM&+8>nU;f_w;v{dOYqj&m4E9JI9mj9z;Eojw(?-a4_K(&|eAtRnT9# QyVOc|Zam3(Rbtxz0`F;KlK=n! 
delta 15995 zcmbVT2Yj5xmEYMeY1QQ-7kOP|du6%X;3`?RWy?)Al3ZAq^-A+eT5F3uyK)f%g)=3g z`vzFxAZI5eITLahvKR*lC6t($kbp^I_~?-P(aqiE5&|aS-haN8R$i|QXZ`%X`KG*i zZQgq`-|Ss4SpRm+nzuMNH`_%2%9_{tethfxyrI3v%q9~zc}<*gbC7wNdYZjvdRl^( zkkxBtXtM@wA-mTea(Er;-4=9)vbpP+@a;{ zv+QHuJnr=7^DJ)x-^Fvd>ni3gq&~SkkNWiHX?a@FKC8Ews`7aORTWT83DpeZyLk~W zrtX7z2_JNo$vcGa;e+`Qsu{|M@=~fP<$L)sKAdWX@ezC^)ePs|d=z(6%?Li4kD;28 zd@LVF&oX`)FX!W_WfY&lCsK`jxv7_Xcm+*bp^e^W@s82P>@#`C@*X~!Poa+Eh^VQ2 z8nsW;%Bg)kpU!8{zzKXNpGD7!d^W#`o*sTNpF__IK9|p<=OjL#FQ8{7U&yQIIhj}U zMf9A)YxrV%PUTDZQhH9~y|sK96{hp$ypEnT_~m>BUrAHXURs@asRSwJYim&;(UVgIoDF>b?z9Aq`ejcOTzQHQTjnnsgDp`*}0HH>>v=Z4qzb zljuLc(c)dKRdbEE^0phz-X#iKI}cFbKoZ+h8oh&eP)&!n9%8TMSMU%I6W7alFOTp} zDlO;V;ybyZQk}MfM?1{7n8kfs-98hUpM&S|0zQS;@s)fV-_DzPjCb*lHkWrLH#eF* zS0I<&pf7o>UgJ>y1Fg@=i< z{vN4r4&N?+uuF>`mrsxW`?)&WeSw%3ik>jZ`ZATKGI@f-#@(Yg6L7iL)8YlTUi4mn%XuON%hK`=jkXpEEJIVxgr9 z(}@ZI3=*R-yE(vhi=T5b8dWLMAOug=0RGG6{>k+vr;AUZ*Pz6oCo^+kI>yf+pxdJn z5z`Eex)l@*z&jN^EP-&$V^hd=$F4w>bj7_6^dv=yndo9|MKNtU4BnF1I_)Voy%L=q zzE+`W-I}4T%tEjmT`o_on-O7SG3+e1MI9sETKLhi+KQPbff3~#7}2WV&kEMq>gnf*R;;`zE#xW2a;X9VyapPh6(D{kQR!hb=6%y zA275;I`{f~3Ay*;2gZWzv+>umw8c_guBj*E>xhuK(n`-2@%Cc5V?V&wy95Q@h% z&T47C=j2=S?y5@(%uRIzG>$H(i3t+YEm|;^#*aYHFTtTg$%gI%@jZ*$*CB3G1Sw(fH zn0%+|dUqzDl(ow*0{-Tp7X6yZ8>-!Gxco`AAa~Qn?{!Q70c*R+2vRCc;`FUtQ+O$8_}`YyGnNCeJwGk%h}A^s43Jq6Xz( zgc~KbaK@uF0bnA)B!FoE%HJx#kn3C4u-02!@2hQSSlb{LpmiYt7Lcd{SPZZfU>N{R zMu0g1MT%9Xbp+R-2vI9(3P?&o6yj0Q-6dt>uAfoe(9*iE4$22(gVSV3~5k$`Qp1mkMf*-NN75sqtm9W98Jg z?M5dLJ;YW5dR`y5^=q3}EU6^~9MyKRRM@tLjzC5}K=ui{tFUO^=8F?i2IlgqcFerfF{LvL8@($h2|-EP9vbDzAl z>HR64R7$R75yf~1*INBuXLtnN(x%03li~IE8bgxt7(>#DCtuhS3CH|_aMb6!M=siM z!)Z}D(+~t4q#Zv<{AI&NW*Dv?sG&~7?jpg?HEBu{jWOnk6uC+qFT_&g)-oR^-c7P}yNl5t8&+osYT z(L?j3({wrcC)1;5`Nr<@f8P87xv~#8dvny1HYkzxTZXLZ#lVzpDfxhNkKG7{DGNhS z-9j=Ju?&;#pt_gk&$b*~qt=}A`N~zOm0k^uMv@?EfX5ltf~^R4VjqxY!U*s1i9E1% zV@BVKgy*laVB6}9t!`@lLUwIiIvP}G4x-AFePx#Vw%5mzJ*15dMpPVJgdhZGxs?5u~ze7(?P~eJJr_gD_H z9$Twru!&OeqI$8Nh!V5pNx#3hKtNP_UQINriC7>KHddt-Zrrd)m98k^J7@*@V=w>U70bwqxd!fPqv}VI#hTGK>lqZvg3tOaRc7%(XQ6kz@7nV z&-JLzLguDIO1g0YcXpznWiKl?_t*!peN!x}#}+G4qM1XY$>Gi(Ys@VFonOidrBmC< ziW1%0>t;4fZftLyh(+fy>*gubbk>nJ$(LCR+O)8?r&Cll%H!>q*s+?#cIgQ$ou!k^Rl1!fJ8d3u4>epbAK9_8H^ZoZF-F+}oJ=W2oEmhCg}erN z4wK#FDOScHZlX7FGXSR6Ei{?98m}o+umyK?5iYfGl>zKW`#%#{Y#2ZZ-jxC5p!6LA zx}`JHd8R4YAcW`u&EF=Eb&PQ<0b9gDjJ_M-003mFkj;^y zs#8v}5NHAfbTSx{Iymgx+YLpVnkO|G+J7Q{BtG7vaLhrMLjXz^5tOn4RG8U`5Nzm#= zxG}g;wtEM?I^?r^E}q*Tr*ylvCr0kHFV3qGSM?3+L!m~){A_ae-u0E>Mz_(XrA;#& zM8^= z1s1 z&oQ%I^0MAiSHB_YB|iDm(c(JeM8cvwT50WfwZz0q%eSJS$VCZpT?_*F34k9UCH=#AbpQaV#~~g<8Pkg+0CNEp zikT>7k~>hW0X1b*Fqvi7l_S~VguF5UcK^PwrUC?!<*Ptyq3v(RTnwv5$o8vdEO`*6 zei4Zsc!mE~Wax#IE6`59CFbkjqmQLVO8o{9$;!5)_PJX|UdUUS2mUBGG);8)M4L&fB| zK;L1a;zv!StSV(%t7*V>K#DxfWP|#}YQ>Yh_Qh|e{znOcp?)RiHT2rY=UOyfD4)P{ zD@bz7cfUQ(uFwuoxV|&mSqv`g38_*mNEc(}qWxua`!(FBIyK=sbhrc{6X*5Ra7f1Y zk7V_U!~5IVH)2*VAk$H2r+uOpk6|KZ{oveK0@;QkWuoSc#Xzf3{2U{dauiSt6nHZV8LCUi>q5FD(4FE^v_rA9^-CZ?O zOPwtK{zld)JH9`S9hG-~e^)vWDx@jOQ$QKuQ5yYJKM!VcG_mU5|Fsbhi|(r$rMzA) zX1)dB-vD4X$>d+&bKme@6)-AL`5eIg0LbAyIpVh{{|-Pg^CC(qj<9TXYo|ZfZk){9 zu)mw(wYTD&YR>?HD>{Egy^n%UFBMF~ouuS!X0^NI#XnxhewFz4k0;v4cTudD~f&wxTzXm|S5WfLN9dCeLpKLeF@EJ(terhrpWz2yV{D9{CehKIW6(>G8Hx4dZd@`93f@sbFG5 z)t@*n?|h_#ZI`x3=dDspsUIPUP2qw?dF?`rcG1yF8qQa!?Idt!E1XVdH(Lp>cts@< z-+#1zN_6Lfpg+{i z{Z%HJ{n!+?PtJX8W5pZjZDe*vz=}~t+DLWUVU~|Pw#r|D){6j?0Vy?ts2QmYSdVU^?6hX*}id(zLUY+Vg+_k;3E@8xzBy9K@>kW0)mngi6gM)Lc4p zZl%3)%%}GF{ru=I`l5>SXw^7IMt(VMy^0P^K*ZD+2@+9?4AhJF0Dyyfyo)kKqi6M@ ziI##eWr_}~*<~eH@+T#p+^0s&*@OO98)L6D9&e#4jX%VQsr~#hkxK|_R(^|W>07Fg 
zPN#f6;u8o`=etBrCRB(gGy7$2&L(M#LGqKQ{&j;=70la6@(-b;BI_`eJOoH|mPqrC zbm|_Yrujf=tYC^8b!Z`RFB;5F2$;bE=+6A*4pB0V}B67J1*%E9NQ% zC_YiA4$?@gYP`~9KN5olHTpy9>68BFX4OMM>6>iuW_)2ozX6;p87KtrQ^V>$_MKL% z*_rLMi1(4zCBAy@1RI;A8tT$n^8g%bI3j|Oe?7LKyaFY7lC#JeXcC@jt(v$}0MO;XpIWMDlHN)r(c$S%k>rqx&+fRYwWC?zf9RP;jR^zZWF--oULMWbcmr*3)o3&So*Xup)#{b8Z|JS*V|`Tid! zt;}R*=av_^`nNQBp-$zx;*XE7RYrz0Gj+U2n_5ed0=KedGqF#1LSRl<(Ic!lHzBS41;H_3`uCmA2Dk;NEW zn(I+Yo414N-jbKU8g%~|BQyJFp!if~>D%1@d-hJ)HT{pgR*$;>a`sLj;d43cwN;~y zWllq;UjeicpK zsj$nkHwMcWUa#!4EamMpA)ZKxv#9sd@{2L;IRrM1+FkP6H#W@#GMz@&YS9Nr zQHL6Q*QV3dbke_|wqjZI<}*{DqY~vgik4TYJYXx9#_k`fb+EL(HSMgQhiGyU>7=vU zy~e2uG$oWVD3=d;4Y*wb+64#^z&wD$l!Bm(6EN1HR6sz;&U4vZ^1-(X$VQE`h}Ox9 z^e=(p6ad&3Ujd}ikEzDUoo`P_-_R=>Qk_%N|AR)1cGC1nHR({|A8Lhs^!B0l`+;Ar zgYQzwW7nMxHTBDDeGRpu7)>gdnnNYs)+QodophIqK13wX)-%wmvrr~-J@V8$2Wpk4 zvxx5joh1|qtIc=cXZKez9d=zRL(wfF+?I|B;+-nres@8-;+~`i-GPs&fuJVn9Bwn`frjSK3yp9f3KRINPP0%QOjvJ z*D|exMjEqGzVg0y-kuYv=Qip4aJ1a_!G*0azv}&$4{Fbg8)nO?ceDaIuW|PX&ipJs z3})RXzxr^}_DmihEDcx#YDuVtkjef|Q0FBOw~GqSA_|xerv-lkEiL;C2PN+Q=rE)2 z?+u@PZtNoaKApFw8v&BHJu>*|Ty~@U;inVX4f1!NHhZA3WQIcDK{S7?OPsMCMwWZ6 z^qyS6LNb1GwH@jde)-|akt2vU+|vq1+5#;;!_)K``A|9Lv&zye7)OdV6@fZv(9T5L zXF1lIuYn5bnr`=zlZuEwt5!?oA+%lra0(y|O@QidOeFri-LgfkK^qYwG(QI&u>c)S z_%TT3g3Txu5-EMerL90rq_Z;^2X&7mBA+j36R-rk^TOJ0cK{c^0<8f}c=AON5UT~B z14do)jlWGCr{=Zb-AsUX6EPyl*x2Nt{|^bsnlDP^!Y@Wum7yjBISQ+uotiD3{bIuX zyTn~z+`-Dgez%JpZ`j>T=WiX98r}u8!$89%XYj^d+0TjA)maoGOuze4W=joH};rwge+lt&cxC z3|1V`uP>5p4MC}Z0NpKsyra7VbQp^BNZ7L?~beAv8DbC>7tqr$-K)KhIto5~ag5t*$SoDdr_~b7!Yl}PMS|^!HxG*X#W8LuT{%%$^|bGKXSmv zu41#W{`*osCZ_3x>gD*CS?tcua9qkWPy(8d8Ei%j>7Vi&NPh;6KLJxgu4XdFKXb65aeEHC zxmWq_0u`g&Fgq<8rSzE2>M^C551=Nb!AWPL$c9HbOT2*gbt`bC8|+LP^ctur02Oo| zWoGdb7u!AeBOu4-?HjOk5h44}_%klnUR4DuKLHMGqQ3!-Sv2sYBRg{06Kp*MA(V@) zr=N{HNpEyTCfeesj9qa|zjI4QGnEnVM|VE}#N)}$tn2s{`E0^WueqgD(C_Aqg9vX{ zl&=2!I)h!&jAy{$=Ck-K`K&c9Bh_tuQaQOeG)yDTBJHrKlE)&_S3@ zx$%i8jR%0gR_I%WWW$@#?!U<0puF%LvLgZ{nhYha+ok3DH=ym}Cu%hnv(JBanz z=cs{9>A5cnWdSg#k&L#*Cy!vi=!LYp9py9mJa_W5zLHtDjb!C^ z6T;W2g8<7O8?N=JOFmIi4+>S}L=;drKhsvRo{LWfzP=GVvPogaad#el^2no4LHL*! 
zZy(8?aAHk$mzPJSKdZQO;`GVoymDt{tTI?x?V0CRill9VCBC_gO)o$qL}Wo-I$KS_ z>+Ui(oHfONUdHO!>v1-U4eGrgG(kh^5RUHe`a_)*Gm+T{6&94H*5DP2F%Ew6jZeW_ zP`j0Y*IHM*dL4b?pdU{3aUm{2L(ZC-O}-^{Yw5QPjba_us2kfVGI}lHSbMb+k8TTu zJG)}KCEOJ3m>>WBC?=V9q}@$dsw>abua_+7 zjNkV_r$^(rjbVJLQW;nz7uN*-)MS3lxC&r30MHshQ9(b^3PpYjB~7FTlQUj3mJPn$ z&1~^aW0|YBm4@I4d?EVX0sSPP1{f9-AiZp-?)2Do=EGXACa6EH&4wS*^wkgsj9scQ zE%UHto>j(WwtdEsEJV~P?QC8SY-G(jCTC0n9eXo`wc z#)o!nvW<%}-rYk=ngo&`7#@Cv|h zOuYnOK=BQLjR1cF$OXU$D&tzm`zS>KJ_7g*pg-Oj=esz$G*0LKfs#6R=+~y&5exWN zp%#Y*HAD3c>T4E0onbW$_!-dv3vw<9MR-?GtMbzL=uZjIR-3*~yUYb_M6QcDdx?!+ tQ@xd0E!m|-@l)lj%si|hK5{&pmN&d8zo@uqRM7}$vCHg?YvbAM{{zKE6SV*U diff --git a/nlp_resource_data/nltk/__pycache__/grammar.cpython-37.pyc b/nlp_resource_data/nltk/__pycache__/grammar.cpython-37.pyc index 22d6037803608d847bd268033d6ab49fccb0e30b..60ce228e8683427e065bb9d52348303da543ee9f 100644 GIT binary patch delta 10103 zcmbVS3vg6bn(o`Lbkd!WKnO_)&=7(okOV?_V~9XNULib0xgwV)=O$_G7w2|DV400$ zqB5o9h(}SG9R*Rr2P$421x4{OI%@ga)tdEgjn>Z2%+@$#IWs$3R5@e0|95UDorDfp z3)TNU_doyn&wu{sIp^l=G1u1zU3trLb8{T@uQThQw)K;PdBr{VIUNq>P#sKR&WI9q zs?MlObw%B(TLI1$@u(hpx+C6bmYOATo`^4+t!B%%Hey&cky;cTr;dvjtHse0wM22~S$f_o2lKN) zpMwST(q31u(yOTBSuV@#bEsw1H;Uy`U%p6IcNoO+6FY7*SKD{gq|X?<5?MD%WMW}IZZr)O(e`jy}Z|@ zPEAcSi3JH49GIp;j-AY=5Ozvx>@Qw1TZ?jTw^rW}hsW*DkTV3j{jy1D-Hn&ev z=dcZI9-B|CdP%WB_w-0+bJ;?+h^Ck)(Th_9=gYSxM-_DeyPP#p|3bEuHPUmDKK;mI zb_Mk;k*#Iax|}U%E2yu5HL;cST*_9lE9u$DR*%>mU(VLw;Jnkx_vy=e z%N^tn0hY(gSP`4XO4$@v%%-yGY<|06UBO(N9hEI+()p%;W2MW=OYm?kskL>+T9e^; z%*yYKg#NK2_Jf_uY{oGpP(L^X2-WAcUKtGP_32w&o_Iv!r6h7a~tL-#+t#B4DRuZD2 z@TuS!!d5E0h#njiHYh=B9D)62U~IEeC8?Z}%JRgM9XcOIYqqVqOWM>bTF#4 z$$8lTQ^_8jK^)UH?GEwRqIlhC5cmNG%`(!kNK(@d3Nh}=im@~zv*4Cb(>g*%ho&96 ztN8u;U_Wc_MGI+UnHSbj}Q@=je2c}e_z!j_&=P{Brg0>DIo$pBLT$^oVV zfHS`gAkA{X6dbZ%OFsi1;2Yz1`T{Bb_1;i^nJ$qTv_?#Jj}M z@DXt2`jmjvr|>!EN0U8Djpc3Qy1q~6xzs0@n%S7L>o>wx-W-%Nj_l&U7rV=6u9*&Y zl>k)$Gy1vP8cvHUTQWTAiR5$o8E`9~lQie%z=>dSk@&V;YYfth)C>i%n*u#L$N+N1 zd0|5vwCFF1-hg@?9zLI-WVG{buXu6lgR5aw$|PCj*r8;S^B}#anDcpPr$v;!{lvRp zo9C(DW}L+rnnx-cJYG56D}GxwrM3{GWi+|$0Cz>mTQm3|hsTwPxig*#E}_;4dS6Oi z)#CG+<3!oahk^|w6JmzBM0j}W$V3)JNkorn+U4S3s=uy;R><3G=wT_bVN3oddr7+< z)4LPAVXb(sW@*($*yh@rkLcau)=;ELOsGA5S<{H3S~*0XqFrx(Tbr*;m02VuiBpdZ z6NN7)Ufad&+4T!R$e}LFeSJI}<5H+pCiQaN=!_(X4@T5pC4M&h)Wtj(@vVti-8;48 z`)3%;J5&CYd7@pk%~@R`Lt^9vEt82MKA974oJ8C!J+`|o!b~z#V?mdk4#}uX{Gk4D zaO7Mq!}OnM9VMb=Zcb08D3o~)US<2_6==a$d?kQPaWdIu%7W>jNXIQ!;iYV5fU&QT z;l>LsQiui2F;yIERs!RWdLF3Kfo;hD)C8xyd%Wtk9>UH** zi#QaF^K;>BxaNDg-YcS~VW*v#25fLKv5*{J6M0K#Zf~y4=9x?YHQ2tAb`y*mq^`7w ztf!89VHRlI=O+)zrE2ICHyVW^ayYx@bwcY7RbdX*$rRPaYMG0Q>6lY`xohI4PB7=u*QIRZuRcLlw2olCT)xoYu3zAt`c2qme;AEX#^OE2U$b; zF5t2VSOH4mZM6U3F`XOSC;qsmp;9i?+nHdYq;9#|^<<@Q(1J0s<;EyWY+Ac$xOT6h z?rihO+MARrj#;(=NI{00!OCgl@uCk;M}a1n9i#w#{Td+z{A1YkP=m`HXP<_?b@O(ge9Ee~NWxnfN# z%+Z^&o%1^e_@rzp1;}6_%RfllyUk5ozEu{s4<9TGxi=BwdMruPCeS}@MR19O;=tDW zEz<4xfh0PF4>@XVZ)GY&OGzZ3l=jj*u@no1KP}jeX8zR=D-Gcx{78@y7d1LPR*WPb5W!WG4+6*PKe!VqC7QD(kgp0x%uP5eeLzjSA%gkfz(PZ zuM9!M%8G^!gU*Jw?NVu5t~IZAm+*)N=u$TZ<|;vd^@ zs%pf9O=)7@2L!~mfm{cy^SsF2(OA()trXi4DS!UGDPJJUpjdz181cxCqLnuj?`C3- zIb_wp)I={jDGAZf;ihrfT*kr*p=jHJu&m`lejR>3;a5}F(_)`?ICu-ShS*H5S}qvc zJ7d=Gd`}79Ix-d1c1}#%_0vf|8j%Qwi;07+MdIsSy;E7=Zq+I6D!F#eCShQ1Kt_*ik)S~lF(ZZ{GN=M!rY$Ymw4FSvF8VMOjRHwTY#`gfd z4M65~nSFkM*MA1+1%TVwTTxh@-vRIw0I5!Bl1pXX?U+SPEXUr5q$4TqT`ko6E($2p zfiGLEGpgJ^7jH59j8a!EdgcDFKiQ=POEZ&RT1u2T=KEbM6u(UTvV`mqW!=SInV>d{ zMcv~!$*t8+FvX#SF9ZkyAhpZFc|rN-t>|I^!={7+OU>impE`RuH7m1tKBx)+Mgi0l zu(F%FTlEC)z_>y0YJlq@hD@3ahayyaJ3-k6N?)5EN_Nr#i66#mUK`a>z}tQ(HvjNnD;}U0i98}Dyzzj!=B8}L 
zAEYL411JzXdW&uD^Y9KqY#=3o@IMiE_b%Hy;(mE6MjQf=aR8Y)gtEOK3?P-W zspC7RnAumV+>7Ik*x9#AdC)x3=ToW+F-RTzlPOo9+@ z9t72c0DgcY1p3XAiNR#*f5s)gx^>;;OPqk8$10|;9nlgpZA`$N5NgmhU)rc?{X@XR z;{5~TNa?>j5Ohw1+xE*TjhG%apzYUO=8W4WDc;%OUoBd0uU^`XW+TRW(t#;G*UE~7 zV|uJJO1oof3|Aby_>t)C^fD_V9Dxp~J4hmz-6{Qq=51}o8B2Jqay0~6{ z!NJG!wbH#{C>%3BbD5vq@w{?nMwa{u%<&`uIP#MKL)S?s0NM=Myfp1gm-+FXB}#20 z2&IPlufHhM*gS>~mucH8O78lV-0WPcXcdUDqN-y=i>AZsDG2a107{izA<{F;mbB7C z)O*@R#UYOxJ};e0vjd~(?xM`+=MTv^J16d5j+M}@nchWPYQyqq$xyq|pP68L%bV(Q zqi2|gWObQn-h20}O2hwG%N*+|H}5`lPFd4PO*?5miIs7=Fs!e)zOk zxmgq)ozXK;jO|m^QY@#LfaSqWufbnHJFBT>^XhdQn(d8y>S(zQq%sVzrIw1#=q|%{ zILW0C{b>gnh|o5O1lv3E^Ihh-qm4?Vth7#9N_mnxh6L(GN+eBy7`;(69-E#~Ri~-f zzG_PQX^6o0JT z_h@Y-y_VkUzB7o=Yv$U=LeTihCmvZWHLggFlj3W@u!4ZFF||{|_{>8JMA^xpznl8$YLxDk8pW!Uvz47<@5!>F$AQgDedEAB z&!D~A8S%u)>T#(#qzux~(80gE#n&g-DX*AIo_fPob&N3l5rBsYe4}jVuj6GE0PT_q zQ^9Y>%QjK@Y-djpO)r2fVvnL#4RD-*g`B<>m526>mfza&y)xXC;Stl2p&B9!#gGiTdxkH>X><83mDHqg8gU}^n zgSlEU#Qo+yWVqVr$~ON3jHF1T(Rv5qEduoA$7^38Nq+o8bU=ePkotqKZuXE*`l#Vh z0*YHHRCv;S;JH1nx`UvVx{`Xgox)PW8PDo)1TI#+*pw%Sy@6L~UYvb$cD)?0@U`@w ziec4DcYpZM;@((4A+Ms;{|%evKDv=ytgO-;j%wG)ipyp|r6 z4Z?aWDgGmRp+t!n$(kJT*MyRx%}-D(Cb~{fQcjqMPOnq^bI`R2pvC;^%tj>_4nsqr zNIp}neYtAoAQD32LV&){)7>V0{k1>%Aw8z;ZDmnUB-E-`=JG*9#hI6@%dM<-9*Rao z7km--Ay}Cnjo&1^XD?H}G;7XY=gmiofua(v9dzp92gOg{cxLnv0f}H=nP_==yy$*& zgYrA^%QqKke}p*y3GgQXFtKuCk>m|Ec1(Q*M0PU1S0=dGc6s>?NdF5!23ZAK$P8)1 zI(vz~6`^0u=uyXHIIzo4jB3H^7|gmHBDcsOrW;^R`cSz*_$`h$T@i{5?EBx;?q}3rFm|7~@-hOZC zLzq)~fn=%J(hd_Mi`f<~0yp~6ms_{T>*W7%n2BF5_Nrfl$+rN10{|ylnsheG%jlpF z2<>*~FxEaJ#k&H$u@iB40#4^BFTRC;=MbI`SFHMyko-IVgr`{vdnLY1|1`wBdAcsxuDD$#qg7{)8nSj~LtooXZGz z%ap5TOK(MtKU?G6a0Z3H#-lGMrdkOT|XEx>MI-_pf6aXO(4t;`iaQL8~+c(j6*{J delta 12574 zcmbVS33wdEm7eOkTBDIHTatBHw%fNQTaqQq#+EPnkbGdvU>O6$(TvqCY3#XNGcvLm zF@Vi-nm~$dh)IYXLzZj7Fy!J`b`uCmNH$CIk&yoOOUM_JEIF1^;V>KlhrtXb z%mR#_&ae`32Am9KuCOcO4!9$pfJf%tVQ)kYs50#dmqdI4pG9sMjzA^Vr2R%2Rh5}#=8WU6K$TH$ zOrxr5qr#X@&zZ9M45N~GDznXNs9BX!O(oT4&2eX-mhv-=8p_w0HKvcG)KN*TQAZ_p zXcMTXe7!M?^0Ul(%FmKg8;sc}9BE^AR%`9Hx^KS zffO>=SZFMwA!^2AV+lRy8B2{udd@eRjAis(V6yRr#&VM#T69_oES9fnyqX!E;ZVW_7g17Wb8Ip8Xc5cCOKARIhGr%jZ3Ht5=ULE$(E8h%B?-k0BcEDfwPls-q}p3G~? 
zYo(RFjm#=f#*)FX9^;0|L(xI;qw+ayBjXkH;NZ4NB~sIg9L>B`-OHLJmm;|=cPu$%@=45=N7v0Gx_+}* zS<|x=f*NghJ~Jm&iicz{M@?P7Q=G1ewfn)~1}W%es%GJ&uHPfJ)b3a`jan2RxTWg) zP%ts1>-URiYrnIqenPh8OeDGG;%1yr-4eCBF2vcow^rs_^4X9vXX-A!x=yln{gKQI z^=DX%oKwMcEGHjIOja4SF+xXm{fW%&4gblS=Rcb4_NnmiJU2E&QhNMJ%G;wV#@Ur`xv&D=cS%~}GU0dAV3CihaEoN94n%Bp`B z&n@WHUhg%+qQlr$p++LzNU4<+=18NUZf(5=JG+eu2DxM9&_J7LO3r{yI;2a`$8 z3#9Tks+%mdl!Q*`@OM;iOZ4)NOz+~2ZjWp}TYP-k{MK@`maE8VFKlO+Fj@q895|aP zezxqab_u1X&~_(PEfJaJU(wc1%>?PK;@;*{3#VqXXkCQOsIFfkcC~!a*bZsr6X{{G z=%neMD_K8iM$M5p-`Fh0Rw z75VPrIE8hMD)>$!*=0e_t3=oP!@)#C*KMIv9{8RTzujGSnxXA{(6p6^Ncq8`eqJl`~)Aa3@b2?(?Mr8Jd*wKdVa8jXqw?m=bxITtQ5Q z5UVV!g?=*}p3sDx0ppi~U20_qr9O`C;P+J@MT9cHo1&?cqo&aEVo#TE#_y@nbkJGA z5l{>ka2jhwsB2dBTDfm$w1Za-xor=W7@gu%T}Qgt8Ox1k+NRc;UZcfmJ>ejpI!doM zRv2w3(4wAl?Z!%3Gt1~OR?*I~A-j#8Z2(IfGIKXqxfTO26MhwlF(#`H{_H(*b$4q; zJEiz)5NYd$;-T&&s}N;9tyR!K0V<%q?`AIR*~`+Gp$0axJi&O}j2hB+a0$~66#Eip z(l7^t@P?P8z!@|QayxP>+KblUrR~~KVuj^S#txg4y+7|GhF?M5u%v@|vj2X?!!mPr zyv-It?!+XW{lN>|F6QVFwY!cKa`J;zT1*=y;+b8`Jo~6Npb2*AO!0?ZOWUsm^E!~+ z3gl8@I?AxvEI$R30owQQsL2!DCH7vru~E+0GaNU9Nz-x;n#o4BVCEQs+*sz^O{N=C;mCr+3nn`vik-wbE2E==pZ@yd_{u;1^K&}RX5Gn3LZ{;>4@#Hb7 z?(;-TshHEo%+Y-%$|Yt_PS&V$07Xn?l@@6GaOR8q|HRfBlN-zJgojQ;W;99NMo>R# zM!?2B;x`96_Dgdf1;X zHV94uzh3i=!*g%x|G| zG4^N0k?Xq_fjujBO3t%?G3Rr{sfj;c-`I02$?YYP#0i3qC30DQUp!&F)p!x9equgI=QSQm zn8uI98S|BS_tF@67s!3ZMSjgTLpC5R?;bUOCa|*Vn24}HfiZ=!qr_VXN7IyW+ zm*NlSKK!=QFM&Q%j&a9n2luBOeUv_gt2N~$<62TN=o;)cHqu2Lw9(n-xUOT&nR1Rh ziMuK3qLlkIg;2wF*hP88aF2Va%zN5l*PK?+`bbmyN=H&1FG)F0voRN?d?{CoomR%& zDTUvXa`&OOH07pRPs&3P)-yWp)3Sg2%;YeSCbXayHE-0m^I#-m(#VE_NiCGnLQ$=s zE@u*lk7>~uj|9WHcAey#f{{+N($g~d>2ZpW$uu(@!d*ZzZPMjRkS8tGp310cRdF*CJ8J5~(Gb-f)8&vGz0!XM zrbB7x=`5fQ;4F7AY|?&$W*4BF3NzkjKicib%1JKCs?3^3PusT9F~Wa^Q@{bw;Zd23 z`I*8zu5wmO&w8hi@-F)KvC*2XJ=R~v5nNff_Ei9yGhLOG9#tr(^oo1!YL;|OZ6^80a{0{O0h!@ zYDYjflsbpuJjqCK*O)u$p;AEIEf;|@=Ao`UvL|Qa!IWoQrJly6^t4BIbx3xbO<(Ca z(nHIlgvRP4ACv!j!G?OJQlaND61%I;|cDb15|RCLtCvQ;Peh+ zIR?2r_yxS#vGKnKUmKBt3Lx>}G{L4Ms!wnuAlg`wG4>ObEg|2OMFs3%&Z_9js+_i= zK330W$lMI2oQ>836~&;Nb8dc;h6A32#kVOzkVrd6UdUnMaT^l=f;5(kKTU=tk2dBW zxjso|2Vy2_YaPcGGWRxeOtLKHsdJ2ZlWsD)2Ue?bq`XMgkgPE^=}oy)-jq53YnG%` za&XRZHC2+9=HEi*27?r2I1ShBgePM3NhY7`^7iflf5A@i)<8GsU7Uxi(T1N+7OK7(YqIlW=|i z&mVsHVeffpg~yY~wA?%x9W>?jseREXK?@+~@tP**3BdAC5wX14I3Q6g!5S#R#!^p& z4AWwV1FmG^SR~*Dc9AGY0(+I=enf?UN@w{b6&U;!WKKf!gH*O4hi1QvP+O6v+t&ti zZM6h!fP1C{H^JEtI8XTGoM%I9&Ota%85_H{cQfFWnyAE%$8!)w35|s2!yevZC}W7q zP6ShOZuM7^>*C=w!I}d3n1U(-uVM=vWXH+JF$Z}vsv(5Q=$m5F34JFZiN@_Tr26;lGfVpo+U%puuM_QkjZr-T%t|JbX`@lEjsR zfX&}crGe6@X&Mykbdqm7u3|kYph?=1Vi>0T%j6_Vauf?6*RZelk&@=0EtXv*B3vbwbc`!~!pGE?%af)4`rnJHz zpiB?-glFTNsc&ogsGeUz#445dR1q^dMAsP!6z4U26xm}Sqae_!+^pOCkS=8J2Du01 zUfN08`&TZvsB-J_Nz}mGb1V`Lr53zo$*&`sszfa!&g&Gh5^E@-s$#*RrgGmbf4#!| zbN$tHE3_wb`JsAetBg!l#4~Z2%nv&+R!Rg@G;=DljkU;wb%65x8Y0#VUGCTmR(gWV zQzGnTRf}6jtNZLLV+pUdXxIUAJ;>!CxUsn40*2p!svyXuv5<-!H)oQux0NMPN)keS z;3@?v0a-!BD(N2SH{xf!GvnH)k3 z3^=Lx1l2yA`6u4&+<+R=oYX}ajn1IJKAC>hoyEJN^Jv%ANwoSL2(%>?YTsEvl6{2% z=@;By$St(g3w0;gZUq`hQ(-;7l==SARyIc#9z@NgNdS~T7uv}87ClZWk})MGK>M3a z&9M@;xgLW0L7-}@qQ}PdZm;{Q{61w`VP-iQOe~zz7^{QSj>!~XHC+?kJ zDw=QF&Ys8&-=wnUaw@mqM%hZR+($8=g05sQ7@^lCo=hE>_ar4R;)hCIV&2#d%c>z) zvVO#!bh%4BJ+^BxMn9pznP@vnfq2O}-6a}sKDfRPtS9~#}?>J#F0y;ll6dxw{`Vq(%^ zOt|gYIWgx;;2KpN62G|fhXrkl8y=<{c9C@yN;q7C-8M6Af0BS5e=r_{mF_XunQLBeChd)-zyON$YOXc_n{QZ0xGbYM=Al5A5tEqnB@yObY z?o)ddQq?=3-oFiMp%sWXZ1!m%ftZHfgo0aS+VX_^W5dy;<%!Fu-I<*Fa$=}*i}+`n z56ZoR)Dik6Wam}cO9lKc5Dbole+}u^Kp@2`4MpjEW$N+%fx&Yaa0Qh*DqLh?ADx|Q zoqnb&{B`=QW28oElF2Pl!K5q2eUH?Y&7~NUUHbeD@xmkVU0)?R)Zf?XVcX!smQ-a` 
z%UzS~zhDcvxIo-DMD(+R4Uj|PIK=%8NM7xg;&-2|FKaGn^36=`co1qH{oG^gq?(yj zGih5Y7m5y)0${;|p>Qa9jG~#;U*3XH?-3C=6Ks)+WA;1itZ}5FiZ&S*@1N015kBaJ zJiR+yC#oK8TZx^kHBH~Pvv1cGy<7M1+}o?~>$aSu^v#jVUqEGf_JbO_EaeH>Lw;3^ zJlZTjgPRzdG&;1f-Ys5!bT|8YrsT1&Ih+2LO8B!N&k#AsuI1lCW(x?DC5!9}r~~4U zj}NEoQ7-+AjCN;{Z3KCah^5ky6Sy(8JY0SfWxsU5bZkRHNsZ;U72ZZEtSmRYmOC7Z z(vCO>y+9daPbfNU>ZsuvaJFNz^j0{;O*+&crCZG)Cy)CBDz(@JnCG@*LDV#2i-uAA-4p#)MnXjop_ z8H9uVW@A}lOL1qWd9J)Gnxt&n6%7pP23^YjLcEh%!ajKZh3h;OuaiuuGu}T$ckKM6 zh`f0A!l5=`sI$bSFZHrti&HOk>i>qJ{0qqMKp@5PVG=F&()&FMN|Lc-(kHdq5$S!f z%IQcqf%R`kwH#x+J-uIvl`k(#^G|{S@C;1f8HKgCVvLu`wN2jyaGE>ezziNWX+I+S zrBx+ImKzkuKmtBEX6YCcfs$O>R+5aBZNz1{lJsRE|22|$H{3)X#BwF@n&ZEzY_c|O zIZ)>skEnU2m;Ff$ymDt3=16FTYAiN*L8E~Q8*oo<{iq6s2gd0xi?&_jt=cO3^v@%9 z{N0QXE#YF4jdTXhr?=|?XXTyBrT6_(le>N-#0KOn?O!udI*Ac^KDu|=L7 z74-SZpj|lw9FhR=mYT@Qz6-K_ROE2`6=j+SA7r|j_~q-{(jTD7pFl7H>XnEaBrhv3 z&;@-xmJ^oyW`omb@6?^HM`+is>y50ZQR(SzTrUr%mcr=V5h?5}W2~ dx?>fUGgell9r&L|&ju=+Q=wOgN4|0S{{i8-So8n@ diff --git a/nlp_resource_data/nltk/__pycache__/help.cpython-37.pyc b/nlp_resource_data/nltk/__pycache__/help.cpython-37.pyc index 7f307fd7170c5901fb97446209b86a9355f6c6a1..755ca73d1de83524afaff30d736ca4b1fd453b50 100644 GIT binary patch delta 697 zcmaJ;&1w`u5T5S&|8;+oO_r!9FAH%upag^v!J|jL^`#lMI=WD|i$iz}uXB0AIkIte!y(9vrBuud4g&`Ksz$@-s=Fq-kW~*Z5ohVVh1b-(vz6 zvw#3CNJ$wb2HSZCflrD3-np=3K&wRwY;DLWdSO60UAVd_x9z+l$iXegX%3?9> zLk737l86)r=TyEE0Sw;~Vu=VwT?}D7?mz%LCW8IknoYN#AC6|E%{FpXDSp;eOSM{8 z+aF~dpxEmNi>8(f4P-HNF(0GV6rDw+s|R$Uzmwsihxv?1AA>Zk-h~F4{`b)SkRl{K z{vRR6%Lg|Iv3^az5L-X8?;kM-#974xagmpsLe&5Ktj&1eg{YKViTa-YX#YAkg0Mli z%+>PcjJ9-sBqv?%hG3)hIyxyO3Wm9=La(jm%|-c`AQV3)Pc$u_kwn z>!0tdUB_k^bF|tVTVy~Sw18wZA``?3m7ad%PD!gjxd)e7>vIn4CFgC#ar~;0f^%s` zEbpLe10^ohE17SkjVTMOLY>bcSGjb|9kY~f$kGf>?i*(mv;pT;UYfW5Zlps-WE-56 UYiJ6Qo|*;Kx6z?AJ@=-+0hXkTcK`qY delta 747 zcmaJ;&2G~`5caOu_QrLRrb*KRf(u+yk)#LULRBP=AXP}*ETommEX2frT5q9LNI{}k z4v}Ai0}sH3Gmn7eJ2zf{9uTvRfQkcN&98U9{buHS@Fi&9Z?}C5ziUq)iTSF1Y55id zutEz6(1e5pt)0-+4sBv`EMch=I_M9;0r!lAE^LDb96b+)&;lPl4jT}hSz!xypbZ_& z_;Lff3##5o7dGD!Vo46Y#(J>TSQq+eJqze#YqY!ibZhl9-SLpk`Mv2pPv!JQ^m?8b zky=bA)k`uRF#NZLQc_eFQ6{dsXlOJ87h6^AWA-WM6GJxeF~A_UHaV5|?_Gr^Vsz>MMJ&v@xr8QPpmlD=nd> zO4gpa5hqgh3p<3t{ss?scjQI7nAzU?zSE$;hWj?~Th1r!36 AfdBvi diff --git a/nlp_resource_data/nltk/__pycache__/internals.cpython-37.pyc b/nlp_resource_data/nltk/__pycache__/internals.cpython-37.pyc index af20c90788a6aa7a45277f7f2c923f183ee481f6..3992005bc356460a43fbf3fc31c416142c0fc00b 100644 GIT binary patch delta 5820 zcmb7I32+?66`h`$-5u?&q?P1LvShC(s5My3*} z=JoI6AN~KI{yqEltK`FTq;E}bu2+J;ZqLoe+S?@Q4eSj6X+Si=lYWUv5|t>4NpYeR zg0viyby>8Eu83CERnfY1muTI(TeKeCBU(+@M4O}Mh}NrnX&(ym>Au4O7*b;Xc&?sD zq}@^?s7hvjmzP~nB>ab26=w5r_Hrp9@4*pQED$fy3&gBDRv0hRi-4BHQ`)8QRC+Ph zXwGq>Porz7kNSa{zCoh7H1D`X^UN9jie7BG`UXm9{vgq3(f}L8P zDKIiij7)`**>oD64kJN2gBC+OhnCQp(9Wf$v<%vEI*ZPR_5x57q;o*c9J2z{R6>6) zEr))2zpT%rYiR|o1as!ouhG7FbUu(Fx{ikE0-zSq^>iU!1k^&hfi9*?fLcU1(ki+X zsKs;>T}G>cT0(1REhwv^b#ytjOX-Dl1+>e|qRwhicoD4!h4pAaKhbOGX1bC#0ADR# zMH^vV9bHYEj!T#8%c;Ce3SIIp#ymvqycCNjETgSE5wW7lL^BNeBkN*j+)P+4%rv3z z8{hS(EnsOktivfY4b$#OGM)-s*=NWjii^2GEI;O2T$lsI{SqsJ=Cc)5-Dy_EpK~>l zc|6Zu);ASKTOw{i{KK?EdM!NZcK|XnC0#Phv!qVaD`ynZD-}H@ql(@F(NirsBUwtn z%o;3}%KhXRv0PN?(%5Fp-4A&L^F++=&5(A|>rQBvCqw%BfwZ*FoKEk6FXJ90y#$6O z7=^Thg&=gg)GJx8c4GObOG7=WGdBaKWzYoI4Qj6^<4XT9>AHWCHpt8OLg07Dw;C6 zFMp|KyVDj;c3ZqDzp{3U?M-*LrdTp!rqi~%ebe@JEEgOl+->KyKq%HRmSoJ2gXmqB z&(GwSkpO=_e?yJA^zb_CVp6p|RlVl3!!x&LZp!+Dl zqVV91@e{VkFu-QR_$hz4a8L2*q@aWqi-7e^Rxdj39y5(Iw#LjbMfS5 z8nRv@S$)<`YUzY|pI}dP$6HNS!QJJR7h_T@@T6M+EXW6objpx)8!cr(%}9d;QZk2a 
z%t)QC6VkO(8=*2)j%${Ccs?5~pQVr@eqCkxw2>Wr=52^Y(-yuyjcxqrl{*_Sq_#`E zShjboX>}wihL+*Wq5IoS%ZSDe5gJy5OI0JzlC}~v6Rec)m^ZZsWrqmc2@Ml_^(GIR zj72Fm6WicSPh);aQ%Z2NnToBoV{`3$Hd)==0x~4i*GLyYlUT=gn4K^ zoH`Hi+9f;O9k?XQzqzEcHVI@%W<5A^1Zl!zV}=gnxR8)T%}A1ktt7jTzqO>kPHbKl zi&-zO5(5sm;9y5#^mg2e3WVhM7qFw*ma6#*3G?3S{dFlAJfa5S5A-`m!wB0900UPd z&5#ZVpRlWNU>^_ERP{OLV?T{xA#?-S?qqAH8L=>(VZVkE(%251Jcw`z0BWU~f|#b+ z0qEHZO-7vXiG2!p3BVn*et>7u{vZ&Ntdcz9B`R_ACu=TVgp*_5)KM<9^YP(bYk8{7 zuHp-8|2)~N_z^FwyEJ$WFo4&WjTb0>@!^Hcj4zpHMRN1RA z9AiaUdHEjK%mScoS1Or~_SmjN9nnY!IM+FZc=yWF5R3eVH6fG-7LA$g0_=Fq#K9;_CfID87q3eR zQgaYyBFsgoK=?X8xvFk53 z9ODRKjUJq^2RpAVnxF=XFrt>pzTmeuX2}5GxO(;00pJO_*(qq4hVX5K+YnA8U~Qc! zE@wpCH{{_AS7gHO;=fznsDB4Iz@3B0IDqv3DIK;7#?mM|fx%K$@;$&G@;HI| zKFT_;rtwOc4;DW(A~#2v?8}~7Tj2JM$q)YHP38F`ma!XwsgS?9>B{-n19e0me;ynO zc)h{kE;Gy`9VQJ%6ZK4_Yy}D!N?CRT&=WQ+#5VCaH=nAREM4=FW7MXT*!>p5O$f93 zLzm3x61nS&rXdb22*b7q3kdc)a8VQ>RABotB@HO>kW~qGFV2j*Yb1YcEt)o%*>3i* z+i}Sq2q6Glj-=QrzI01*)txvd4x;ZNB{oRdeMpPbAXkL2Gt5N%p5Q%OKG-PY^)M`f zcs+nC&u0L8a9F0}C?V0wGmyBMKfLv!aWQIdt}GUBgb0)XWMR7i_VRn1>q!}Zt+}Lf z{IorF*v7PRC~9>?6X=-^ozL0!1E_kx+O~gUy~CHj6{@{cE1^!Rav|JrG{P#fJGXC^ ztHP)iGsAI+b4X1ZaWt)$KfAL&jOUQ;HK2Z{lFWiSpxEwsl6J>TTT8~Mu!*f5mN~{D zg+m-|EE$I@4UI)x85$=(;=FDs!+7;#XW>LI&i3tUR+fw8dJ07yL%^~CS&3E0c9|Aj z=^g2#4#X9wvdZp9NYiLys<_^ZTHziAS3k1GUWgTAPF!R6=Qq8^#}n%8T~bjZ^v3c~v@GuK-t&KfZqjd76K+e{R_X zSNO%A(y?g7G>ljIxo{cZZq(O|F_FE38vZni8sLSU4e`D%cx|ILv>TTS0h8wsT%(}k z8pd1vK-)~d^FUqoQzINs+}L?G1wnH*^}cY$iwEM1a66EvC}H@fp_nmif*-~C@*z)# z8~^8>fv%yQeE>V_pF|OR1I-@qPoe1V{ABB@`Cs7y9|(_hL}QehiH~6`GkJcbyJE~9 zCKZhp?H~O9NJBM>wowS)Iq>cbTgO+#R;+OQM2p^riBE-+YPzan#MQPh9_})S(k|p= zKl=-;Wxqj~u&sAs>|a92({!77VQdx8WXJJC+~S@#nN!{c?tk;m=CzBz9Ii>Xx(gT- zm49KbScrRNCcw}hoeS9n^Kf2PdAMz5A0C7Z&oTB(gkK}zQNezT@Oy+mAlL{mBfN@m z4&gO~*Ad=CcnbmD$8h@$cOfhp;~Zg!s9@;W>lFAIB)tGA&Bx2zL*%LK*7l={+j)cd z^PT1M?Ht4T_=A(M1_k2VVk_}*kL}%rM}hMbKszt$npZyh=KU3yPT2H&Fy`a?x>l3- z_#ItOj;XV^@V3}u@)5r^HfI?IMc2}BY|*e4)>ZftjZctd``{LFh()b%Ys}oq-;R}& z&$&0=wx|@fUV(t7h7?^(GHPNau}Rn_uw%D{gr)?);Dee43PDq3$bFibz9vG4mgIjf{CvLaZCFVb{i1V z2ptG52#+Ititq%0p3@VLEph5DR2)f^D%-b7+yg|w;K!M04-2|*v6_fkUE*`!`6VZS z98(cI{LaL(J{?K?AZJO0qX?qPUxn1w2m=UrA>55{4+7qT8Q#g*8H7g>DiF>B*fUdy z;g^PlQEhZ5qLCyu4d)lVD1=nJqzu!AeJ)Auk_gMFXC2D9xivWVnZ_JgoW_fqAASM) y(m#AP1mLr^Q1;7yw+eSFZ_pbJ1iXq?56@h97HC0!eR79yS{~8*B*nWy3;q}E-6&oF delta 5702 zcmbVQ3vg7`8NTP z+)Sv!fp&^g&s0mXO6s#ds&=tBb+j`+#*S01Q#*CMI%2EE`lvW<9XnWO`u}J1*u>E3 zbTj*%|2+Qpe;%7J-X|9?kfF`l*=`9w^V(aC9d}962iO_=Xh3X)Z{h@zBq~u7mi+0K z_=)Jtep$4NUlFb9S4HdaJ4CDbHPJf#PSNUqU9>L03tBlmQiOmVlah(4R+3 zp{iX+DMnuWk4;W zTj_FI1ylvCrYk^KC0$8ZLA#hX)zH;I_&`}LT?5M22<1!YHhL9Z3oJ|N)wB*)ETija zJ+#XyX_b82KgJX_`-tt?8a5+l)M{g7n;kB)m^cO zS=}8t+!c!lSWk5o->GdRCHyw6WGEL_v;{HS_;JA(=`HY0{1d>Ex(R$`_$rj#tqf{O z?W~%Vlk$i>C<|IiDnKiOR+B2ws^v&ZmNqQ2Yb+;~hsiC((y7v;vl}eeutX%7Ct|*9 zC`mfWpfjpl?j#uo%JOt)b!VTQk#vrbK?0)^j7n66b#hWFlm;bB?*B5FP*n5b1LJZxLLWZtTJKISg(Z?HT$By;c#WD(W9xwKE+mXjjg3u*v`>;-ZFEA zgXKfa_=%YnT?Mu~(c2zpv7nhq*lNqQEn97^wXJd2o;KUrxb=qSJxxuv(rH=@W6p3R z+OD?4adRtUF~)Qp#N4nfoRMQ7BOb82Y$syEjN58=EEHwgcX+oC`CPUVj8NMZ3v?;NCng0Y zR4xP7M^lIMPHP#{IAiO~3{Yf$KDBiAYIXJmcgA!mYJDxG4$nKK&Yd#v5$w_4NW00( z_@eR(zOi1t8a+(309a617U`CudK;5a_1!8ZJuX2zzAGJ;j^1c6A;Rn2k-&c@&v+{mITztS+LH6={eFaM<2Z`;4g`sg7x*KF@knUW< z_NFfQEaJ&n&krswBu9DQ(o%AWuc*&YJ+kyG0%t#%W%7rXpUE2!i|ve>{oBC!2l$?< z-C8#;>EVx6RjfV)q)%o?a3qN|VbP4Cn{izD=%QvY#sXH1jc~2Ht`=njBQ9n`2x1`3 zEjZX9jQ$W^QGt-MvU1p9s<(QvLW2CBRR?Ps3?5O--~(dPM#Bi(0RRIDq%EYqA|~u8 z4j6o8P4!UPeC+ok^da;E*jlW;+YDM*MtEq$hMgm@%BL}SdPxk6ll=(cG{TP&@Dk4y0A1VF5sFd+ZMaz;t7%0h>^}bG 
zh6eu`SOS6lDKh>)(!n{9PD5-Mg^O1K2nX@^r%rBMEq4i-x`>_a!HdSQVn#dMLlfpB zQ-=B9!|vqTw7o(Lum#v*BV?KEn)EgXobnovjB@))h5PR>GXH z_@QyRF5!hUb$&~Z=E*1${^qsiGX}-+3?f?_{JI_xyUAji(>j{{Jd_9IMuVesw9A_uT z2=^6~9!PBDZ|r=0QjShER}_m)GMR<7f|wilE6sJAFr&8G9|-sOh(BwrFhN-QmQ3ps zhR+&PlQ8;2R#zyB;p%GRn|D17_u;3z4oe$3eO!?&Lukhv5HipWrtiH zL}##E(oy;^Wq|hf^UtoY3*ZT5yA2~0iN}})H-rM0dyMvmO}`!sQ(+U^GA1*_{M=EF z4i<~R6NrXG?O`;|4QnO01{btWG{Rnhv%WZWwzXMVBP#Ai6m<;YRsbl~-O$($czv4Y zlthRXr&0@VJV!Q8R4Z&GFc`JMy9?ftY*$*&N@&q(B1~3`&nafc;bD1>f8~E? z^KW1Y=zazp2SW|)w+Iu$JJKtJACqOVRWS7&&)?s=LbxCc)wTw^i-p>!D)tx#JmzuE zFkaqc>t0Zwh_>gSAUC^}TgPeaOXsexRJcdvGmz zna?s7luWTJOL&tAhk~YIyv<2_2|r}i)nwSm-a+y2PNM^O!KYE)cLnd)#%dqdfXLbv zDhZxia4Q?e1-|#DmQ|wM{|9d%W9b-N6fyYfrpPjM1$3Z<(G%TP%&;{jfa2)+gr5pD z{Lfy2u2HW(2Cs&vQN;fI?RW@9f8$TKuU{}0;VXi@i!ywe zRy5v9U+`Ch^^ip;Et&c>{0mH962e;P`ug#Z+MYU)JX7OH|&iTIr_m{lKJih$fX_?j~JYm04cxlJlr5G;O2>@vnyIW|P zT&ZII-wMB_W8KiLxE@a}_6ox52zUapHxT}S@JEEV5p0C_5H29RkMJji4-h^?!0<72 zpP?5*Q$`U9J92OW13Ropx|CP~l~(zgm_&~W%WG78R^jQmnu8Je-fE9G+DP@$O55+ zY4k>8jG7Gj4Do9?td6}j;hhOD&2K=0Pib!6=5sW|rwty+cD3fw=^>DV zQpN|S>w~yS^Ux>Td{s>RE)dVer_iB#1o0zJytdk~`z*pg5S|0@y9OfRs`Py-t{HtM zPGuwHA}IWu=*pq}NbW^Agn&Oy)7SD*q>dxt6Pw+Ka6baRf5lstJ%QAd2 YdjAXXNLm>SrZZX5<$n7G>+}ZB}OzX48mb$tupz jD`EjEEn)=`AYD=1Ucvr;u6fD%DXB$8+#r$3NgVzFx2zRN diff --git a/nlp_resource_data/nltk/__pycache__/lazyimport.cpython-37.pyc b/nlp_resource_data/nltk/__pycache__/lazyimport.cpython-37.pyc index 8d4bfbd12454d2026018a0d1e49ed799fd83dd2d..20c322e550e200b86407c9065eaf73592c93556e 100644 GIT binary patch delta 637 zcmZ`%KTq306!$sj*v4^0h#;gWP1TB4h8DI;kr2>IEdvq@sDqbN%DE`26Q?hBD-cX{ zMIyQnz`%eQI#l@zto;K15M3*^3-8$|5moEqr+0eq_vhaEqH)u3U%T#^s2|wavwUqSbRu&`*A1g z?GG!nqpp6<8CNxPj~Oa7KROi@{@q$l2C%Arn(J)y@`c5isaEV((ZOE&P<=AztuSiK zK!T;t>?X6-H+%l2i(M$wNUij}G&u=ydxL#P3Fb>=Ju4OSJN< zIEK#qBo5vHL;!O{uTdxSR2$CB^4J6xsC(Zbb%Zzw;x@_%5si+WB^H!utWH24U=h$g zChE|@V`>g^qQ_zEks6=dVG{h$>W(4AiE`X8iw4SgENB0f%Mt>DK88A1SMDNi`iJ{$ zJ4=>DnIy9*S(Si7L6hJy!4rasYF5!wp@8r}ZIqjPRPA^M(65nB34G@`fdHL#n#-0+ XBmYuKt+o7B)~9h|P(Q<1-QSPz(|aQz;0(A_NOTd^Tx@jk}nb#0@taMX}RLLDBgD z3oA=WWo6?J*ly#0SUEQvToj$+aA)S8d(NDBOFks&Dy1PwtWjnpsU#Vs zMAjrjHhhcpH+8>!mrfYNS~m77V{OWVdf9@ z(~8y9p_ZSy*OtigLU) zDh)7J*UwJfOIB?(EwJ}GG!!A3L8L_swhh&C+ct{;7QhhFhw&!GZ`Jss%W%{K)7C|2 z&IjL;fDitq=Y$#SwK8_pxd~4bGmLF<1XKhS0h9BzU`%Wswbj#5JPpTxtk>J-e^NaL zW9K|cPqK;_lOrzsXAX|W7?@39?3TaKtcd-CPOW#Hb**^LG<4RFfTkdYFpe;R;7#pT n7%|8K5_~I~E?~7GBFf@%H%UDCi{xkv?w2g#oE92a6vh7qaMyc| diff --git a/nlp_resource_data/nltk/__pycache__/probability.cpython-37.pyc b/nlp_resource_data/nltk/__pycache__/probability.cpython-37.pyc index 6897e4fd3cec2bcd834b470200a2bc441c49ab91..f113338297a3341e1200ab268c48b9c8c55b84a1 100644 GIT binary patch delta 17467 zcmb_D3wV^(v3ve~{n>1i%{zgR7bN5XdGQF45MJRSj{x#eR$*Q8Z$eNP?#clS-0er zx{Xd_x5;T@h%5@8EI?<4(IsJYwvq$boG`XDjLlW@0Gk)aP7P!8l}Uh|$^kos<112%0bLwMPY)B9D5Zcc4Pz_9*s01iz)lNeE1`9nQVy7M#5k+M zRMV9Tz*U5)s+CHm>X5-XL#b9~!1v5B)l8*k2*_$YHj=#t(NX2rDs@m-=c&ayP-&B}6R?Li6rrCC|0v{7l{K?C?wjnbghDaFccB}*w#?8-EyP-#&nDS1kc zQm#}esYsZb1qVhn+%m3LSK@srkYBlmfFL+d_h-7udhAW<@afh zUSC(cU-7uST|p0bdjp!RbnWd5K%F>lSyXS-Y*?|^?d|mhw4|V?FX#&H@9}6UZbfl* zdxGwEuR9Q^WSXVj-`(R5iq|dkGYXl(;4wIvVsJ_da~hQeqQbf{y-CRg$7D`B$cfI# z#62HWi|<=kH`Ra+Cc)V7d$Mj2X8|a4wlW9kr@$!ZDveMoROTu3fu`t)!DMh2!_XFm zmdY(wW)U}}l_h6E5IDb}wl~0Q#S>|B%?^0JI`nbcHddVuMa>xK?ba;%U@95U0363Q zI><&6)^@EJNWXs#(VGLkJs!@}up-&z3V4E`jBb~!(nxJJ;~vj`%?SKp{7Ehsslw%o zapRMq<&f}YJk)~xcs_y%H^qZGM{bfX$z-Snsiq(iNreLOLI8(>MZVyfcPx!#W9jWt z+(A!79+R7Q1~ki_efWJLt0G`^t5}}(Ks*~8j7;%#xO=_9Fe{G2@B#oAvMB}ZU7@;c 
zH_IJYgG=ubr*ht|!68&inp9J#nQ)+E6plkXZ)^5;1%l`|E#DIVky~F*O-9)z(u6Ey z-R16a_jv-}5IK35mtfBtHC)*pT_+=}jHnbL}gk$BJ&07NhV$J{2i#TX705=iJY= zL8yQQy{WFu89vBE@j3l1uH4!)XK7a4Few0;5?T%-;NvWg3%`VEUMQ_Iuqs z)ioA&xp0=(l8#9?pn!4^olgg#vGC-g(bDv&0CbjkwERo6P+M;+06-~^?ueHrkM1Mh1$l+IrJ`b}tukG+b-RN- zUVqTrwUf_Bfs!xl?(y>=*^?mu4;2Gcko0Z)s%!@(9+k1{e}abZ{IlbSm{K2Ki{Z&@L>)%eOr z)3;nm&$j?>rFgfdbOT|gGH4|RzEBbEXWlz=9jO+D9KX+K(62kYI0WqMvWz-+v|-i)HwFY zjG=OL#%M53ugB%O@y@0xVq|{0dbXJ*iElL}H8H!P$1z|SV0$Epd#o+eo|;A(#wLm; zH+$XPI~8}!&EmSI1!8eiy1wp&lZA1b@J^8H7O*Pc1qD&kl+%pcBI*>nzoc%7$LEQM zc)#h6LJ}`JN4f5}km7iHz?!#(jyGSy>bjtl$g1XrGAxBb?s0ePgNbTdv-HrMITk8! z>10)!rNhr5G)7E>@jb2u$KZa7iKlY2#B+-lv|@zRS3`@tr^n+{cn@}G+U@W1jS;|% z`Njny8RZ-nD;B?AN%NZ)<59`RDBZ&}f+4tE4~ex)3MPTI!v7_{2YA2}eOH!~N?H;c;l5nD;BQqWCM2Th#6apBB;)`n^ zE=$Y{w><&M`&ektx>uNm;`sGq)%uD?4F4cH34J4!ot7R~m#-^GOI5u1GOjrsqI$Bp zfBk}?UZCfD5$r?Ihu|=RBn0F-*JJ4*0`dmx_z*tcj3B1cs9yOXR6LH+8%2^+Op?iD zx7h7+3K~|nQGGj9K@d29(h(Zm@S?PlT%M8*tbsg3PggOtnO&%FTIDo3f{(;H5+^1E zITngtE$-daS@n4+@mm2z$kC{yWF~SP;6hQmd42J1NICA5qp>d*_ikQ(F(MRZnJ{h% z(pt{3_XO;4(yS0)*-~-!covB1J*IM+X0d_Bw0#S(nhI%0zzzl3Vv^(x*@%DqmQeB5 zSJ;}UYBekDrXe9yU=E{71G-L4uuBYZ8hd!MBvR_-p`}+QN&3pg_~&qV;jG9B<#@ef zbZi&jLD3if98liR~1yD zXqq|b54yb^nP{w!+JT<$LQ-meJCvN_;j1367}cl7_Hvva@nT|MT-C_P*ZutttPUXfGJpi;L^JgW^Y$%ONUj+nNuJ2#dc~pb6FO6$czgSP-Ikb0P$*cFU&f~y<~fiY$gZ8I(;HT z0se{Ft+r63YnzF+iaXk;td4n}3?~~dh@861>HFIopama7Z~y`6I0s6VNfgVRN$8_4 zrK>&24~UQ3i>vQOGKz4?K3JPw9q#s^pU2x1$9qI9Q@+-OF)RezAgr`|QyNX6$M zY2x&O2K25Zg5Ar@i5_i}fHhYART5{rTXoV2tnoKVT;}_kPMVjPD{AjUNi_M(5^Eg? zrK_G?=i+ny>p=W`RO_-yvL(w&;li;cJp-M?#r!1TX7UgGwW!ymS*y~yWl~08zP^f3H@I zk|zi+{{~gHV%G6ZWypz7F#G3;2ajJ-OO%fx&rvaGTsbicHHrG+wK{jSR{}Y(YOy#u zd`&*i(s*6s1YaR0pZHMko9sq|3a?!&#%Y7o=`ykC3cxrXAXeqe3|hUrOavxd37eE-h9w1{(fI=-kP3rYT6xM;TsAimdAa zK0|zVe_K+i)*bsmpYVqh2qRQDBgQu zWf`WU{34t)q5cNp{N|w*|ACaGl&MgPOoE08*6G;g@hbk_AZ@%TQHdJ~w*8!#_TZJp z|BejC_gd8Tt>P;WzNRN+7m*ANe7pGax9WEHLMc8Wiy8PSti@sI!e88&PXOKJCNL)Y zh79=uY&IvuDUAs}M*akbush#O5AArUFlp;FAkk8n1Ogt8zvSw_Cg5_bNv_RK$>r4Z zhjhdkT^s5m)8!k0NSMSuAb$Di(fP=Q)>i&3mVN>t(wY?hZN!eF-j{?JS$|2n+G{2u zKepzQaw9?x$zW$iQOE^0;DXl5gCt-D2^B zr~Bp*gDN14BW^zn8jvexIw~!~I)D$1}fb`Z@A8_ILHbF);kfZ(xihx;W94u-di4}QF`p2CR~+rvf{Z0Ci-&{^?y!u{;N75{3h0dv@Q zEQU^CO#iqLg+DpoN~xa7mN3iZFVr+0G2n%MrrzcqH&{tJj zn13eNb1MpPYpt>U@x%U4Kzl;%&)vszaoC+#f8m)AE3W=*Q5#tEi-6!$MbYU3HYA!) z7bKGr!#vwh&y5e&r-A-9@zCk7HMT+t{Pq-ng|h{{86cb{A9vNq%ke7IoD3cLX{nS- z>W9NcNJfqPc`@?y;>J-OBL|tz4ch&CxrUEMU>MKur%-V!l=^%Un~w(&nh}47`vsKv ziwF(?h&UEW78gu@4fT(PHoUNuExQlte}jPBc^a0^0Ei5X0#Y4fZ$uEs2iL9Ka<)9;{w46IF9DWIKUocTnpx4Zz_LncLiMoi> z2$u(RQBgpCC34ksl(5R+jE2tC;-rKN2fIA&d)_vS%YOBz29ncAGR1^V5QPpw7w?;e z`=zay6m>r`i`QS;SWhaw6Qvt}5ybFx-W-jm{l(%JFXx9(EKH)sn=PJt%rwzP{}Z!# z_T@iaQkriNzxeg63GyV2DikC*P8p8A&e3T(dYY~*im&7;Zy*kJqaWwQ!tHLv#ax6+ z*@%12kb*7%V@3y)GsW$%Ol8kOfUOF>@k*g-I#P}fKA<_`8so49D2l|?Gb@K~Lx#r? zL|sF(hmY0iv`sUEL%HGN5S*sK;X7R5aqH**I08W6F<#gmqi1wLiXA#_0XUI^+gfB%A9JM*vKJ0n(Q1ss+2mnanBS=&sir$#;gms2E^u~e3 zt;orwbH(#P{zn9FB6f5ZA03fyBpMGYrB1Y-J-VKlSVu9z02kvlYMGdb=Rs)L7@GXs zGt#yTnEp7*G~woy9f(kvF?d)6iILm3Dp2$Iu9AUn?w2?QRGrbkm8~h&$fOXQ#x2 zZ?(*(*cM9w4pD=Bb(*xV9zG4DCIu-SWjqv`^7a9?nO48KNN*1I;svL-q0HYwfL!>W z5xj@sT?BstP-)W-n5{wH)yd(9;J;$c`v}eUWlMpacy*Ld4$@AUz4f_`_yZjY%{Zd;%MDf! 
zT>XRVHRrLp3D5?X!}xj1wVOVeY@4J{Jfl9vXfCG#5vC)e=aamA3J+slk#_oX#EDNH z7_*N26Pt}*fuloSA+G(jC7i};wsyZy>B22ufNt@?U~n=!KD zT+mQ@+u=6B^3aE$ewUT41N6m?S3ZII%f5J;t+fQ%0NmhH3mAK8I{9;W8*lE2*=DR~ zT&?Cxtg#-I%WVM0j44Kn%(hs=HZfTHv(=J(@Q60GJ{dH87%nExR?kW-dkAh6N{TgT zJ}M1cPJk>2je{1#$x0HGtyH!J%|YuCse{oqdU?Qt_YUdLI4CPAbf@u4N-A(lX*Tp7 z!20cdH^KcxekLd%m5v(n4Z}$T@(E~j6I0R#lN86GZNSVwJ~G6BDc-A>k1}Km)tQ4y zzJenL=uU!bj!-iI*BPiL8ye;e+LYX*h8(;knb(ZhB5g;3#{j!m(l1J0d+!x=b#lO_ z;I0?75;W9m5zy$h~U}`XJK&ESyQ(B<|gWdsL?=GTyl-AoB?||Esj_4iAbfq|a zn=)e{DO}2=Yn3H%&!=?cP7`~KS-}+ZLG5^}a#LOtlve0h{?Od|Z9<4I>N+c1LWiOd z5X!NJ_V@QdX^bhO?%5ZyqRG%-O;%LTSXrKB8MbXz&s&)?3ug&msI$NoBr?-mbkriO zj`>CfJRVf7RyI!^mf5PI==_N^!&wD)iIbvpK~L7layT53%VOY*p+SFgFWeI1#jptN z3ieB7a9fgYM?$1Adj0L2`4Z&H_)_?4#~ly;!-%1-Sx?JQT8v4 z>Uky7GrkH*F9Xn4nZ;M*16qQwK` z?v2`5e;)bH$joH+Psy%(C#gdBNc|SXrFrCi>NiuF{yYA~OohwBDRosE(@z#6D;D{@ z?qRy?)1b%t*00 z<8gKY$&1zz;JoJlkLfkQ^!K{BYHtpEaoJdDBc}<`E1RlHQEo2Qn69brO{sp^qELMT}_+Jo+_r5WOT|D zo1~av=c#|4%xY?Sp*ZfQ>r<>C&0sP@5shCZ*s>{Ga5NmH^mxivS;%!kL150Zec8%TVKtY5JDi?__njnf<+b5_UWpK*UvuQCtIf zsn+n3Jo5BHwvAOGF~??+76-@aA|L9R+PaASat7uxKpN|4{J?_!rl?C7vybOe3lmAD zH;Y+d{I)1TzS^^dH6*@7br8uCLU zFHrxuj4jdgYm=U*VjR${;a3phT?^hB?;DsSH6R-s6b$-{NRo+B8XY~|z{!qoy9_=7 z%$5{<|JyJq1=&Hvps}wtC=I}l5B7rd1{c9LbJ(cB-+&?4pqM5ZPQZQ*Ft7pEpK=X@ z<^kh?*$kUS#n@;t8k!B)=HXrrwpeEMOe=f28U_dN7t(Gg>>Zy23|+m~-xH$CxA>-z zMg7fkHmQiJKSP?k5xj&TrnYP~Z3WBCUyK9A#oS<#C4AQ^howRu%}dpVE7<#VFN9-v zk;|8O0-l`JQFV^p1H3oBrG@ZnQO?q12>HM27H@H#eIe}|Ki zXM#hOV>kbUJup5S%P}igz~f={t#!-~(do)Ib`9+S--aGw1H?2FzA!SH!{{+1r!4^P zP~T}|WlX5)>)F+jOVBw`5ouU}l%te|kJ$0u>fP(v(djpjBE@4_kP?L*B_;L(8-NY0 zY^W49gxbuG*r?MDdGhn)-TFpn1RbgH85zW%>x$_UHkl83Gg2R-f0YU2A!!L ziVH8oyTZQ;kGJ$&;e{6*9$;_}!+fk50PiY=Glh${SFW6eu}Uecs@n8u~5R2X>{G$>^t8 zT1K_WJ^SH~xX;z#ioAKRzc=5t6OPtVy_&_pb2kXZ2T9~xkoy-997hoI*ze-w!w6!& z`y+fL^PI)fpAdYE;5>qVAfPQJW_27>BuX$Shu|6bNXz|6q=V&V2wnh3ByCS=zRfdF9%O9+dAYz7oinI{gD`96|?nM}Adfh10v zC|I$7H@voEt<`|Vwfd@Ki>Y%R4w=(DL*=&2g>KT?9q>S^=OG=tY1s7V={> zC*+-!FNS=fRs{JX%8!HmSgjcH#gs3B{5Y)y@+Fing?y=22Kh3*6zG*fzFZp*`SFx5 zhx~*AtpW-vsKIzBn5a#H{3Oaxfc#|51$h_cD*Gjt0uGzIblGqnaNXwavU6in`EP&iAQ4P~?S zX@FD(`8ir6;Nbu3}?czgkGIPCFcsPVDT@-nfsxLoWRds7S2aZbB3Xm_=!8+XsjoOX5? zil*=Kb_XKKv2g5}j{|UO_YhIny_mz9nvvXwD$P3e0L$ZwsJP#R_@OgO4x62@_S zW^WGo!x3L7*wu2kxT9=NH8shah*T4r3TubA%iE)ekBZOAE~vzw6O1QJZT`-1?yM=T z{N8x5e2TJZ5}>tm&_qL>iGy{7N%wa~xD!%*qAaB`RAH6b!#%A$VfKZ(0cW@-@Tq`t zfBek}$5<_~X_7PKfMU1e+m*0{y}_=4p508`H?e}9h(A5?KBcY@JCl0C>#;M|nT?7i zrd?d$`%?T?*A7Xs)pjz;tS*M<&Qdg;6UZ z8;6RHfRTx+uwKjJiD^t>t+6`BE)WZ9X3b2C8iqU>hQVC`5-hoNsFoDJ5{jMT#+sYw zqI}-d0Dy{A;EzHL6Kof(Ju?b2sTApUWnxEd>x@iKvH{ISO3j9MG(o&lyF00pU@9U! z-AAsz@k-HNS36KQHc=S#M!Eu_NWkCL(AA5ngeCc#w=UQf;*r&yhd6HlmWgjZ3sMQY zr(KVD_U!la_Jj%71fPz@#&(}KVQS}oEn$Nd>H%{T&=V#ii%Zti6Y=}@gcD}&?QGWv z5@XV30dJcg2qz>GWn{WnrcL3F(Ed7`)ZlXDWjyuQ6^L+DXx@lmE|yu6O%!hr=_?3f z0alVul5p2U_EY#O;pO6@`nvHdD^OI1p8{r43Rn>eU))mW(u+dnXv2%@p_otRL?C?n8WU8w&r%dQ}5~GE$hUG z4V_h4ddcUXiqE$}*?KWJYf1<)NfY>LOszqH8o*~G*a#qD?&4s$>eQS$%bG5nmV|k~ zUyF1km|sp-(w3q$6KcfU)*XQDp-~jh{%Ixd4Z{>at^Em;FVxk$MSL{-^=8^ZGJBNO ztee+UR2)q?<1UiL|gJBnV zLJ_|=;0kd~=PqO@?2Y)tyL$bd?bN-u#~%uY(^R;=E1>uI_xU5eGXNb>dxJqe!h5N0 z1dLADAED|7>{{%aGj5=!4m(~0tgQkwR$Jq(sY}yCp`|k|EiE9iVW@4sEM%;28(!}{ z1)f(AnTt@q-298)+B7^_$uoD4pIL60lYS zy2o=RT4QWxm>@o1Fire${&I26`}vB}B1{V!mY9=f*8pahwWdgsBX9d z5U!@N;_fDAeX{j%A;?`pTHNMc64=!$J)qE>9OFBj_NavvQN+^3qe6eKHDs)?tJEOO&OYhXEZNjuWFv@Nr)i|gqfbHN!}G_kq_7fZqB@cr zgp-Smcs#G53~+b`OBw4c%#wnvhsBfpr|=+PL5_awrif>kHmt;h0!LBPIh0P39FK$? 
z4|H@I;!`k%a--vp^g(7~mK>4wqPeAdfOkLvt?Y=$gTL+Y%L9nD7Xi*K??$j6Kq5bR z5ccVTK9=P#nmZSCHaPv^x zm+^_Knwg4B!bx&acvqKI6J2?j>&k6p4TRdUon49FzPglEo`w~aZK)z&S@Y)Uu+T5B zRK&~YK3Fw6t%R!i!*qO|9{>K@SD1yo`9X2@`r0P+DuH!k`XHp;mM)LK(;p!(I+uMF zU0Kk5eZF{i{o;Whz~{XP`Vd@(;1B`}0#d9&OkIh9bcH(pI%cmyfGlSLn)CrEdk0k- z{Y%whQcNa?#of42H(pBg zW|N{8sYd+p>*Uf2p8qpuk@6LDwv3yNijy#b*Ba(D;G}420#XOXA1{1V-si|Z6Ko&H#gRKO?CVzX z^hJ%!CPAvsoPgUX@SY=FYWq#tB!b|(SUtK*+#(iTysaM5;dHO#aWZX0a=RP^q}#u`;x{=QYp?D0MB?WPVgruqC5sDUdGT(lm|?~5c%9x$GA zJ{z!3g?m~o3d@tR6Uz7j1eYVY0s+Z>45X5`8g5&f9KS-$@VKVmifCl_@;c!-e9SDX1oN3Zt7u$B;w3-HI$!VIy_US00DScZs zc)!NJEAIC$klF9Oh+s}6H(8wcuS1p z?vB;fgpX4&3Tdn=H;S?TRTA|KLa#82e*d!)6`i?JsG|5L8%1b$)#zf!LD6m|H3X+r z`f;?dGk{jcT~PaJTI^PE(ehMVa^tv5ZXC`%o7GBEygcb=(#;LcI<8~sV%{w-5071I zhdgZ5czF+dv<9~b;SjGgOKXUHCCu&KV9-lbmN0d~UC2li*CgHu|6kV`=FKlYig+4n z8orNgJOChdMv>l|FPHWp;)%iT^Qz8f_V$VY>RvA~&oFsW;oWy>195Rbc7b9@s*zi1 zZ_8x^{igx(;l9@-@iR>K0rCF+=_6YAL)aIZHa^mnlb442lGQy;KF#~RP*H14d1-mz zr78ACn0XT{I-_dhSnq>x@-OyP3CoKQ^*m;3LQTNK4q{O6DWMY+qpii8h3J3ztj2qf zxbAE9=ZZz|XZfA`2u`8;qxrKO_yuP)}Wa#lVxMS^pf*KNWKiK6h3;KQF4T zto~o{90yh(%HcUKSKWPA4(4S&K}jCpr$GZS4O8JqPAg3&dRvoC;^M;#!QDE1cv=k_ zA2`Fo!z2%fqnghf@X;NyXDgR&HzI*T@y6lm^<+%#ZQii%*@fmbk>3I5;ZS!(X5~^< zpkXz&L4azM;({1JB{Z4h8*lWm6=W%l{5T^12*D2#J9$F+DHhZtz>#EWCXsmr%BoHA z55yGHHY7tEbt~u>u}c&=6xUg#K_OykuH32~j`-m+Umub7kD=i_;k)_IF5+k8?e9@Q z+AMuq%%335tcQgo@;L}dy3oI~QBps{ z4zpXl{GFX6JxS!a{JX@aTmLTmMk7Phk=ou`upnnMz_k??;qT5kgSo+Z;wVs#0^=wU z=}8V^3R$M^N8fcG29#G(@5>;e){^y^q86wXV1aP=5tDASC=VNA=TBs1@s`_OHEqHc zJZth3w$%0?mKtgy<(&hMbWn*K-70#DE(C>K5^*E$Pnd1{sb%)TtCz_JF zz6NoVPa~CGTCU`91>SFh0l`)Z52yO(imvaKi8aTjR-yOG9|tf>PY$TMPh5BGV8s)N zIf$SFKx)O%Vi^<-ca{|A3U1QYH(bfyz@2b&^wYfH^SrkT<2T+t$;Kv% z2Olh6fqQd8@oaS~9=CKj>{w#-Y@VH3J*VAjUJ{mv4lSY`9vkwf5&Ro~RBICHgIGF( zM86cjdT7&Gd3Q#$XpV0 zd5q1&VYgp${fWQVUh>tlR+!D3u{ByKARXTCkN7$~-U!4Y1Vi{}RBPq@a?$n6iaeUz zWLL3YF37cB&jN)<#M{5Tqlu2xjNb-&AqriSqb8&rD?JkiPyFiQJEXR>GS|F6j@EDXPP&g9vg6 z4?@|;aoe+2wgtxt2PvXQdwDhDzk&d}Po1%ll-sl#Wad^@bGL=VV;1iz>j@b$T7>Vp z7dE6d$!&xnDNd{+gZyHoDxH<22;sVBBn^kast(<^XRJl^KmX2B;?qbxWtct9-Zb8l zRC0s4uM*`Ze*8k?S()ETizqyK{#lvdX%=zlz{~cz@?BNY(&ln)R6}1jkQY{j{cyTIw zMsBFV^9$oeFHJJlAbPfOf{AdX2@;uYkX+a;AQeJKZ5F-YBQ55pClF0c0Ro|gDebxpn6pmedA^6DLDfB&^reDvzv zIZ5>*A^jdnV-%bh%K+3@uu}c@yKg~%gGFxI8hx!((fSnBY-R15$iCkL zNb=$jS4~3MKO4St;MYka46?1bYf+FMJVofnsLnrQ!3PMC3V$ELM+iPd z@D~7ecG@%D)+7`BW32frg8xD=D5~C>mbBN++f5E2fI0EDv)Jp%kfJpS(1YGV2fM^CF&EPk(5qT*zZ!U$H`t>S_AelvY5 zB&9rXj~N9&09E-`aqpk&2Hr)iJl=!Na-JjN5|;*!M)cp9Me87she#u9qG-G{TYrLz z?^!^PoRENXf?}VlPJ_dOot#Uo_2Cf<{}h9`;Qc!$rbvq(u~e;Bsc@ zqrG!k#i|eHZNUeTGGr6|Coq$rV8Q=GfcQ{AX)2{@C;+493HbLwICn=Vq)o5_n<`)$ zSTQ`RRv7IY@x$?(HlFb$!rU73?84HMa@vWop5?i`T(kKj#?v34_MV2Xn4A7 znJ=L}EWZ55MvIIf5t~2%_Zm|85u(Cr}#GY=|0%REQ6^MQl+E#BQ@a#v2%ax!rIz>)D4b zcXVDo0D;|5Sa669*&}&}ltYGc!;mAYVw^0-bZdqEcCDyCFKPy?*;g@Ob}TY`2*PX* zutQ8Mh5++%{r0^lkiu04U`>ISNdP0NA_mbY1sY}jd3#S_98SNZXCAiRF{3{}Y8i++ z$}twb9JqC8y@^lVr>dVuiJ*kMeWHr z_7aL?w?WL%)cEeN9$_+$eT%&jCTFu$X6}GT|qYmEgjpi^W|vV`8%vX0t5Dwo78m6sE1jCC%qaI=K6#7@RMQ7GQNoZ;`==Qr5*{ z;NOCLtiZ_DjuNA-By8#RDvy@KY5g#ZRujRbv_Lv{o#H4?Wc9oZ@flx^s4FmC%2#6g90aQn zG$2@w;7bI*LEuAhE&}oz;G8{>9m2gHYTTB94gob9UyEoP5Nt$n9)fiUS`lnQuo*yI zffQ0X7K}%5J_1xR(y)wDNUOdDD=q-wwqr*FV17YJ@(eZ;>rmRT^TF#GhLOVe51|k) z1RO)Y0&$K7peqiO17aR0!B>F@{sLnvqTd9ukQSwknc!oAeZiuPhm>l>atFlxmjNu2 zLH`yg!jG!-m7(Gt-(zN=3=DP{e2ngz9I9Ql8b4Pz-Oxey1`SFmXEVz~%E+>}xhAuW_Udlgk#jGiXbTk2aX$ z>ioyp2R7D67pbWpa2@69ANKGL_AnuKaxF8*TI_6UeHf$mvR5E=Li978V9R%7pV-;7 z0n$cNL=q`!<817VPH42lO~>rT0KfwyZZmpCDwUoKz8x`ft-A}->(vwX!-ZE1)}#lK 
z8T%9{CY$vyfbeV7BU~c}hXY(4g+3L>f1Jmf82dc-S18;>ozY>;Dw(+y&=VG}hvDrr zj=O&%uT$Tjy0lN2^nf1Zbx@uwz8z5Z_t;+wnCo~XpUq^gpCghPb$rB5xCN`Q2M+JN zLZC2$kqCPL#i}0vsDN$w|3`Eu5G_|@Ky-5KE+;#=Y&f^6dl{5QE~7bWoOjLm;h!Iec-0Gxsd%}}#AR4U54#YErlJduq( ze#Lk;ko3MHGhMl<~9 z!9v@PEWnonJslPZ@60>Cc_M4fV|lRywQOlQ?M>7s(+~%KH1=98+t&+eL_d+C?={eJVK2!`M-#Ad!1y7_Zd43GI zUVeOaJ#0*~%Ah>Sg*!j>7?EPwvgpw&z>%DceLS6MRYx<8>mxRb=4%KW|H0z9v^uL zFhG%Sb&RN=@;bmV!1^1KR3Kqs{Fr>8d;Hie7P43dp^RS{8^3_9WTmkk3)qE|X_b&; zlp{QpPN^;S;sW;H3nyT=6#!C-LWFV}ltcYYXeAH2RtxK!5G!2DYDNAgTWow2D?Pq; zA&jmb>2Rc&B6cNml|yVi)HlWUEn+XupsP5AT1XzUkF?Z`CL38gCl*=EK3PC5Ky0aw zvaTpbZi^gjiXC0T8b=Qb#^G^fi(^mnVm8BchL2J}`64x`Y8mKF3EZSOR0l7O&2DDj zoz;NNOAw$^ri3tv1sQrYB<%9i*yqix`=4lpEP7)@OIg$M9Hq~UAk$BV?}^ZEM{4@O zM>X<^vCrB4ad}D}~40Q6*|J z!*N<;P2lo28}>Tk42%Dlf+<$AjQz)@xPmz9gA715$#4aWH2Gbq}e95s)X#LvHG3SN9AajP3bWYp^Rd_0u|qZ2Kr+>b8`H^avy47sFTag_ zV?C=Hn2HiYX~_V%hcWeY1kWIthP5=K9J7BzFcZPW2yjo9!OkZ!1q(BK@j0aQryz+Z zvFaB9+%leJF1*%(SBT+Q!vH5`9#1FL6JNXdg0ts^tNwwfy>9QU7NQ_o`?>q&X31IArQo1{a4Cen|nFl~Hy z?AzzDDXcj5*m)qXSuh&6t-U*wmN z^~eUlgmSkr+#Yck?9x2=)`BPb7$4csVJ2aPmzQwV{~6oa%E|}k0~7eZRnY72gxkU( zw0IAfz5mcy5?B?Ayc$5=eEAa0pWbqH=ma1=pC#U8?J z9Kpi~NZFpk6v^*xO#KPLrwIOm09{piP(&Y%51_|G{(^iSG#0b8dmjVDgeBCr8^*

C6Lf+ZlUyGi&}IUTRWU diff --git a/nlp_resource_data/nltk/__pycache__/text.cpython-37.pyc b/nlp_resource_data/nltk/__pycache__/text.cpython-37.pyc index 8740f6754cfadb7eecf93953c2919d1de8af6ba1..8e5f489bb9cb48f3f7f73968253c3e16513acd56 100644 GIT binary patch delta 6018 zcmahNYiu0F`DSl#Z+&;R&#(AxuOE4w?Zl6~oW$fsY?FkBS4?0-xSVhH?2T{l+}U;F zSOzx_LTLyk9i*m(ibSO`pqKZlgX;mTd1NBEGsD%0!QonEZ?tF0* zxYK_7?Kj_J=9_Q6`DQ-+i2VC^q+)M*dB_L9x!~pa<;3NRhNUs$^HHDfql8LnVoADW z`E|b~>#`Nl0|e>*v|r!t)aEFZYp?A(%Z*^mSCpO_FA^Q+6w6nVE6oskuzjF|!$0rRGV~Ml3p)F_T$p#M7o@uteH+ zD-)A;Ha+JUanqX3GKVip+seuuW8R57r)T+x()s4*+0)K+HWMF;7o?L}Yc}DSlj&T_ ze_g7tgZX6j0ABI2PI&!iWB;7Z`uQ&9avgrhh)d>0iwC;+r^*)!wy;kAaj@P)yZC>D zFKO$L+5x~Nr(%*T+eSLYG{K6Xi@&2@JhC3qJ^(R4D+;g;NRU`RHfjijg)YPf0PKaw zmH^1Um@1^*VBE>h7#W)hh28vG*~qq{xE@JnGr|$*NY(?Au7AcjE!2WxB$OEI;;TZR zvT5uqG=u|v!-}VOL zjfTGv`TMagC>dnNNrTx&EXacR?T0xRI>p%7W~3=LOsPR#|CHgNdZ4lG*g!IDhj=H2 z+e$6uJ}dvD)PvSwAi~L#=VamE(y@tQ_42<&=lTV^#D>uTjtk~NOw8?ME>?|hD*C6u zjMENtt`^4a22N!e%@03sOs+mVAPnSZ9u2TNJwXNH(#mE^yf1_X`1R@o;dW%i)We2( zxTd>2f-(V*cZ`qJXf0w4GP?_TMiB^ijUm>>=W7m=JNWxG`|jyR!d3(tf)ND!5Zr^H zI4BPj9cY1uHSpLK08~YmBEj0Is;ZTt5NpXDudOAkg>AxC->gDNqvW@dzBc!Rx|7nw z7-HTOi)okBrtP?DJex{EP}yuFG`mV-cGk$y!uT{45kV6$Gt`(DF;LP~VP`9!Zus!4 z1Nh;_-J}CUf*k=+SZf$irONn?rYjx7-J+|4wKQymi@I`l^E?@DMcHuxhailFjE)o( zbp2Lhp52X~wFn*p5Ub3`Fz;#|QUq%UKiaxpX+(}D{!D9UczFi8a&NU>l*n;Dt_|uG zINX59jG)=f2y3pA$}%hA2(Phbv=Gc&K!wMJOxnyCHk(8OMkq@nD6zu-q)jY60z_~K zzLfs%yj1^#-f~rKAIbcWdtb?!U>k zX>76pECko>+CBmg%kU3k3#66b-P6#33*9BtD}{UcV$UIxR!>sIbPelWBj)6o4Z+L*;O;p@C7`!0zfU3CDqSn`0?6C@*Dn(-rA*p zD$~Guq6er#gRr40RHbF`4$=V{q2B?*AM&GO79Kue+&%5)U0G? zi>PFUs|Wbm!PxExu|)&m%9(^^u!r#*LnJ?EqKz#fgCEk$N+nzQUk8u0Xvm)rCb3RR zOz}sDjK{Wt9lww&cZ(ieX!^0eFq?-}77#3W$~W zRqVJj-{8T$6EQ4KT*We=G+((O-7qr}G%;yHm4EuJ+!K4Bm)dSuS-kHB^v=Qd0!6SBa|7GSOs0(uF~6&xO0W!O z5?4mdt=?zMGQ-QQN4zEo_hlbkh2gC5fvXCGRbVj$E)eC6PgW(Fj~%|=j-sxdVEJ6- z$wF}Y*iQh|^5%&PJs0pZg5akBZgU`g37VR7?@hce_hC{7F0tk3Pj0s=%rNqH^OyBa zcM1bD@X20AAWAY3Ll{1V@y%n?X#^|g7}>jX?MH7As8ifW`bThsDa4zG0v!Nca)Q<2 zM?Hd@Rlu&F55WC=^jN$b=IS=XghM{&_INzOe}C+dc6&yK#W>|#9&KMbhXxH)DX~(# zxJ`qv5HGx%Z&~X4Cl)8 zvJIFU#2XkhZLn96{wji>BM|3_WlsrAgvJ82_TiH314ONn;Sd_CSN&=*R2QlZwW$jH z3l|Q@cWv+5i>wLhifq2EUAHfIl4_^p5&OEMRaCrXH81pk9N!hB=nZ zCj(&=!)1|&liMD0gVPB+;W$hWrl8vv1q0-%>=(!en1m}zhS z!%);Cf(QwpnMv=4j#VMOO3+KILa!hV=jnJi%4$$I-dngzcJhR=E9o#%w@e^iBu?RE zoEV0_5g7z?RU(}RXNaj=23>quX6W*}DN~Bhf9)*@~Mz;Z6 Sf*i6l+OM{SpghG^{QnO#RNra< delta 7821 zcma($ZEzdMb+>m1;0S(-BE@g%C{ZFIk{~Hjwgl1Cw`IvPqv|87Pm-Y{_DF)n0o2~X zq&Q$22x1Rh9L8CAd+NQI3s{039ic8yUZ`a-E% zyUwT+xR$E78;k~lR{_4xXcTxjwcc(rn(TbD(JTnnsSS3E(PFn6t)g9%YO~voc7fNX zI_yrPlSo#O*3o*}K-bYmx}G-C=IPLiY;+x!=my$yO`433~1`27?LHN7!Ze4lm#E}T`+U8A4EgH9;wb+$uX4>)jbl4*<@CB!mDc52pz7g%4`c~xI2*4u?z^Xb{YKnE01~Z+1 zq`h)(JGLVLA_^-HVEsst*#LG{jKJm~fP?ULWB{rZ(ZpykZ<*;pWnG11)lsq$m+dKz z<+2gP0!i1h9aal?B*+H%=WDtHHsSKDxLQM}`=h&#@vQreIr1 zrhS=WZ)^wBf=-6H7WI^A%SG`(V~=77NweMJHzCwkIYXhR?mcBjqKd`>%SQVg=fX{mfaHD?8Os3+Q zbUd4$OJ^3+Ucj>F-7B6vMSYX2>&yW~bnhx*m#;#EW!vFRlb^yT% z1m*qun2xkIzSPt~+W6o1xAEIezusC@`{sN31?v`|{Wj8j3hy;PD_5hd`KDI*$=0UW zFtoj(IX`cuX=!6(8Nyov$uzYVh4CvEqN9va{VD} z-);X=x6rd#n_#W<7oh;X@OsB0+1-v~4+EG0{}p4-Aq59|ift~kW7u4W;4FYhebIUO z(_K4)f^{Rm(KQ&{fE+FS`(2x>SM{qGKJ0o$CKvg#K4O@_;RWUy91oD3OpDnd^kkTA zx(v0(T5*O@aRww@6k{MDS`LdN0bP?_LQpXa*SgQ|4pRf%Gfl5RiOgYk#-8eRroq}05nyV8`?EZ3v0pfX3>`bvLqFL-2Dx?8%LE+ zkvI{n0(IzuBXF~4tQS@4sc~jPWc4?@8YWlg9W5wZ@2X8n95)MKDb%+wHU?id$3xqf zM$mE}{4ac#Ib-%hHWmLmHKZwyqaS`0RX$B6ImY2kNh5ooQDQC_{{Tqlk%`JOmZ6;tuw0tOKY1lRq6gMYZApG+3Ic8lCfi&tKi!e0Vh?pk>|=mH}j` 
z_Y5rmzST#^a5E`q91zo^}fnDRjJGQm+MHs~nA*csngr``>j{CVu7k_VTdk{l3 zJI#MJ*0t#~fEH6NPf2HOuX+Kl-poSWfnBl}`ToZ`^TM|vO?(-L=>WWjwYt0kt99U% zN>Y88z@Ho>L8V3xlR9WMk|1d!OU)15N7+ME7>pi*3wJQX#n;m{DJN~aDdk8`z9k=) zR>-pA5?9VCRH5p1a2I$!1i)2-=DYl#_8k9M$H@Ob9K-30=v#D}Nr2_unBITn#ED}k zPaivdkC*r^qWk`_SNDBGFF6JfGFhfyu}tRd)uYj9xHy422eVSzk`J90F3pO76E@trl;2i8xLLxP8KJb-b`3nVhxG+vM8%;dD z+sWTQ_#2PlIV-)yR`YR1WXhhhkVsafojML11n^({nM2PEmV<2Nc#p)_>Ig0O-~59^ zV|fux+fa(o)|U{&Ws7Hr5r($|g@BStJN{z?4dPYX8OMd^lz1Ji+Ps+ki?}Zg|0K%6 z<5F^_gNXk=0>Pw9QZ+ohoTQyhnd}a~efU4f)4a8Ql>hCK2_89K%YRY-Bu_nl`;%^o zfD9sg`2M3&vXf_yo*I6j9@v*lOYzkK)B8SLEAtPJy8V@Ue--;~Jb)i{T_^KbpLk^M zL8?1}Lxf7cf*76!CbG2RYf)&UtQ@1Bndv#G9F(7E#9C`Al=we|kz-#VJp(9BNccKp zNCz$NN;x@4R^$uvva+nY5W6V3PL>0?z)gr; zIrSElgK`+P*1?%25vps*Ql|*h4`Kg+0%!gM z49^MNGk&Vzi#ibd{}~{D;C3ad+=T0VR=!162*z%>+7(yLNmN~toyXk(4YNNeU~_IsC~1FROXubh-1+o;P0Z^6ZJ`MGlvjH!E6T2?M! z09snQ~)Wm>-+%7xteoJt#VDp_JO=b>D{V1ICa9iuATaBu4gnespr_3Abk>}>Wt;uec1Zgofg zOcl~Zl7yMjeN+0nD^7Z#q)J?ao?*I~&g;pv!BltVnNq!v+n@$;jq;-lT)7yCnG{uat@C^6GwF%=4tbkPflDC-5m zfv1v-MmUpo=d|MhbqbU`Acm@t>~2srA`|@9ks5ge zB)=N(IPumdykI=lWcQ_JtN=Y{`P~zrfmb#x6+Xr5KT!t` z`&_8&qDMWjIDroJJYPOFdPqz#15Nf80#TMoASMz^#h)*Wgl7?~nHkxw!Y@vJhmdxD z`^?Z7zGF(YT9M-T)$m!?h@+eC7Xh!^VsLnwx1WvoETe!2ZW9XLXn}bAHU8DJ6Z*p$ z8P=!%fLDKdQ~r6Ja1uqztP+nQ-SFwFIAMsx?8Tp#u%)nXz|RExCN{brSRTtkmtP|j zZulHFUqFzLBNWLxM+}o?zxMD&#J-B)1cI+4Ku>4rP{RHUt?Juj9xyKi8Q?sg**{16 zcMyCRfhfkUnnhqbb}UHJm^28d+8KtX)~E(mIozTtS}5EcZVGpY*K0xeQ^T9Xomy=; z7#`9>T9vjXENiV$b6O}oa;{Y&hxn(AQNKb?L^f0kRAdUG2$blvEK$s&#H{F9Z}T^e zJ!jbKfEpo;Gf4{PIgU}o!DSVdRRfq8jwk63`3c``0KE>3%kctNs!H+Qs>a zcaS!Y0H{rDIP811lVyHsvMDbjCGMFG;n*s{opqBb|80OCXVeHHJUm%4;s5W7 zVFkokC+L-rV!vQgE9j-X+rP`OwC@Eole4fLCf;+WfWq=K2!%G$0t{6mA_m4TGnIl8 zp0Lp>(1F9##^AcnrmS%+&^Y@6sB#Tl6E$#YXn^^D4Wt1vS%!wtfBy%d70vMg diff --git a/nlp_resource_data/nltk/__pycache__/tgrep.cpython-37.pyc b/nlp_resource_data/nltk/__pycache__/tgrep.cpython-37.pyc index 84f29c7984e066832a3af4f63f6820d9e4ae545b..4d6ff92ddb43da15a9706581c55aef507201b2df 100644 GIT binary patch delta 9438 zcmb_CX>gRsmDArG8Xcos=o}=04hG!^LI@-zVGv*tmW?qCknu?U35^)d3_UZp#9?89 z@e#nli{rQwXV=-ZBnj1YO6x3;(f2b znQulTL6{_9zwYVocfWq!-)p|{*UCGeRq{t2jvN#IiDq5*)ZDn9U$JSiu50ASPEqBrv@QMXbWFe$#*%5Uo4 zt=^7kSaiTbQD;dO{(c<)4_gq}nMDyWz1822tZ(vFdM}i{!B-EEw zvr=67!DK#8i2|B8`_+Ib#GcD6ifFW$KnZ~~N+kQKsOm8YzJQCuwA^I1Foor@e8t9! 
zSq}C@Ib1FMuTFykYN@n$YHZ%cbS`p+1iY2A&F>3Grh;>!p%}bU*5)b0Q8J4oB2+aL z4ExA(VJWRkk=R;Rd`gO#ASu<&^Kdmhr3Sb+5DEV+QVPqhW0m4D8xu|x>=-=1=9imD z1SD92jm8$zw7r$R?SvXo=R;!XO`@V8u$WHwz^RHdXssxPpOu%^5vr&}kPyMpN>t(K zn~+=4*AiDpRMTQId0YumLjzKXxJ?}3uc#_}4TsZ(u8)1PB8M%P*q1OyzVMxxiOdDV z5szQ>PV0)0F{BXx2wLUltt5oYs+s4!^GQK8tMCR+sA3IGruZcx;CiCrfPGcfWp88G z5UP;nlO>zrY}Mrksj{KBmgPZbqTk*j1TvaQ5iQVK{nKd@L15mZKx?5svAw&iWPoLPCe`VnQ2!l1be6HsSlQaIvqeL<@tYswAD2zE6IKTSNtdM2EEMRX z3A-+w6dvQ4_`b-!9g zbzv|{Q@sGL)w>4Lgr!Kmvk`0_?=wb`G1JOkUvQlhdq#NDo-8e7m zX^}0>4tnD0!70ruS)|*dc$qEal41<1a2*_KXkh(trXjYTqD!;-y_2e6*oaF?Y{&J7 ziN>{#4nmYMY83-;w(*Os3|g8_u`GD8sj(UbYCF+;Tlyl6 zK3<99X`ew?=U{@#;S?saZ!jSQaRC0ZeR7bPU<3vqRmCd0hivTykES=}Qon}&K)?1$vs#R0l3(t+wp+=S8u*Q!y_>Y^t|bV_!ntYWjxEM{Wg z-E_%XK-L_;#f}OucEoNtzV&>+bb9K^n37+#?2y2dW(tp*6jjJJFEcnlv8SJMtJsHN zH4DtqxRR2s3=KaIAK!Lnh>iioBP(i_$zaeg;v?xfN)V+K0cgK}2@VW;`Xv7#&C(%q zJ0a3g{|??CoE{}!i3^Byq$HDp1h2!j(Y^3qTQZ0O)Ppy8k%G*#=Gz@!w=kz81L$092~iS zP#*8`OI)U|_&NM)4I(G0yMYGerAZk7c}GQ(s6y5N?)QhW0vKxaZl!| zI@46G&zNrwoZ5S%PvWI!ZA(g?mH9I*$&j7b!8`l+4*p?%Q%(T=zUUwmjU%Xsx9+{1 zv7KFMO3QDa+eGK;2Szhk_J6ET;$^CI`=6|I8$39kxzfF9O1Eb0=^l9Dz*5G>-jWRJ z@{C#b!#nq#&zR*kK93!)f3q?{Nl)xO&+$7 zpl2%K&GB^^3nrvv{#+sjVEX7)JwjH=$;Jn6@{92_eiP~6_yh0@cMpU*@)934#%jE4 z$i8uM=tyww*ldgOF(*{CpO_{dNHb^Sjl5{`Z7F+m9EOzBL4@og>HZF z!?tpmn)z30K@8yv9MdfH6}U`^WWh>`NSBN6LBQv!G5jd%aBO*+fSbO$vZN&?4JlnI zgl0?Gx1eYCkj{67R*xgGgg+Olf1ZFuu7wGIZOLDeYVr%e(r(UE3UkF$>^HM7Gg<1E zSL0i3a2(}H2H!Cxfdk7wu_Y57IN-m~dP1a`x?xZIPJd_@LOq|jQn6qN)_7{?K=WaZEt zKDODAc4ag^WeBR%5l_;9RNe_AVP`cnqj~AQ$&`lyi8azftlZm`!hG>`?7zb-(6rb#dtI_GzYmfMv#Jn1tjH^Nv=yKbGvgmnj1SX;`7(P%$ zxh-*cK11t1NN#*lFfg%`_!1({2H!h9*%&v>9sP6wCeF-s$5%EZIn9C6b7c+JiDU9akRhEP35M{|?Ge>4xAI~5n{&febSOpt_Bab=RbFoJsd%?_nP_$HIdw`Z4gr21qb_hI6o`Ojlm_l$`8Q&mI z!9OikvkP!@X??p4kqMd}BOo`@EcH$jIE~;I8xU8q#msaD1}=PQ;v&slBCv}9m1o7% z1UP{i0t*Dr5x7j?3V|gATDE-E#wuP|%fjMo#OL*k=VDtg)-c;JEgFI2Pdx9EDoP(o zAk9|dnT-1UJXyF6tKwmBUW&3W16;aX@nu4nF1HdR8&~PgTy(>hCtWfkbkE0KAgjL^ zL~6iyTKz7Cx;z#vc6^DpGw~YUx=P`5PrmA2PFB21T&RjDVhGS1U!ne238cyE&@0UJ zM(y*fKQdTY<1D@wi#=s#Ny!pH+D3GlX2&c#?VDFqUoa@bZ^C~(J+Qr+7`h2G5|Ghj z_HvH7RlYjPd7`A*@TvYpWJV7jCjOq5JK*7GYRdnOL?WS0Ry^ZT?_zuq6|cwO8KKD6 zq(s2U)7_pZC1YAd&qc7-8(8gKRI$MDmF_0P5%Muje2v&jpBM*UK# zh&O0L3NKw$5HHXtp7MVUt=FCGZGdM!7W?>BZ56|PlZd9$F=9t%ekZ(it>gIJ$wi{P zN#HF4-y%T7mQj~d=ITWD;z>hTCX zg-rroU=kJrUm)N?fR7};c|kF(o9;yjU0=Vrfo8II&QGZ!`j-v*O_<72f#5$Ge(PhfyRa_g?PYiX~Fiv;98fBIqSUM6sbz;guT&UlS_(!0{J zZ_`h*K+0|~+@RUF%>{YXuMSZGID80!MJZL}Ne+P<8`pQ+^X!H8MbpWg279Z$#9nJ} T#^2fY+MF4Cjy>02kW>BtEN`#v delta 9392 zcmb_i3v67~nVxg!;TgZ*ZzoRt$~b=J;W+Ulc1Y~lA$AgINSwICjL&sEe$1FN2&Z&Q*tFEBwN=3VSwOdh8C6FprRa?=zCGG$Jcjn&l z*a?}oruomAd(Qd)?|+>C-20#Y_#4XgSCq_M85yY-_*>&U;Q87!S2K$jeavFvmJthQ zT$yGgj9O(xIh{^D>xlIMOQ70jQEgMHVmGtkf2y5ZFEDCt*dDb6lp{I^$4E-Qh1GksMaxJkckxtc(|8(Bhc=yO@XU*gmXQpe#k1i-CePuy&}Q*Go)2v{ zFW`mH=I|n33~eqi;ib^#@iJZxZ9cEyYoIOQYk4KKg?t@f4{Z^z;?>X=^BP_YZ3(aA z8=x)a{k)!UyvRn%cmr>QC*{0pkvBu9g17KiXxH#fd^5Cb`4+wv+DhKWw?Vs(xAX1L zuIGDs2k!(HRlJLL!;@;hllMSd!+ZHIXlwZZzlZOJ8FhS+@8NyW+rX9kEbe{YUu9Y> zsa?zeWTmnu@(cEZOg1F{nEifv3OGj~S?Xz?2+pX@lit%4!C9}EYHpGrQDbE?M>^PK$O{4$m+f0zI376Xf0 z(UhhZjm^86&W}bA;8JSmv_BLc3(m}Te_Iw7HhQw*=@2Z)2n$u64Tk(-e=ratTyBL{ zYbnto!VzOh>ZW_R8X8jr+#3joz5$#<6z6u*7qk_{0PMKDRrsMBAppZXXf$?OSILCs z>O7$a)cIM_eOa*wvi}wQHYaKa1^j8!UrTUx|+Zic&63y z@Juiijz|4E%uPgHE^n5X$g+}OZ(WAkA2Ud%@Sm8_IZdnHannBp?uW9sblW0=bK5oR zjCbAysM&=#a6%P1I2fl{4EP!ZPGAc?ri?fn|64rTY?g^OUI)*{AfM15wpREjKP{an zo15UY6c4x_y!@Xq9>R#WD54hV7?amd<;!oDy}b=l3*-Tjh(m<*^Fvc!D=#j(3G+i3 zz81TU*;pxa0zOd(w*0Jt6xO}3yH(Mw5Nx6vWf);V=KF>GeMNU4x|8gdV5SZDoK}&U 
zQanC&JSf!vgU7mTn*wrSx1nV=Mjl!7eMWIYN)3JTXZUkFgd4fkBOy76PP5_0AY;1i z`mQl#X;wd<*BrVi9j=a9K2urDK9;Xm*02_Nv+_@?=qU^?33{fr~xc(;d8D1O`4ahJhPGKYK=&o%2 zVm(9ByQ@(CYwa*oq^+)|z=#gbHRBx|!VOdyw=38F2=JT=h)*!y_*0jn4(f5+eea-vgLJTcBgH%YSO!%=+Yiwf@w* z8-6b_x4_*9CsGbHq8t0x1UU&TwR}(bgivQa@WP}9LLToJzC(mj25M%pXEXdyJQ#6o zu4Ve3!?QhsjllhLSq^S_z8$k7Ab0>LQGaEqiVA~+VZO&@+13Lwe;VUpW$K&LbdNl{ zwYw?7ez{~dJR?1^SD|>2^48YMoJG{j0m3aQxmJf&@JMspWqUCiZD@%d6QhVrH|6LO z>!fVmJ{Q zvSP0kG@zP@n>fc_?Cx-My@8=gIIK51KTGGiKxJhw{%t8b%Lx%J@L zq(yEsMQ%=7WSg8gl)T7JQ)ElhB0J;@_a&b!dQFk7NsH`}H}6kgWS=Q=W6~n`Nb6zF zT=FL)CGyTk zj!ecaedz9&9+8nF7r_keM+cG)>@nEF{lofahduEPi_)mL1CZ6Xm`Cm%Iq#s;R)B9h zk_I&ep!N>@z2KaX?;X3cg%0dhP9fuK*ZgY48VP;o%7eYV^|X))V;;P=P~SK(%;~p| zdt342(P4hS2``vW_>f@bMYN+eQ{unrEN{Xj$6R@G2B~3C1iA z`O)Z$Y%X%?ivjB|ewcjPL%%L9{@*{0IO!zs^B zmay;1uTLJ<3pp#acoaEC9rG~uA3@@j6;suDFQ68Pz*6G0rYqTL;*5OZR7qrJO0o{) zt$2xb4FP&I64edF5$M5#XS!a4m(UY2EZk9sty9@a@M%_aLE-IF7v& zNKOJ-VmrhW&_W_RC11So+QG9pavsSpBz;IOA$br9M>2=xG?K@VJdWfH5Y2^^-!PQ= zLRtz`Wy5~&w7431>0&u^&>cya{Etij;3iuv_qUek@y+?CxmnQ-sp0{7@$ww|mR!30 zWa+n121br%_ni)_Au@D6wMpB7{0?q2>9I|jVKA`ap(9;}n ziGLzIp~o2$-^Ec1v7%)+>z+Um!qv-LEn@$& zs?dIo#V>ADz#P4(hl_jgZy3tPfz|F(|6rA`JhiD7-KAS~EB4+%Lcmd&esgXU*X3tV z)#z_Sinxvgq#&t~6|L{b{_g=F$$={s>>c_0C%zK-;}vZc<9;6)fT2x36lTbb&X;`Y zxY~UD?r`Cnw~)Mz1aH7&wlKL9%4qh$Gc w&>Y_{B9xDENiEkO-zUg8GcK zfVN-DClszSroNU2vn1<4nqyf# z&C()ldoM$$AY(9R;K8nwoq=*m{ zAO!Lx4X?tb$qOhUr}=mbC1c*Z3CBqxK2tCG+yf_BsA2yO;Ckaw+2%b0Q5e17RX6P-1dAsG^!jQd62xLK;P_2fD z?Ntl|ULlDQ7Um>-!t;5Y`wV{p!KE?rUhgnF-Tb)sm^nm-utt+)%2mP0SO|rp2Pbg` zP7ygRCI${4p|w~}lcwi;<#k(KNlyBuAS_nPwP!YppAH;tIA+BU=izl) zJVk{3I_X!vId7qMo{jV}~&QacuNLD`{Fz3O7N%6suf4m0+h7pQ7eq|^a z7z&cNEM#zmUYeq!&!v`&o<4E7LFInpmGJG&YXM}f4w3bh>=Qfn}!DZATA^#GBB zL@pqa%y$edB^guQMO71=!wDx{tb?np;8n5)cB(9y39Bw$3axJ5kH;4rLAjPoiqCiE zvweoi`iv~wp!YDC6NjGq!=9@+3)E!tOyv~bUkTSO`h_5H-IjS(v!pUDp4zpO4L7HD zeM@hw2Cb4rUtuFFY>xtC3g*eGPgX6Uexe!)oFVQkkr#-lX3{F>H4vyIev#&>gQz{r z*oRfuo#oYskWPA>b37bUg3}e8_(mGX-WRKiIr})M0O}$?wCpnza zbc^YXe)Mna0OIevKRrjiOEPRW`#;4�)p`i8k#tS$^m!f04=8-PT(r1_ z*PD$OT$aK0=5O{cvxW*tbt^(qb`&^wxi?#=`}h!2uZ6s+d^p+1#W0KdU+D8DPW+5~ zs>fzx1A5D%Um$r=M?!y?1{!wC<|q3>{>IOK(H;51_UKv)2a|J*DDo!4{+<4>+fmEx$^l;UB zAZg9t$Ukks(x$LJ2s&xM=-pJ06=_kV*q9?Sp-yNKxrgYb@ox1XaCxa126)SrYZx3B zlJfH~L=_#a`U`oY0?>3_!Dr#vt#_&8gT>}+hfnFpRS%;c_aGMdI`Yc1Y@=gT8RPn; ziWkTf=6)&kpb+A->J?g(04_d0a&VOL$4Ow@2wim#s)U#(OOL>5mD}4W`$SBP?`Ll} z%j4f>?6mmz(J5<;RNKW%6T=s&)#kG@L5{*|)eqnvO{(mo%R|+@jHy?4tLsuE@CFeo z=GJq`AP$HWkzjMUCqA6$K0@gx?Z68|Y@dNDUMKVLQ$0D>1&l8rtepRft&@9C-dBrV zw_L!7j+-~8fEv%#Dl-Vwb7a!R*MC?C4_hc5bn6AWiKfKu$xRIvo9c_0yISjYV+5rI zbw@ZUdLfL`o1qW_CAfOAIte9K&oHV8g@?4NGuOOg2vnEqW^KK5XXtZ|hyp~xKz%_K zGE7qm!EGXxB|t|4w71qpzj|PbqimA)WVy=gzV{3{3PzEbEUo7-nTY0$TmxTIQ^$Jr GB>O+Rbr+%l delta 3451 zcmZt|TWlOxb?(gU?99Hre#EgiFDE9kH;vb^osDN7 zJu{9Sv(XAQLZW<7dZkvC`e9R5f*({^i4Q&yDpUeN1!~ow5duU(2>9p^0x1%YbMC}( z;&fMY?z#7#d+vGP^Ir4sYnhpJI-%fiZ)wi?@l)T<^tJwoGp^VQXI%A|&E%-sDvp|` zRdrjh#_U+tu#IZmjx(auJ+o@rmc(OTqMEdm)s&rL$~lD_Jbp*vaX0;nW@m72atr5{ zo56X9%ht0zvBB(4zMrRf`VO-fyO1cuJCLYD5_R(fJj*+AtA}^-?mLRTgZJ9BaNOnY<^xMA{LbC=ih@F_^GbeD9A;Oy08TjTXe1c8cZNRqX^z#*qY)1QOy0 z`i{XAqPFQTRGo#2=K^tvKz5UG-#dTP*-?Y0-3an3(3z0=+la$tz2VjzUrIF6?l)Fd z)-7H)XIM&nVy^6Zn01(d;tJmBdfcBBix#kw2(3fYC!5N;wxOI+UMyTyZ^v(&O%={G zjdh*tO>;xVIfJ>TxgOh45RW&_1@#iz<>e!n(V1wQSi6;YUq5@74djh5ajEDnyJrFF z069`%7lA`0zUD8NLIX(JM&624ipg1MEZ0_`*no_9t7CZlprpQ8^tP*OM8dy_`+gS! 
zWnY$=EWx_Oe>%QDMWza4Wv}kLu%GbEbbxo2mV-#1Jd<~BV+!L)`gQTw>^K_|xz35p zoj8x&)k_$O4r2s}@PWh9xYA`7ZtTF%rvs*@tBIJIF)cHXR+9h3AXwtv&fg4^8{q_k z&{$e2!r~(ijbZrs?rZJ0yFQ8+G$KR`6(NukgqmC9q0WoALc-+ZkrA~e_ZCcvw{!gi z(sVI2-SQ)WbqY_sAwJ1{+Iku>&?R%PSaLleTf!hMB%J_UcibfiFNPJ|f)|L{c>*N7oqEAtbXOa5WNv^bXq_wqCIM-R_(G+|i=GE0em)(= z4*3q|30x#FML=?vOx1D&3t!1LgOU@O@JZzhNHL9MA=%D%`8N6CB<_7CIu6XVhzd{z z^6LcQ#|TUkkn)iOr7-f$1AMk3@o^;i7Jfddftg_>E|_Vc)iw3%JPaXHCN=3t6^Y2c zh@XF$DdM*W=Ei=}q&o1^+eS0Cu5x38F=h3=rh#kUy&Z4HZ^#O8L%(e`v5YTS@O$z6 z;K8%+OKP)eHmwD2^^M|~dohV+YF9!>cuf?w(^al?5i8TWe9H9^QpdUd#A>$-+A$bq-1Y`i^ULWc` z6dG==T<0zvAnGJWH3fKvhzbNo3CxP=q3%gaAvlM?PL-Fzk>{}BKVWZ)yD%Np9jf6` z@$C_>4ozhGG@bQnDb}Or7?i}Xhdvl76E`X;_o`2xg_h%#y`t|sPQ+8z5F&Y2{ABnL z8*2Y$_*J#FC7TQh^)7qtE*p{B&cXt5T_mmsXoSlGHAC1L0_4-EL_~@EfTl8JpQ8Qy ziiWY0S1&I;q#;IzjJi4Tug9hx3Y1Up}Ir8a+Iiikgmgm-F!ow1-;R~kMC{C^sfwa{YDXVR}MILF1cMF4T zsr}2s5%$szJQk+Gt=4b4PRT3Qmi#CRtHf^*L0*GPM5NW119*;BAC?>%xF*O#DMXaG zf$y8ix>$xmqdk9ggC(Uvl(`TS{~ODlBX)L%y9>o-FSz%ZpP!ty@2FD*3IZXO>*u43N_eHl1DRvjj zGFW@(dlPI7NGK8?jqqi{ZxJ9VF*{ZXmyEN7T}JRL5gxnJx_{Z`sk5|Cr?#7p&U{Gs zM~}cPW(VCCn$NhmWJ?knBqViH=^xH^qu za6OEB?rNo6ugQSJuJ&HaL7o!TPH)a-Td_?E>|`)c{waPmHQM?Hu{9m1TCCI@C$G)Q zw`z6^*#pOsBx)&pi1m)us#lPYY~d`n1=y8bq>?TL)`(O-=P}}|R_Z|I#mc%Gz zLUXEG=gXdZI)yWT7=f;4SszQOS#582E!$GGgqE1h4kyxPkJ({n%})IGn|n<5f1qLS AKmY&$ diff --git a/nlp_resource_data/nltk/__pycache__/tree.cpython-37.pyc b/nlp_resource_data/nltk/__pycache__/tree.cpython-37.pyc index c737e0aad0a9238f52f084db84e9e8131025667b..3fa1eb0228066212c72ebf62507887f067b44da3 100644 GIT binary patch delta 11330 zcmb_i33yz^k)HSF&^3}rvMfuoWcjwok|i62@r4iYZ7_!&aK?t`k^1RCbJ1_)LzW}H z*d*ZyM;d|&*(?SU*lY;La4ZP{LlQy=m^dU5-japfAhEcejgV98C!8=jl0>PIfuJRaeRc!VbOm=@O~ zdfXfFYUq1ozKD;$eKCJL5DCOfA|;IX$4cX6k+OJsq@4SKSVcS-3C1fUmE13hRmI0d z#&Ekd7K&F#s<~YjtBH?|jE&buYPnw?8y6p`i_~czbL<+As1U*Z9uYLh@AK}{BK3r+ z6jg+&GVAwwBMoM`8Q7;0c8mxSHUuouH4>&;)DWh|Y$Un~)E_Hqsb9TBOx2jdUMP^ABk>Z>;%z7*d}6ju#-fyn7H2) znJikwB>J8rCW|TbJylE<)98DeXcARox@Z(ZF-c4jHKJD3iTd7($aLY|MK7d z7PYKNJxMu94QO4{!n$3RlKqLa(KDFnPWLAhc4#os-<=eu5$jKzG8(hgi&{%l&`J|o zuaVxJGA+9@DufX?)6s5-Q*UW!x9RqT)b4a&GGUx%~*ZqvCp|qF1zvw*6XUg6LDN-tb7jSS*%Mf3mn-EECJAHAM`F zv&A{onrcoHE5)k)9x~Z<5fiJ$8p6&HYsES;$4n6y>%|7b%p$skI8SWcuMuUKdgqG^ zm}|DUP+UZ`ts*Hli;od)o48n9LYO&Xi}?6{&uNi%5fNJn(IK{pOX+*ANQvztN~H5d zm*^(UX+j8-zE2lAS^0|0IA2ehUk1q8)s>Z}S2R-N=;~E zqn4~C5Z2{*`m+7Wbe}0DI2ZYmNn6-r3Q z0Wpd<9^GXJ`po{`zI51G-AIUV29Ity59A^ub!&1K;U;DtoA$n5y%dtlx+qvBr9iw= zPtRP~=>!Q|Z=Y*xsjy~i{f-F4u%l&5dd-BnD)1~N^5wL?Dp(u8j3#w)@>X;erF$$OYK<56= z+qD^R;*DXSU4AhFkQ1jaf=1^9AcSE+m73oc;w3JStG+gWvi4Bs@cbwJ!3YRTShI*a zu;k1JAlyp`+;f@VEjj8vcPkznU~;AtsUZK2Cd9`QQMp@g!_!L%M9Kz%IHxZvnc6K8 z68C9!@hjhbueZ1u-AbI;5#KSwC9>>_5{2Mfyv$Z^Co+SwOT!nUZ3*S2W7 zy5P=QCD+ZJ#|el%8BcRr$R}twx-_?a-O1GM4s~SR#;Lsiknhm&0#_P6GC5=>46Yi_ zP-m|{7&bw|@ysv#(8eIL2LPMY_Vq=rJ~mfyL%4~3)g?91L-QFj7i2EnkgAviUm2=2 z6Jk!XXO2^|%JU$|6An2Zi@MBl)sXtb#cQYVhS&&_suh++R;Kvk_!wJltn5--E;(@4 z1)%W9qv`HG+n-Jjma4|1Jl;(pQbsOkQcy_rI{S18Jl?p~YX_1&J(O2mqslZ9JWbKnnnn#zS*aG% zYbZAO=wDsA^_3Y{5jK|u(RT6MJ|Uw!f2D@E)oYEJTehv##(jpUY>GrEPkd=}SDH85 zA1)0~!4o%sFeS}&dbh#!Z>xou-8D^CI%6(}Ag$ZQAP$HL zBQ0R-vMb!6e%w7(`;_{h?%UO4Vy1U2?d_k7^((F>9U>+D34_i+eUZ}SV47RDZb{oK zDek)+#+gZqPFa7)3NBG2=FE^!*Ad%k1xvj?E#y_tnCCVZ+g((Fl6?$gwrcIkthkN_ z-$*$RO3-?9asG9*vlZQEsVza|)*_JNrJJK#dZ%mg%&OkoyxWFBw-+Er;HcIyq))db z*B0e=rIDiJBy9`>rH8Y#IP&DP9~{6rvp@x>^1P8+WCrJSA1 zt5sxuQ|>SYrQxWwn4oZEH7{AOMKkLOj-a;1bcZUYTlh=@kd%3|7GW!t<&&Y4U^95m z6*lJS)BtP+jw#vnII6q+HzJCZalJxoSeWt0ut5+$C1mbW@1!=4p#Gv05{1P}sI}42 zW(#OOjwim&a|BU8~t(2>&tW&q$&BHIDx0h|udNx&|p!X`zbcDJkt 
zjsxj@v=#s?20*#VDL}483-yi@0y*p(64vCP`oo~L+{NfryIh{x+U3P~sdmL7lFzAzako0S zBi-Q=@)0MtduZS$wSDI$XSw*Yp^0m_?;Wo-My5k!cI2hWEhPLY`}1Rfyf)CuN1<@ixKAY{}VyA!9tb0*1olz1A|=Z7|*0^dgB zdt5c_88`)=yNTxs9_z(Do7-J$QvCQ!RSVUlmid%Ae>mBCQY=0zDbvrWw}w|vf0hP3 z!SR(=2>Zb>zDq`KQi;94@nNs1KkcelsZUh8maDKtDCVmYG37b+;3wveU`NR(0KNsl zTM@fJJ_o?z>HBCs5AXwkg#cIw38P9_NxlH^BEVq)h^PGL)U(}Yj4qe*J1w_edBgb; z0B%v{$V>_dS6U63uZ%47Z~ZQW906c`R-!c)0MW%R$39C%rI|>h00w?E7B~l}MTE-@ zb8J-j<6XI@F{um%XV>bCL;MxE>0BQkR0YU)zP*`xDwtw&b-*<7@#$?_B;=Xgk)XVVtG@-Q*$*vR_u6} z0L2aJrO$nO{iu7LSWv`l8O2XF8={V}~$BP5m zLn`t4;ZCk=*vFR?u;y3x*v%qeBcem<(5+L|id#2yvg4mTGosSBi8-~9y)Dm}P1X5P zizRwQw`Y0i>kf<^IS<;7Cjj};dKrLi&6@J4@E<1@VxbBG;!Eh(0$`F7 z$~qDChiHx3!7=9i?CskTdiE19)lQSBQCjcQt7>#DI4)QloE2OW^algM%BqTBuw187 z$}5j>yd1i%+P9fK%ZB74@sbrkdx8#};KSkElNqhm*~gATL77y5tksRaigSUi*HHj-Qa2k3d= zY(2vWw}6=0lQt}R!A2JZJ~j2u%nFC!o9B1#$38?zrNmuuPD({){9QA&89#&m4wLPZ z$>eeO3_mA=8r6OG|1N?~G*xie!NS`+)}grF5*Kk%hb~ZR$$c%$f2>WFPj< zR(0W5e&^h9GB?LvUVu#`=j5|qry9OzL656E$ZrXF0ng2`=o{$12>=7|G@QM{yQY}a zXMG+zKCc?@ZP9G@=S}y{%=+^$A?W|jpD$L2zm`6sKlfy|-nT;w^M?Hujaz(m>=MT@ zt$60>*C*&5@G16PTIS_wW37vo1+tqS>{BZr+**P19;sO9qpC1c3wb?N%QJ7Nm1CQ$else-ZE6?N+SCW)my5#lCnuNK)KtBZFS`*=j-a%Rq zWghs(HJa9_7C&*>8Yb4A^EgZL7Z~wE=$qF>clJgRbq=dHpIFxhb#j{UF3Jk$_NY2M z*3;@^-}-CYs6B{lImTK6355Z9bizMVyPiC2EBjQ5Q`+(N;?Tl1#i%W~pwFd?MUcuf z;KSz)=(1MXL0CR=;%h4O)GKQ_X0hN9C|M>_77 zd88{Cj+(zfeUSbYl5|MYF=%Ja+_meIX%w>@zGNcza;B2 z26jeeulxFxLuf#~_RNX1d4<~c?N}?1QAy%$AKgNA9pgtlRbK3yh@eWn_w7{M!_Y)` z?u6b$yU_9S4ENKcYIVbR(uZ2rGf$`>34Pn?U5v$^+u6s#Y!i?sHB;)p88W zLA!_mjoXEGen>>9=0J840-m8c>cJOk)u9)gF2IPr^iO~9EF-TROAY4&7It;6pIFW8 zJ0UN<+W(MOLXGOsv->)mMp-$3F#m}!Tlj8py4v}@`<(50t!r?H{v_=zy~8@E0^l!eiWUh-ts`OqO?T5 zLvGoiI=;U_dsprG{$&@zs7|tGL-wP~Ys{xsm_$BH;5g5ff5Iczl0)~!s`dFxCcO)u z;(mp>tx$J9Z_OTcTz04vki!Zq)v_NXPXDtb^Ay~7je7G3d;bCVU9S$jaHJiRB;{cG zT)bZ7po+NfCUxtJt4`Hj!(M+mGU!JacGOxe1PLn zueLVa1G0M|TpQ}8FZz3P87cmrdi>SBBN1OBmL71l#D;cm`maHY!@?P8odvJ}fDf_T z(TV_k9pIY)8vy1LpjRZ0szC6BwREI}ew=4?N7J^RI`%w7D^4#?&3N*5_2f^xFAHeI zg!mAiA(77Mwv2vWnI4q%@4yOBKeAgDK;J)*tO9@|f-lKjIEsne#w|QPLp>y+Sqs6rh za=JubmTW?JJ$s>iA1x0ig_+s(qOm`E>|JL*DcPkvlAitYrGMP-mF`VpIgg0lZA+zext><$v!03E@jW zVYODR;_qs8wT1fV>$8iFT3@T!t@Y7r7X`I<`&qknSFPQC?zYv+THSNbKa&ZU0pZ&v zALst}{O9$b|D5wb?!5Jo^H+~J%f3=t>T|%qNxhe9)$0DTx{*1G!@(RO2UD0DQ$h+o z)sPBLXUrLQghQBR1KJFRs)zCz*GUI1~9d(4ls2f*9Vwdz|^zxfEf=o zh^b=}*u;H~g`s-Zz$Wc;gvPVUtP!3Q*g32To)g)4R>G#RI#$dku|^hP6|9QY^p}Jh zn0mP*c&@lzNs6F4S@f%&;&pXa-6P;i-2pCip!-5jwv9QNYo8(>e4s(hcivL6owcwT z`xHWMv)ENkgXe6vm4)Fshnb8qeV-J&S4YC861l@=uSE{=uc zy)3-+I&ZWu@yCaG;EuM~P+AX5GV%zjFw7dbO+1%#UG*09~e7`~u zei=Ck4>P7|w~Kj|?2LpH`JLkV%KxswxwV|pglV~rSTv&ZapI+_hS{i-mmt8|&JD~R zO)!0z&0IbeC=O(5tE&~INgSzd8KHQYg3RY4z-Y5dG|kY>sHw+caOWX<8lrt&C~*bn zNzQFLofMzE8Ohu`!m+e&R~&?{L-60gkaZ|!^ygQ^n>C+GOSDs1!t|Kk5>*L3($tjH1?1alRMrwY)S0K$2SU&l13L)XtXym)u2-t{YU|t&$W^vUG$-mzxMT;*5gcUkPHOID!B)9A$n*RXolb#Rq=@Whjt& zx-F?*FghM`m*qrXpoeJ%$pzagL=>7}?6@YAm@`@R;8gM^A=(#>V6LL1)R?((^KhbH zr>a8oVInCrW&JSBaAZ5j)U%4RlPITxq!OTYuN;!0Pa8YUx8neDadQ-V6|d@3JWA%` zj_Z~4FpPVGF3Yz8Ym+Qt7XyxWBf#p0CWz0@pAn!*AEQakpLdROC=;6ZjN88+35(K zB1-rm^W^f|T%{y&GfIS57MoTE>+!|TfH*aC&&vCi1vnDOrJ_hOu|wx36t+dk&>_*z zNK8gMh52#c;#kx$BguGbX|ve9de-`UKv3~=@~eQ)a-q+870Qv~2`w{a3t$^mm+CIvPH&YpEJR2PH12gzaR&Mk(nxE@d1yaY+5A*oa~YmBP$tg3wPtz9eIXoaN= z#WChqhzPW8f=TmSv9qgZN}=_pRO{o(Aw8i*VqwErAl~mf7$hlFmEF7_C87$FL#4-Z z4TOyWn(~LcgA*wO(eqLLyC_E+i>iZWs_LP&H+q(9BoztHkFUzPaQgq;SW>W@Nx}o5}5Z zg42pZ!LSv@+3fPcVB^w)HJR71II2cT=_N?Y5#TIZMO;tC!my+1fw!Pn#?ub$-UT33 z3~8f7se@6_*awlv88`YZPqMEMmQ9;hZUDjaBCzQXo%^7bn=5SFC?Q&Sc<1{#`B-fo 
zE(q3QntsId>dd0e7b?})L5~H+6}Mh46V_s?7PoB)Herulq(8-V(;U=@{v+|5E%#2f zTrfbMO+HJB*jV^d5c5CcfvYZ9N>PSB;om?&&X;?JkX)a@mo{z-_n;tN(~gU2+VnD< z3tT^NbuqRH*vTrdiI22ab#RWDw6%5Q3P)m=%VAb+_uZoKNs0si%xV_ctL!N^Ylh3s z+HG~L{3aE!Ro5t2Iuhzehgm-i2f4lK&1HKkhAR$?Kj_#~d4+?impRrtHo~_2z=Yv( z2NgxO0@P9uOdPH}s2pfG2pe`uM~5M$GV{%?uPGx#FRvTW8&?e^4bzD5Xv%EVcj=L| z8SahgjZq^p&1~#TrW35yce$=N4wzM2;o5pbC#o@>d#5G33iAIwTh8m-i9=d%f-2m|{88KRWxxTa2 z7|6*BV=6Sl!7I_Y6}xhLjbrq0g?jTfaJQxM-ryvW>Ybweow%*{AEuP7NhY_aQ!y~P8%gJFoV9z)i45>uFraB=4R*QEd(Cq$m{@i(UFuC6@V?ey4Nu zZloJVupPj0rEN%^YH+MuL7!EsrTUT_s*HxaQMpf`rHINexskGicC=`MJf+-%++Ogc zh|Ziu)uRLZPV^6%ygKuCbh&dRh79VEx~X+YZQKImcpT}UK=34hANJH@EdFgPG9z4xPbHEzAufuxYj`bEASm>ASS6-6lv+W$umFDaxbb z>BRUEatJx0IKN$zTd>DaEgi;im^|+U^htJqkvE+`1=vtA?Py^N3-1Y0qN6Mc)l$EJ~(p2W;Uj4nk8ydwIu;0sYftg%uk&=G6|cL5i}w=2LT>s zc@P1b&s!0+0kFKV3r@ibHpnXxHw(dR1alFbk6=E6WeBiHa@-utm4!Bim*MMr1i16! zn;jz{wS<=IxDIC)CYR@4?9>t9Rzt4aQEUw%Fc4ge;3fq75gbDBFoFz%hY&o2fF?H| zr_`#!4vg?A4uHlEXnB{4E3fXJ;dOeQJ`em4s4lABRKxu#{}iv+=kt2K^MJzdYxdUi zV)5-e>czWP-=Xx0VgAUbJ`_sc9l%yO0t_n4t>G3!o;!LFJGv0%(wm%#YYUVBx?9Ry zi30BzHyA_hg;|?XU<-o$7-@JoeTT%&>4{>!dBrjtvph-B*56XL72?M&9IXCy8KP+! z(Ya%y@TNDm*#v#b)xcw5fU(5JlRa6}Y2f9PEti|eSXT2BpxLyKJ*%~@05dGuQM%W8 zTHLdHqHyk9cb3d_se|P7)Q;fIaWX)7cej+{}94{{1bvAr+K-Tlb_uI4L>jJ(P z1smLc7B+qtczzDL60Np5DSXEQdq+6g82YQ?uAyYh8L_yGBsY(U_T3jweF>C7Ko?s5 zV3sf^H0@~Sp53aVl#2ty&9;deyFn`yp;UO@6rT;x8u5cB{w#vS2q+0L{`t=lP!0P9 zwq8N-DuQ+d=y{HAYX+SBRY^dbs5ih%6t#8xSSVgQy8vkR`K^@NFs55$jF0eK|v09;PZiS^)b_ujDA z)rPq2m597Epi*!QB@ua^=MBJ(%RG5qsXCjOvI*FMq{MV0J=g{}gVMICnZI6tOkQXP zN6MxB=OSMb0y^U3aQK@D@*^gfPeI%?1k(}V{ij@Ieb~wu!7(=!91ky+%P^BEgs}6* zDv|XCg1V?w$4BD(Jkj#SzW&FiLVAd}H6as)QxgVFak7P6mG>crXzqB0Q+0AIAjLgm{Qm2@ zN8{?$lCqOt1w7d+3j0!n>F^!#;r?%3OqSSWjv8ipV0VxA9a)Lwgu({?D@gpFc;nV~ zWwj^~-{{TX30kz9vgaE5?s3f;drQD&cGRN;!jr_LA)is&N?6-U>*DW4!`<8JDKkfHlvii{py#jRz}?qO`3+fAa8dHelJ`Ys#XWygmLegiYLP~k zY#`C5r31T8)L{7~vqqnWoNlq>+lLn6X`=^%`QMP96lXUsgs=;U>3ybVM0;a!YOWCB z1DWNL-hauN7s>X#vJsat0<6C0m z1Cy1vX}RD3z_e^UzlSpZ-{P4G{}0Vm#Af@U1pttbsSVb~0joq2~4^B|q zFs5h{(ek0> zDiB##*$Lrn+%8^z__P-*G$1j{m8ash9w?@+RWnDt%q8_U(Uya~nwsAP~$H%{}DAnT7t5xEr zC%ab>gUXkoN|NRt_=%XxSZ-eA&&9`M1qN8_D@nXehe)L z*N5Q2Kf(6cz{@i{3|*7OD?c$85s^!tSITo-bWN3u`Uqn4ZY7U~0oNpk|M|sM>;NTO z@NvQ?Lis5suM(?Ni{L+9d?~8zhyU>JqfUsz{2mwmD6f{ttn61qC;TqtG#?F{#pgfW z+g>}$N=pXupZX$&t_bIfN1uOCrU$v%iy{ihp%BY?qrj~U#{gX@e(#KT z#Y3;lE58@8g+9yPRbnge;=I%T3>}picqqxM_}qAdhj)1X5^oX5UYwoLe`50CViAAg zBIRT8#0yug?Sqbdfbs!+r8zziTj(PG5`f$eWHfU5U$8504^25*&?TZTUeWk5@)V9M z9P1_GofnPiqk((6FvPfBEB3sUnEO{L^T+r&v+!LnCcV7p{}8?##TzdlIlJ%;i(@}u zakk;hKZP-!CVa0F+kUY;Y^!2t5IFhi$%hNh%xVl56bIn~SG_V^_{YNkinbx2X}AE; zFGS=$l#_1-gyoh$6Y0RtynLGe6@c586OC8?_ydp%wM^kF#B;A4ZrgD(k5$^s;RhM; z+ay@84Sf)~u>w0>{BM~3_C1SiR(1o`(~ zKv>RjZ-nD~aVLO~AI)y<=izu9er@7HJS@(RP=N5bIE8zo@Qb@a5*I2ablQx@N8o~hu=r!B_bV+-b0#q5PX1uyz?=(F!=c25a1+nG}^)+M4otSDMPjf$Sm(- z`B`WwDl;wx;8Nj-JxTyRUi$IR7Z?HMBMk`X&l^})HR4DIhM=Rp!d2m`=%^4+z18r) D{G@{{ diff --git a/nlp_resource_data/nltk/__pycache__/treeprettyprinter.cpython-37.pyc b/nlp_resource_data/nltk/__pycache__/treeprettyprinter.cpython-37.pyc index e4a9364052d0d37757dbdcb1547c814238c29536..8fe009dc3019bab1085b0f5a25a3211029d7ec84 100644 GIT binary patch delta 4575 zcma(VS!`Tab?$7Q&0FFvjrTgS$MMqGA+a6D9%pglEKbruBVG~><9RRRm%Mp1z3+{a zxJljCDGCx&6E0A-K`4>hQh^jgjo4HcAykA26@@QekpQuX_Ja=uMd}CQoO5Th>xg>f zGw+^z&bep5=gkLq*!Ssr?0x6zTu4vP@{&<}YL_JPG z)faS2&zMeCmg}Y+Kc6#`Hm+-Ork>2GTHMfWjVBCCsP3`m&DksVe8!9q#S7cXOgfve z^*KXZx$VySYns4Fj^M-CV9w%$qB}UvW`!BN-ueh`Z7%E+5pTw23xMOICNw&Xj8W!z zESpE&{9z0qhQHRx?#6 
zsF}Gm7zjp6A%cg`J~o8%x)7kg4l{WtE{evH-cBGM*G=7y$7jTU!%w#iuR~H1kAp4o z_<8YCWT6*N=7RvDjIT!x0=bq2a(!jA@?TZ`#q**ba)!g=>pHLlM7}7L=G!~Sfn}bH zx*dNq!%dAlfd!4*h7OM2uP;&#i;9&`-ED3|8sN*_;{ zDUC-F>ov8@w&TT|;oag?>q*unUTtj)?1aszpHGMft<9}FV6jHJ6F9G%YEm<5~RpL&$|*tgCsvC_H~vN(dNjjg;(L$o+fhtiK81wn zTVPt}6elolB+_$gV&Yvf*cJ1WhxUkE*LW}asoeU9RoP#}yIr%}%gKq7qT}Y2^%R`d zru_|Svr+sq5EP4BTDK5)KE>8ildAde;)h!f$MzNsBGd9#!PlB2J`H65L{bF-1F+~B z7#QF&ad7L=zVhQxS@7-z(yhr(%;bRh@z$GTF-b&z?ZfB1V<;m?0&{#>eIaA7$)qar zx$e0`AA_Br1F>tEH-g@lAMjr0W;KjAiQ%4oL#5!Brx0oaZaDN&DSb@Th8n~lb_Ydx zG$MZ6bM|8v<__^hbf&u-1?NMPKV%&@4T-m+Qv%i|ZoS9o;08l4_O8;8VyUZvb&~TUXN{td-h%kZ#}!ni@CZcC;Mv zVB4l8Dg$g#N`$ZpTh+4mJU@&=b|aVt;IPZGEj@;q?FddHpbU@5td1iByN@i!Q}{%A zLJf-Y?vsckMGJj2;*wo!0Co(K?fYHr9(bDO4M@MA9K{PB0co;Y-K6z;I{#dZjXxr~L!DAZ#(k(**(6aGAW{*1^FR#lX&1j5AC02B z%c)_A^MOM%e2q8X2988~aj*mbEunI&qxEZXf6tktG2A$f0Ng1_3XVXh(L@Hv1hC>% z%-q#N{#pzI<2iA9Vtff@&BBS4;nFgv0=aqS5}=Fn+;U%P+zrx=jw5XqXeBfnB^d zX|hRCAM32TLgZYta)!-k#DQ3UPnuAqu%-Afap=36ybtg)!9mi=+>AX~ zY2WL2SAP43>jJwIy6f8SI`^6w-v8yEWp>`z;7YMwu500&?kB5GyRL@ujR)6i{hivY z;kyjpbsEIK_dmC4LZUKys-RF=+F;dvAnqQRUqz0#%O*|@*eC*r&MYnZ#|N#2)C8xF zw}CE0nPLVK;f+X%D06RhPRWeG1d*vy%t!t_;y#Z63tZO1W~tN{5FrqJ5rBCA&`^tp zs}~U*K~Rjj!>;hG7@cX`^)lkoql%w`eMMezFCn^E+6A-GVt8>_Nc}^+Fmr?Til)P@ zavBX%qnw^zb!h?hEPoOp)O`aAly{j(9UeZu3)iCv#sD}GC=OdsrZspyRXJXSGA&=h zr&8QX!E1yKoDMC_*}!J;-r)|`xDr0{SyqXC0z&ozeDPc2t4AAZu(aebfNXq3{OssJ zjGRGNFkQyCU`O#?PVkha1XP~5griK^(PVo_zGdUqtQb0$h>io-do+Q#Wdr&WAQr2p zPVJkSX~#hY=AbcbPAhL78)i%2#6u$41P(B9OEYbYe;rU08Z#U&!nMw=w^zy zA@&A6`ni$$a;7EjN`m&XJf~)*RMRBCbr`wn}?m)c{f0tO~Q7)Gu|D0_K`c| zCr(Y9s6;?W!c`+xAe5l0m4ZrJxl%zz;uokADsBG3{6MN&K#`~_Bxpqhh;z=}hix3F zS3Yy^opaAU&+nf3`91deT~>amyxgzAU&F|m*pF|QuRruQOf)5`XiQU07G*P4Fmjn{ z+7)#%JGo(mIdZI+sGkTY-z)yHH*T3F_;x3~#_m(NNOc@x@|B#;{|tV`5+M zx%zD@u&j#3z?N7{7rzTmw&BUV9Y~n*mAFA5*K@$0T9_{XTSaH_ys(R$;jq|>4y*@} zcf`w2{Ggm1Sn!-USii2agA$S1+nMiZ;n}^3U zNu4+2sym}k+m71+-|;5#{f4uW*_+Tau;h4ocFNGE9Tz+uW;kq&?}Tk(FYgn9O;0pz zhY4SLuHzduGFn16&6^7Yo4(1~dXQ(ax+e+?%anajd=|RY7DoBfS9BIX{#f?O%}+HW zJKtJhfuIZ&#xgNa!Sq++)6LUulp7miT1ZQ@WLZ8?9=|VcHa=)US1slQ(L0J8is|@3 ze5)zT`o$Mbhj$e-nD0Zz5DH+@28v)YjE=A0jHgGm_y1LW!;wZ#K6y)6RUBJj_fZOLuTbI zg2yuPUj??mBJ0^05BZkn4#)gnCH}qjR$sqlBA>S6 zIPVn729m%WPtKUk+RL)2T)fvZa_mcN=jTy@rT(b_y)6VGH&a;^Wm%>9WO(-}Yn*1RvxM_#*v>l`bAR7ws z;xY9}-Fz&>91tJ%uNL2KYo13B$(@Y20kc;nc-FFXZIM$jsu6GsMKv6ffQkz8!$=5E zT$XP0EjVoi;<&9Wx1;4Y*Y#Z1GHfH8u{b7?<4VVK+(kTEr;)I!ZXFMRPuk)USkL1( za(Gj^kxbcyQhcMh(7y8!Rh1jrHY10RgQXnQb-a^tb3(Vc2bRO@WnxmTQW>1Z9U&xZ z#Ru(M*0sYhTAc#vCZ3>cu}oIet*8iete>Y+K!Az{X*F(Zdd^PqLEOIw$uJOyO-oqn zz?DuUCy`K=2W4hozy(AfS&XM~M7cr$MY(qd*GN&SB?xj*k*L)MZ^p$N9lx*ch0!3d zLCz{9)eA3oM$5dU$~p0m-S3>0=mGB>EE0-{8}C+5Na}W6i3!ff;eFI=Rp>$%jK!NW>Cr4d0Nro{_|w3uxbpY=A1*Lzx9QQQB$>niOM|LW=R zJq65AK+lLBy>pd0IT^wSMRh;6Y&ttByAvpH5^=K80&V z!w6Dd6Bqi@2bLXyY?CO4t+eGn=xM3&+N*G~sz>AoYQ@IAEfUM}a71xfE2Wg~wc_>g zuE^|7Qx8zUum7{*ml8S}`djDG1kkPE5tzw3RY~iWbpCZo=SQqF0W;-r!QoUTQgvq)3V~J!mapK6B~j~P`mi9% zt%Ms1$9N36E&_2>t^Dbvg6#grVKzU!o6pa+cj(CB!;4$ zju*N;XqkCZ34qtyWe0mje0X4*?G;0j#tNIbxNc3DHXjf(kYd_%;kvwE z@N#mz5a)?>hCeTU8`(TU_&Emad>0ZzMU0RGkd)CS4EYi%!_>gd{5j-7CqqMtr6@Cj z>S!9LQ%EQhlo%-T_`=zPDq|lmeC3c=E&thdWsKeR-&2k#m+p(?kvkn)t+wWdI_jxY zl5CH1C2&iyqi>A0vkNfA_PS@{1h9t@tEK#MycHQaN#Civ;3B*9D1@ojzfXuD3W6I9X7+qMSN(}o;$dXevNu5>@)J>dDP$>gWvouTWjOGJhnWRF`$AUrs@Y6RljAgYgVW~&#vkH5&e5jh}m z-+>3e0c0@jgI1Iq8QbxWW_da;uU;NP29(2hiNBoaija4<{lkN9}(Fs{tE=D*U@tSSkH{?y7l!aQrp;ws=(^ F`wx6FiDUo( diff --git a/nlp_resource_data/nltk/__pycache__/treetransforms.cpython-37.pyc b/nlp_resource_data/nltk/__pycache__/treetransforms.cpython-37.pyc index 
9e4dd090a4dd246ceaded31f12594a856d592dbe..baa8cddb96ac59c9d090b838e63afbdc89d9796b 100644 GIT binary patch delta 940 zcmZuv&ubGw6yDh%$!4>gq-nNE47Ak#&=qQGqlc<3tw^PUMW`3|;21L761qR)?Czlm z3filOGT_a#Ab60V=-Io~>mIz;zdE>0L*CF7TQHF-T=svNfV?3s(Kzlh2iMX+Zmagm+@U7xe>wxH=Y#l_Kv>0e-Hu z4XymeDdSD;z%Xe<*Uu36@TEP7-Ml3=*v$ z_WdyG`aOqA>|1RO*{o{rDWH?EXXbh_YeCEGi`hVw9h>*j{BV8Z1wsXORBvC%w)}B1 z%d1BQ(p2W+J3HT4u8cvp3LpXK>`}uqYCug6R*AOQ>&6;N*pJ4nIS296r()C17W%|i zn%7W!xZkWR+{oeN+P_9l7lIgEht7UYF3q2Kn1Dp;EA;{QGcuFsX11p~v;PrFO1;2! zyMxfdvEQZp$L!7299kTHomxkzHLOm*LOB>FT43+bE_{BbO7Kg z5^aYDUKsI7*rkgQod;+`DmO?6BvI0jRivQzmr{pq&D=J#mq=eek%{94XV}}Bg{R9P zqGIfI`##2&LS-Jq=O7|&Y>Y@T$zx1{9>$4=`J6^EbugwdQ+5H4 zbupq2A0p7oY>!UCd*4Z$`(2OMfKbJnqrBvdf7OSA^VY+6vZOs0NljM+- zhxVj+kkO+j{{jg@1;K*{|Az&^D)>Lxlk+xoTd)iJ&71e;eeat$@1V`v%56o73;dgZ zx`7|QQHqc5^9>PnfgmDU$QE@G2_#FRMBYbwh~!9uB)KIKm88fFx56Y%GW&uaF{9*M zPo%46WKZBZGD(t|oscdQX;aYhA76e;PjH5c=b;`2IBM5-UE8cbGse4a-=MvEjU8rJ z(L1)7yNW{WNv^pb9M(jdghb^y+ozx@*v@eH#Q%ZFAtGo=n&5^P>brd}-W_zE!O(Cz zCKcK5Tnl-uqV~eTqqDc_N+KvhQ|zZ&K+oA<^)XtQZ027hB(uZW`laB=v5GR!o(k|o zfr=ljSy)b|A({aIK{Ubk3YwAyGsi(o(K+_H&_X&pDpVR3@SkxjFF$f{hc{@@8)L_% zwqfCIz8tW7#T%$Pc~zVZb9o=;TmQ>DgU)YKf+?jdizmudsO_uu)j`*$5~EzASz4-` zLFA=6u4Nhhfr&?sLC1gCm(l{NO@5VD5Sp7*F1$yP7!b+q`^Dw0IyXJZv4%Z52j0s7 zd4L80G{}o@Q+~}M?K(TO2HpjLDug2aK6ekju)k;t3Vf%S8f>e4R}J3Bp9fDOrtKEl z*K+m6bs&*3wvDcXu@ zBuEWZPDLM=atzzV7}5-@X<>Y7PJ&_aL3-lxCg@5FIzlFNq^$4){L$N6$TO)h~LLN@iKe-TCCGG&;e+-eR@ttyY`fX0_|> z)>3_`wM<{edBSFgwOn6rb?TkGA2C;q;G&{tm({Iz1MN;4r-=(hzl~PW>bVPozLMTW zYiKQyR?*|Mo;E-$Nk?cSZGzTn+Du!3YYlCsZGhL(cDfYsx&s1TMmsJDw8N;Gm-HTb zH(f!yKw>@Zrndpd2BVj*924m(ngr@bx|*&5Y9C!o*8%RQJ#_trIYHk<@1ebPBd~0y zeY78jw$M##v&03aUps7I)+G+ z_=y6`<=ymHF`spFc0QGqyj5bmcUbIQDf!W?nQ@%7J5w+mKjyf=I)0-Uc8zJ5x4rY? zwH=_TwJ_sO*!lFvbTOaH+SEvAZL5%RbEBr=%Z_1=Wg!mv593pVZ!dgKHSvx~$%enV zsvEkZm4vJ0O8BZUPlkm1{^&g@b?<$`o7pK zoR(*mS#>rvtCbY?eo5g{+LQ=w2;3>nx#rfClz9SBnbEh#0n6e?*~<=MWlE4y`*^)y-%OJQW8F&YOoB21oW6zT<9bqNe1!kKL+APm8~ z)m};-s9^+_cU?jt17JmDrpjB5(5|mSWQGC zlL$aIP|m1-CPQou@cWA6(p;XcfHvy}kR+VWPn!Pv@v<+zlNE7a&A6kqvrB zqs`?cZ?WGPjqHMn}o1eb5HeQ9dcL12}@Z*+EY5Ro9*}@{S*Nj6#NGT$_F4 zRAJ7B=3D@|8)iS0*x7p>SHRj3EJclkZ_FY~`NFxUS;yZS*TN*;&Lc^A47HK%{l@d>E-w707%@hhoo+f41gk7bO$uY;5 zri>Y1Ih!$y#-hdGIvDQsi{7!S&O;a$ek7f?Y+5vpblQ)l(`SkqGoP`b#~Bw1-}D3< z0l638xzRB@XW@M;4ksbptx*vP@FKH#E^7$oY zk+gX|%@_Iu314PLfh9$D1|)sS$Y&Qhiq8}oFmsNJxp6S*eb(Gd?(v#hPOU=w{n*_> z>fFbe&HU&cwmnrW@D2~~q{4Q2PquWe`wlXPsDWvgR!rb0#)}4IZJ64Ib3Xos_pg?{ zI}T3nr3ja0jGkBY(>qY!38q@jh(r@AbuLhYx@=9t3LOj3}h|Geskf zE9r+c7vu+`itMttrR|5K7=JtuuzR4Dlzl?=crJg|V6H*aG?#VRy)dB5uEDHI`=bFP zXWNO&6Tm9)jZKgQsTZpWd$_#3{bO;(k5D`D$yB8dZNSV}qck&^_Lg;wbO(Iw5n!+2 zWRC(Z1GBKlyvI8Zb>g;hJ`9p^NDGUCgt;)7^`zywecX}3CNFSnqZtQc*>Hmy_=!RR z1DAlwL>7{YFXQ5UsbCk_Meo(+jpPaMcgr^~(?eO)c8bjK$t0JD?fD6s@&tnI-qOxS za?IP@xsqHh-`}}}ocbx!aarsl0662w=xYl;=I&)DaU4y(Ws)De;oV?uXd7NXLKts~ z1ZgAeS?|3S7n2oIk*sNC^63fJGJk+F{64^&<$2)FZ(`4Z3hBCy@>fuFZP>vDeFqwGXOu zK@tHD*g}S>%9kNoxPJV&YZnf|1W@MKfwAmGaR0=>tQek;cgnT9|0(WiNA(y#tRDd{ zD{vcqA$1-S<5(^aA<$vkb|D$(PUG3r7i|Y}vzu{QzqfO!nLO>C9NOBCsnb`one2q2 zM>rb7+e}yCYDZV@hH*WXEiz{0T|UJ2c)uOmz{|%QslX%q8p%bkd0%?&eYEH|02W** zVAXN6K7SfIl)#G-UXQa95lZmto6^{>nS?9O3si>kSvVo&+hoC&rXp;R2>5qp3dQ#K z$VnkD-Yd9D3FXwMpk|(xU9}_wR!Rgkg@@QfZfIVZRZHrmMkQc`N}Pr+$uO02CG18@ z>O8p$CGQz_z&$!F;%Y6u7-j3lmh% zmy3?Ue6;{?n{zZ7*X6wFPO%SwkyQckwP9l>xO7dVRU?QasKe11T%KtzYcuLcu{V*) zL!r!A4KmdtNB~HNpyp~~_Vf(<2nDqTAwHG&z`@nKz7|x5n=YWlM;KHXB|)H@*ex7{O^O)Vl7s}=f#c=9#$#HQ0u+f;qIPa134-AvSAvQub6OzO!f@W zDh?O>6lj9vy+7PWJ}rMf{GQ0uT@a&aLJE%1uK@cJ99q!MlNz23a2!<6x;zEYCpYdr zz9e2Tso4qJc8m@T2p 
zh~3B8_b283`%|eKP+WAP#T1WXP=N<6xep82v7!r?DwM3?Pa$~bti{%M5!?^JSNQ8T zcsi^^f;!reb1)^mi@h*jAFgeij(6g&4HNKq%T4nK8;hcf7y!KqXg!vF)^K{@mNRps zJq#}uC{B6QU@Y*!3L@!NkYG6J2@Jj-c!L`(pD`VVnS?cXAKcY2r$_l%5X`I*2L=!f zBG`+7zlV-s>nMUd5gbP_f?ynhjo@VjB?J#3xQyUo1XmF}i{N<#SiAF^6qDne5M;$S zk;pSV=2`X*0-iHI!4^gj-^oQ`bt?w6f#3--h$+Rv;{@T%BVZ8}F;`ZT7~rl*HAaUP QS*102A06K*t&Vj39~l00%>V!Z delta 5265 zcma)AeQX@X72nz0yW2aT&yM57--#0^Bo`bf&i9AoU`Pzkhe;rOWJ7B0-E8c$_dYVa zCUH3T)H;w*1QZse(t=1rK$MR{dq5#oq3s{2Qd>%;mWoBLQ%hdZ#GVd ziPYZd-n^N4@6DSxZ{B-zpPgk-KF^ZZS69aseCDP13|$z0B-vR0j429NII|TaXqmxe zUo}+OhK!JGHA9na*a*uuVnk#cHKMZB4IQm&$8vEao~tsdatR}mt2U|`$qm`bT+&EN zT(fI(wMK1jmNAQ!X)2p0`iox9~WxI?jywJi)7vD@HrNl_z-(Mi%gWd={?*wvgY(>-ijD z9egftIIfh9MZA$WfqXG<<}GNixl!S*yzRKc+pOBDkgZc?T#`)-t~6 zkjfYHYcP5(U&5DSw3Da!GPKM2wY(E;7r%pd@fDz0!B_HbjI89V_-eG>d<|cVb`@X8 z%j~*&HOIDgYN{^!*}uBpxnX-`ORp1`9Z#i@5Rhk&TdswUpcQt;0%K)&D*24 zh5e9gSzb=<_TE*0=sg)~RWB&sxln4(dv3&yp3+OoX?7}hTA5w4#!Z&OQw&g_Vq==5!3^SO6nnXt}>~*iBmPDa4AxXmh{uwWX#pv+R<4&UQ$ajqTp3L;kG49Di*J9Q0f() zd=QE-nDdT_t3a)d;bXUNceH|OfgJT-u6gm!&->Duyw^qA!n@^H77<2MN^Ee z=<-UDwXMrA=X3%nOjqM9q9$1@i!+ranCkr|(#+OZK8^gIZG##Ujvsbhp3RH-AQuY& zQcP?hoK^_HI$%EZ8p?9px)~jZcBEA4s@PPC#=6-C72Q_)!Iie8%SVcCnWUt>>zp)aGvhK{}Qn7@h=G2 zO#)V477+bE1gw9;izeH>mYREOH-p@dn8hN_D5b$IU#)4VzpObaE%pLD<9$@KkS*~3 zT{CYP^{=)&u?G`p@ROUA&*ALgR4#p%jE9J(4nkw z+~0ZU=eDpTm2g8vWewh2O~=;+9KI&3qDZO21}8aw<_m_BE5&>4tm9@1xnggJceuHm z-Qqpde1Nrj|7o6IP2QObiykz_Y|m_(?e(@KmQRqZD7P{NZhh$@C5Oi?#~H#(1zg_Y zzj;SmZeaI$KWZ6Yeo>{6$@#QK>?n4xIhK{bym@g9^1Ho*t!ugs^{5jVi;4R&8tlDl zJfA&0ZVi#*U5H-6rAIUo4|qRoJ((sS5*C4508*OIxKW+W-)#xk;zK-}am7ANAhcN` zH^Ux&RX`}oHj?lpND;1Zj!As$)mkPVuDsOtk=pSMlFocKHdBV)W1G1&H+%PcFSPGl z8qkSvf_w(0psRNPtB6N|c<(Hj>-8*HxsVP^l9Ou;HCuUAu!FaavgT)i9EW?0| zRAG0#Xj|eiMETJ}w&}X?7asBFB-9m8y3z%-U5Nu35OH?uO0vhomftLupOn{=M zcm=?Zj^J(<#DozeIYV;u&(rJ-6(h)#W=?f{@A=h>*=v>eR)5HD+yTNtM7!6}B1O_l zeq`(j6&d1cLZay`NpYIs;K3o+)-&-&<*#c$W;@S^zAR0=j zw6OH>**c<5J-*@`*fO*ad@7aA8^gOV*?b>vq_jPmeK1J+q#-VtJjX~OJ&Kx5Dktay zTyb7|N-c#EeSCA!S z(Dl`VgOKi;t{@se>YyUZ9z!LGFh52?*S5tI-Y45OvVGpX?epumfXYZ@#)Yu*?lzeS zd%eBeS9Z)$sV<$(n_>bUZHf#fKHIv_JG*_T{7YK;9e`;Y$-AOsfby3^p|ylp+?X!b zOe9@p3NJBK)XE)7zEx9PHWnAFm_lDy#+8or*9{uNY2o(dzU+c^UfDeG(FjwSp^(R7?6JXIy=>iYUlYht?)s0oMB71adDS41s< zkw9V{k_-jGjVkJmn0ZvjLQoPDH3X^vQV~B~6osP`;(hddU7u$1`UaNGEQwHvx;J2+od-QB7mg4wYwH~{TRbkd0V!X^TReg+!o)) z*bLtnA7bot)jPSXm3>%wcGs^}8I1zhl5q(h{uXHb5bhMD(;brO&d~BGf(>m9&}SDj zp7a@+m?D{xLcy^bac>+lk21Rv_h}w}Iw~@}5I)9OhGHIl=yuT$9#dAFc3>_Je~R6u zPBJ{W=ychXN-jbtJ*DCer5;=4v@h>%U+S#vbh=ZWK)O4fRT5d%+3u_c*45QDh!93` zTYMkD4;{-EVHKUCwwbn(SWTa$udqp(j-+U2#gVLIZ6@_)3Q!YB4B`(El97oF2#pg? 
ziaU1!aE3n_l${sbX^v^Bj_oz6Vbf+qP^f2W@i#<)BPU%!uTN zMhj`@l6~PiQq9$*D)~@JIaoKzPAVrMo0Pg8DjvqaDz_@-6q~M)W!Wlz0dhFOpV0W) zq4DV&_EK@Pn`}f2Xv(n^J(@@(L>zc6ir+#8^=Ma*UC5ZW)7y2au!bvrLJ}j;)6S;F zAJLI(l;LT3|1=YD}Q%ErZJ9*zlO(l0wBF(+JPW_}d zU28?Y5NhFW^vrkUOLU>=ppi%#?uX3r6Y9jsU)@5%b_5yPJF#cw z4kL;B2$t&-RAS)mBO?*T7a6RA=yW;o$Pksx+wNEw9?6!-<2b}DqOSAi@10W?>j~dP zU@L)s0s{ng64*`PW&(Q%qzQ}?_%4AX1nwj70D*@IPys5QCUAzpSpv@yc$EMZ7vc>9 zGP$3F)fMqJ0hvxdAdEtj3={I&;b_pkoRlty8oei1;N!N{OF73OttiM`@l_k?kXb@q j6vsj9@x}U_cru=d&(<}3W3>tIx?8%VOXKZ&hhFtxMTFCI diff --git a/nlp_resource_data/nltk/__pycache__/wsd.cpython-37.pyc b/nlp_resource_data/nltk/__pycache__/wsd.cpython-37.pyc index 8b85d4d25a896763130366e69b97318574568e1d..2772e4b3d3062d8b3e2227c86c068e826f222096 100644 GIT binary patch delta 32 mcmeC>Yvkj0;^pOH0D@BXi5t29v#=TImlhZ4Z*eEi;^pOH00OzBfQ{V$S%mELGxBp&^|KNyGx7@(i?a3gHVd$RWCQ^2M+{;B diff --git a/nlp_resource_data/nltk/app/__init__.py b/nlp_resource_data/nltk/app/__init__.py index 458ac4c..19157ac 100644 --- a/nlp_resource_data/nltk/app/__init__.py +++ b/nlp_resource_data/nltk/app/__init__.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Applications package # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # Steven Bird # URL: @@ -22,7 +22,7 @@ wordnet: WordNet Browser # Import Tkinter-based modules if Tkinter is installed try: - import tkinter + from six.moves import tkinter except ImportError: import warnings diff --git a/nlp_resource_data/nltk/app/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/app/__pycache__/__init__.cpython-37.pyc index cc90f65930a2b524ca517fd123b0c4c467ce5ee9..54f030ec31a075b8c85c375995e2c9b43d2490d3 100644 GIT binary patch delta 431 zcmZvYzfQw25XSEkyJ-?q%AZ18kV=Olb)hSQjg1usikGP1fT}tTa#I9Tm%c!i7oZCg za|IJm!Pt!%o`8u<$p8|zWdH8I^QTXKcRyV3$nzYaHMA!F%fzdUKBVumZ5*vZ%>Udq@bE@Zq+ zTwL&os}(9%GhvN*J>wGb2L1%gsm-t~%722r{S(Z}f)%o074oQ(UtZc{5L?nbg{ITF zLICHu9|yWK+d+wL6X#ak>!zLA*kD-a^eatco(m&6RDn**C(ssMUx%&XK(~@-^Q2~{ zQ`>XP_hoeH`@$Y{@AkszHW+Awq@>37(VVpzVnH}zC%#cpthyJPCD*)BN?$K5e;xT` L%BxsRUU}mi-gR7& delta 424 zcmaFO{g9j2iI#2Fco9Ci+i!`Pq>7$&)o%EEt(KC$TUx^4;RfO)M$M$uG&tOe$iYT*#^_3epBP7l~k>yqZ;t Kk!SKTR#gD`NnOPN diff --git a/nlp_resource_data/nltk/app/__pycache__/chartparser_app.cpython-37.pyc b/nlp_resource_data/nltk/app/__pycache__/chartparser_app.cpython-37.pyc index 10a3a5381a784cbff865f9e553821c8949729f81..887e567a5b1c3cd5e6f2734fdc3089a91dd1f299 100644 GIT binary patch delta 19127 zcmb7s31C#!)&IPi$z&x22>TXxCM*d_*mnW}1i}&od5Xhi<|P@JCEl5YBq9j{m1-6B z+FGsJYF(<;iXFGVEhw%P-0JRg*Sb(Qw4b$~we>r{b7x5&5&T`0H^;k3~ItZ$h(73c+wk`e3`>aF+YvMbmpRHzq*`s#i8J2TiX)L-u(8lVs0@2ucu zw~S~HjnqemM(LxtDmyqjG)5o8<(%Ny&^UcuXuLk2zjK4`P_bUj<-Fj8&_sP=Xp%lD zG+CbnUX--G;v z>3gs-#~8WWsn4aFA^xFMGn8ruQ_Vc88Rj2OHN&Z95Y^0IYFq3t@{gdZ5mYstsumE^ zNdG9R8O4kj`bYc6?6K*Kj7tC5ZT3qw@v5-!3X?iqiL%oN8^0yffz?MP)#kNWg7xvlkVM zDO7B86pe`j2vIB_&MmG*rI|x)!_k;G>@&nrP)$N@ZXoLMMM8~1BWAcW#8lKd12H3H zX89uFm=TUeO)X$Lj9^Sa0tIFOEyn{qk1!fVgRIWmTr~!@nKheMEn2;@M$7^BOn@Z- zX<{ycgK!hrO#hF%2xMtF_N*-7j(?fA(XQpmnU8 zJ>jO1;jcHMotUyjSsqUy9Ef>5=gScVN2WW7LezlF+359cb2}2l6Ol;Fbe_``untMA zC*fOWd+nhNV5j zUdPF8&sqR0CIi_7mN_&|zftFH3dWMIpJ6mpL^GXrO<|wK62xwz`5$>?$a&L1opjTi zsVr^*uneN7FhY@LBW2Ow$n%C?v;d^yDgvf+o6#C2FJfN-7t`q#^-(j^SL^An`*uRH zX>!@HyOy!3V-|VZ#jjA}NrFxV5R=lWc|mPg>BQ|+=aLJDx6ey6!{m#r0ZuYEJ^V)0 z2pT?e3bKK|^1s7d=ddVufDmj?)wh}J@dX3EZOmYZj28uFgG^iwzy|1S4j9{$L8=4Q zU8Og(fxz}RQf?d(UBF|oG6{Lbwv#de0y7yab4CVcF%$H!yPCqMBFTGF>_GX*x z@#X-zY`~gvD!@<+f=+kgNg(X_Z;b&+aske7`s 
zsPQf^!#=O7OAhNnH5~fq1m$PPl;>3ah@wqa!~dS3^jz|rnoFemN{q1zU@Xmsy+2Av zCg<528$6TJ^|9xL!>1OM|G2}1ipT4CdSBqqg?F|Vf~$R=iqsF>V(Qot4B`CgcF4j0 z{*WhS%FbTopDlc*S=1j^gulhfVd>Gezl%suSCu&pe}LvHLafTk9{az7bU?ZnP466Ikg|JDbrs zs6>>#G1k<~Tb5o`CT3|&V{1w@<%InXcx(CJmacS)6CuDW8|L|61E!IwM+XO0Wo$GI zU5m`I*Rj=u!1>;WTBo^U_@fO)J#kcYP$eE>?6=S+zOh`X!G1+$mtePbP!;jQI7IA1 zT?}}t9Ez%{64K>pWZZ8tFIrYDo#GwKl5i3D@3NIur@+W~4#bRD*}K4+eAeK=y7<8I z`in)VIDy1vMiYl~MtBuli%=d*Q`(RSC@&RIP*ukg5%0ix6-R-?X@#TU{p7dHmslaV zx|Od29&ah+U(|beVNfa~zo& z2qN9D@$6Lv^8xQ?e7X5^{x4Yw?ZBVk>Z5A%k=!^JJA<1(DFaA+9c0%$^u#mzCi z%fAcwXPbBSI2>z{!AZ6l&c}niqxF92BmQOUv=w50{sc_iT!J*tFgXy>Ux19+#Fhvq z6%qRwc**3Nw)Ij=YHkfDQ?%%A#jph_9Ul|Uub3He4o)_68hW4SC)(es74srYZ3Z4J zSdo!*b);Rt1O3POiH_>gCTOu$2+aV7OO|OgB+G`KhC@3006TDWh7WGxdN`yh`cRaL z&>1>rXlX)3X&0D+LpZbe`r@|Ptyx)ak1xZWEs_(09{ll3*R5TI4j@{r4@jWcwkD+t z7sM$vCe$SMdh%cEhNUWzX35O(B#j!-IGxh)vYO!}Mf#=PTBVTvS5)<9py6;|x<#=-@@$!B=jMb>U&dUPW;mzPzw-jQs{# zc>4UL1*mZ`NT-FtiH!2F1#hnKiWpX-nU{E7cXO?%bG(roc6{Y!Mc7q~AkIw{TN*+H zA9%o|60qnCQpB=-d z3&p^>xh-LaCL{z?eFkTs1{xNxQ{szzDy7fyZjsb`9+0GRKC`c@5NFiX68a*S)xay~ z*YwSos(If_-oxEY|P*z3SRk|fKzXr|5@f=zz+1JU^fvNWVv{bs4 z&D(d)%G{L_&Sz)%VQqQxv0YwtmOM6mK!UfEcCGS)2}&4kIlxt@CqBgG+hNmkTJ2U= zo;L{2!(NBh#f;Cz7KUVa$8I2a#b;R2EQoA3-1ZectSN?-DXbIuGQzhAA0gm| z%T53g?~ty?YT^o8yc=$I4&*xl;G+{$)o_0ed_96rmk^Dp`fyDs%3_H)bED)ie!Jc~&9zwVe z;eLb%5FSK$0^v@CUm-k(F#0&+lL(I@JPbgbt=%mf13m25_%P;ZO-pZ}ZBzF);x!%j z^|Z7F*dwTOH^MClhY;{sW|at40K^M7R5chekB}qCeg;8o2hUnYq4^v>KacPN!ixZ; zU?2h4u%VGyG%}*W)hwdRx&m)oFQWur3)$Zh#C~!TT81;)zYDID<{tYx_SGQZj>>RH zV7Rs!E>m^}p&a2WivlPr5(|jop@c^fUIFC^#g?vhcOw*LLRFe7Ks1gSwYz*)`1}Q5 T3h;B! # Jean Mark Gawron # Steven Bird @@ -37,10 +37,11 @@ edge you wish to apply a rule to. # widget system. +from __future__ import division import pickle import os.path -from tkinter import ( +from six.moves.tkinter import ( Button, Canvas, Checkbutton, @@ -52,9 +53,9 @@ from tkinter import ( Tk, Toplevel, ) -from tkinter.font import Font -from tkinter.messagebox import showerror, showinfo -from tkinter.filedialog import asksaveasfilename, askopenfilename +from six.moves.tkinter_font import Font +from six.moves.tkinter_messagebox import showerror, showinfo +from six.moves.tkinter_tkfiledialog import asksaveasfilename, askopenfilename from nltk.parse.chart import ( BottomUpPredictCombineRule, @@ -90,29 +91,29 @@ from nltk.draw import CFGEditor, tree_to_treesegment, TreeSegmentWidget class EdgeList(ColorizedList): - ARROW = SymbolWidget.SYMBOLS["rightarrow"] + ARROW = SymbolWidget.SYMBOLS['rightarrow'] def _init_colortags(self, textwidget, options): - textwidget.tag_config("terminal", foreground="#006000") - textwidget.tag_config("arrow", font="symbol", underline="0") - textwidget.tag_config("dot", foreground="#000000") + textwidget.tag_config('terminal', foreground='#006000') + textwidget.tag_config('arrow', font='symbol', underline='0') + textwidget.tag_config('dot', foreground='#000000') textwidget.tag_config( - "nonterminal", foreground="blue", font=("helvetica", -12, "bold") + 'nonterminal', foreground='blue', font=('helvetica', -12, 'bold') ) def _item_repr(self, item): contents = [] - contents.append(("%s\t" % item.lhs(), "nonterminal")) - contents.append((self.ARROW, "arrow")) + contents.append(('%s\t' % item.lhs(), 'nonterminal')) + contents.append((self.ARROW, 'arrow')) for i, elt in enumerate(item.rhs()): if i == item.dot(): - contents.append((" *", "dot")) + contents.append((' *', 'dot')) if isinstance(elt, Nonterminal): - contents.append((" %s" % elt.symbol(), "nonterminal")) + contents.append((' %s' % 
elt.symbol(), 'nonterminal')) else: - contents.append((" %r" % elt, "terminal")) + contents.append((' %r' % elt, 'terminal')) if item.is_complete(): - contents.append((" *", "dot")) + contents.append((' *', 'dot')) return contents @@ -127,7 +128,7 @@ class ChartMatrixView(object): """ def __init__( - self, parent, chart, toplevel=True, title="Chart Matrix", show_numedges=False + self, parent, chart, toplevel=True, title='Chart Matrix', show_numedges=False ): self._chart = chart self._cells = [] @@ -138,7 +139,7 @@ class ChartMatrixView(object): if toplevel: self._root = Toplevel(parent) self._root.title(title) - self._root.bind("", self.destroy) + self._root.bind('', self.destroy) self._init_quit(self._root) else: self._root = Frame(parent) @@ -157,27 +158,27 @@ class ChartMatrixView(object): self.draw() def _init_quit(self, root): - quit = Button(root, text="Quit", command=self.destroy) - quit.pack(side="bottom", expand=0, fill="none") + quit = Button(root, text='Quit', command=self.destroy) + quit.pack(side='bottom', expand=0, fill='none') def _init_matrix(self, root): - cframe = Frame(root, border=2, relief="sunken") - cframe.pack(expand=0, fill="none", padx=1, pady=3, side="top") - self._canvas = Canvas(cframe, width=200, height=200, background="white") - self._canvas.pack(expand=0, fill="none") + cframe = Frame(root, border=2, relief='sunken') + cframe.pack(expand=0, fill='none', padx=1, pady=3, side='top') + self._canvas = Canvas(cframe, width=200, height=200, background='white') + self._canvas.pack(expand=0, fill='none') def _init_numedges(self, root): - self._numedges_label = Label(root, text="0 edges") - self._numedges_label.pack(expand=0, fill="none", side="top") + self._numedges_label = Label(root, text='0 edges') + self._numedges_label.pack(expand=0, fill='none', side='top') def _init_list(self, root): self._list = EdgeList(root, [], width=20, height=5) - self._list.pack(side="top", expand=1, fill="both", pady=3) + self._list.pack(side='top', expand=1, fill='both', pady=3) def cb(edge, self=self): - self._fire_callbacks("select", edge) + self._fire_callbacks('select', edge) - self._list.add_callback("select", cb) + self._list.add_callback('select', cb) self._list.focus() def destroy(self, *e): @@ -209,19 +210,19 @@ class ChartMatrixView(object): for i in range(N): for j in range(i, N): if cell_edges[i][j] == 0: - color = "gray20" + color = 'gray20' else: - color = "#00%02x%02x" % ( + color = '#00%02x%02x' % ( min(255, 50 + 128 * cell_edges[i][j] / 10), max(0, 128 - 128 * cell_edges[i][j] / 10), ) cell_tag = self._cells[i][j] self._canvas.itemconfig(cell_tag, fill=color) if (i, j) == self._selected_cell: - self._canvas.itemconfig(cell_tag, outline="#00ffff", width=3) + self._canvas.itemconfig(cell_tag, outline='#00ffff', width=3) self._canvas.tag_raise(cell_tag) else: - self._canvas.itemconfig(cell_tag, outline="black", width=1) + self._canvas.itemconfig(cell_tag, outline='black', width=1) # Update the edge list. edges = list(self._chart.select(span=self._selected_cell)) @@ -230,14 +231,14 @@ class ChartMatrixView(object): # Update our edge count. 
self._num_edges = self._chart.num_edges() if self._numedges_label is not None: - self._numedges_label["text"] = "%d edges" % self._num_edges + self._numedges_label['text'] = '%d edges' % self._num_edges def activate(self): - self._canvas.itemconfig("inactivebox", state="hidden") + self._canvas.itemconfig('inactivebox', state='hidden') self.update() def inactivate(self): - self._canvas.itemconfig("inactivebox", state="normal") + self._canvas.itemconfig('inactivebox', state='normal') self.update() def add_callback(self, event, func): @@ -271,7 +272,7 @@ class ChartMatrixView(object): self.update() # Fire the callback. - self._fire_callbacks("select_cell", i, j) + self._fire_callbacks('select_cell', i, j) def deselect_cell(self): if self._root is None: @@ -313,37 +314,37 @@ class ChartMatrixView(object): LEFT_MARGIN = BOT_MARGIN = 15 TOP_MARGIN = 5 c = self._canvas - c.delete("all") + c.delete('all') N = self._chart.num_leaves() + 1 - dx = (int(c["width"]) - LEFT_MARGIN) / N - dy = (int(c["height"]) - TOP_MARGIN - BOT_MARGIN) / N + dx = (int(c['width']) - LEFT_MARGIN) / N + dy = (int(c['height']) - TOP_MARGIN - BOT_MARGIN) / N - c.delete("all") + c.delete('all') # Labels and dotted lines for i in range(N): c.create_text( - LEFT_MARGIN - 2, i * dy + dy / 2 + TOP_MARGIN, text=repr(i), anchor="e" + LEFT_MARGIN - 2, i * dy + dy / 2 + TOP_MARGIN, text=repr(i), anchor='e' ) c.create_text( i * dx + dx / 2 + LEFT_MARGIN, N * dy + TOP_MARGIN + 1, text=repr(i), - anchor="n", + anchor='n', ) c.create_line( LEFT_MARGIN, dy * (i + 1) + TOP_MARGIN, dx * N + LEFT_MARGIN, dy * (i + 1) + TOP_MARGIN, - dash=".", + dash='.', ) c.create_line( dx * i + LEFT_MARGIN, TOP_MARGIN, dx * i + LEFT_MARGIN, dy * N + TOP_MARGIN, - dash=".", + dash='.', ) # A box around the whole thing @@ -360,25 +361,25 @@ class ChartMatrixView(object): i * dy + TOP_MARGIN, (j + 1) * dx + LEFT_MARGIN, (i + 1) * dy + TOP_MARGIN, - fill="gray20", + fill='gray20', ) self._cells[i][j] = t def cb(event, self=self, i=i, j=j): self._click_cell(i, j) - c.tag_bind(t, "", cb) + c.tag_bind(t, '', cb) # Inactive box - xmax, ymax = int(c["width"]), int(c["height"]) + xmax, ymax = int(c['width']), int(c['height']) t = c.create_rectangle( -100, -100, xmax + 100, ymax + 100, - fill="gray50", - state="hidden", - tag="inactivebox", + fill='gray50', + state='hidden', + tag='inactivebox', ) c.tag_lower(t) @@ -406,24 +407,24 @@ class ChartResultsView(object): if toplevel: self._root = Toplevel(parent) - self._root.title("Chart Parser Application: Results") - self._root.bind("", self.destroy) + self._root.title('Chart Parser Application: Results') + self._root.bind('', self.destroy) else: self._root = Frame(parent) # Buttons if toplevel: buttons = Frame(self._root) - buttons.pack(side="bottom", expand=0, fill="x") - Button(buttons, text="Quit", command=self.destroy).pack(side="right") - Button(buttons, text="Print All", command=self.print_all).pack(side="left") - Button(buttons, text="Print Selection", command=self.print_selection).pack( - side="left" + buttons.pack(side='bottom', expand=0, fill='x') + Button(buttons, text='Quit', command=self.destroy).pack(side='right') + Button(buttons, text='Print All', command=self.print_all).pack(side='left') + Button(buttons, text='Print Selection', command=self.print_selection).pack( + side='left' ) # Canvas frame. 
self._cframe = CanvasFrame(self._root, closeenough=20) - self._cframe.pack(side="top", expand=1, fill="both") + self._cframe.pack(side='top', expand=1, fill='both') # Initial update self.update() @@ -466,15 +467,15 @@ class ChartResultsView(object): c.delete(self._selectbox) self._selection = widget (x1, y1, x2, y2) = widget.bbox() - self._selectbox = c.create_rectangle(x1, y1, x2, y2, width=2, outline="#088") + self._selectbox = c.create_rectangle(x1, y1, x2, y2, width=2, outline='#088') def _color(self, treewidget, color): - treewidget.label()["color"] = color + treewidget.label()['color'] = color for child in treewidget.subtrees(): if isinstance(child, TreeSegmentWidget): self._color(child, color) else: - child["color"] = color + child['color'] = color def print_all(self, *e): if self._root is None: @@ -485,7 +486,7 @@ class ChartResultsView(object): if self._root is None: return if self._selection is None: - showerror("Print Error", "No tree selected") + showerror('Print Error', 'No tree selected') else: c = self._cframe.canvas() for widget in self._treewidgets: @@ -494,7 +495,7 @@ class ChartResultsView(object): c.delete(self._selectbox) (x1, y1, x2, y2) = self._selection.bbox() self._selection.move(10 - x1, 10 - y1) - c["scrollregion"] = "0 0 %s %s" % (x2 - x1 + 20, y2 - y1 + 20) + c['scrollregion'] = '0 0 %s %s' % (x2 - x1 + 20, y2 - y1 + 20) self._cframe.print_to_file() # Restore our state. @@ -571,25 +572,25 @@ class ChartComparer(object): """ _OPSYMBOL = { - "-": "-", - "and": SymbolWidget.SYMBOLS["intersection"], - "or": SymbolWidget.SYMBOLS["union"], + '-': '-', + 'and': SymbolWidget.SYMBOLS['intersection'], + 'or': SymbolWidget.SYMBOLS['union'], } def __init__(self, *chart_filenames): # This chart is displayed when we don't have a value (eg # before any chart is loaded). - faketok = [""] * 8 + faketok = [''] * 8 self._emptychart = Chart(faketok) # The left & right charts start out empty. - self._left_name = "None" - self._right_name = "None" + self._left_name = 'None' + self._right_name = 'None' self._left_chart = self._emptychart self._right_chart = self._emptychart # The charts that have been loaded. - self._charts = {"None": self._emptychart} + self._charts = {'None': self._emptychart} # The output chart. self._out_chart = self._emptychart @@ -599,9 +600,9 @@ class ChartComparer(object): # Set up the root window. self._root = Tk() - self._root.title("Chart Comparison") - self._root.bind("", self.destroy) - self._root.bind("", self.destroy) + self._root.title('Chart Comparison') + self._root.bind('', self.destroy) + self._root.bind('', self.destroy) # Initialize all widgets, etc. 
self._init_menubar(self._root) @@ -637,122 +638,122 @@ class ChartComparer(object): # File menu filemenu = Menu(menubar, tearoff=0) filemenu.add_command( - label="Load Chart", - accelerator="Ctrl-o", + label='Load Chart', + accelerator='Ctrl-o', underline=0, command=self.load_chart_dialog, ) filemenu.add_command( - label="Save Output", - accelerator="Ctrl-s", + label='Save Output', + accelerator='Ctrl-s', underline=0, command=self.save_chart_dialog, ) filemenu.add_separator() filemenu.add_command( - label="Exit", underline=1, command=self.destroy, accelerator="Ctrl-x" + label='Exit', underline=1, command=self.destroy, accelerator='Ctrl-x' ) - menubar.add_cascade(label="File", underline=0, menu=filemenu) + menubar.add_cascade(label='File', underline=0, menu=filemenu) # Compare menu opmenu = Menu(menubar, tearoff=0) opmenu.add_command( - label="Intersection", command=self._intersection, accelerator="+" + label='Intersection', command=self._intersection, accelerator='+' ) - opmenu.add_command(label="Union", command=self._union, accelerator="*") + opmenu.add_command(label='Union', command=self._union, accelerator='*') opmenu.add_command( - label="Difference", command=self._difference, accelerator="-" + label='Difference', command=self._difference, accelerator='-' ) opmenu.add_separator() - opmenu.add_command(label="Swap Charts", command=self._swapcharts) - menubar.add_cascade(label="Compare", underline=0, menu=opmenu) + opmenu.add_command(label='Swap Charts', command=self._swapcharts) + menubar.add_cascade(label='Compare', underline=0, menu=opmenu) # Add the menu self._root.config(menu=menubar) def _init_divider(self, root): - divider = Frame(root, border=2, relief="sunken") - divider.pack(side="top", fill="x", ipady=2) + divider = Frame(root, border=2, relief='sunken') + divider.pack(side='top', fill='x', ipady=2) def _init_chartviews(self, root): - opfont = ("symbol", -36) # Font for operator. - eqfont = ("helvetica", -36) # Font for equals sign. + opfont = ('symbol', -36) # Font for operator. + eqfont = ('helvetica', -36) # Font for equals sign. - frame = Frame(root, background="#c0c0c0") - frame.pack(side="top", expand=1, fill="both") + frame = Frame(root, background='#c0c0c0') + frame.pack(side='top', expand=1, fill='both') # The left matrix. - cv1_frame = Frame(frame, border=3, relief="groove") - cv1_frame.pack(side="left", padx=8, pady=7, expand=1, fill="both") + cv1_frame = Frame(frame, border=3, relief='groove') + cv1_frame.pack(side='left', padx=8, pady=7, expand=1, fill='both') self._left_selector = MutableOptionMenu( cv1_frame, list(self._charts.keys()), command=self._select_left ) - self._left_selector.pack(side="top", pady=5, fill="x") + self._left_selector.pack(side='top', pady=5, fill='x') self._left_matrix = ChartMatrixView( cv1_frame, self._emptychart, toplevel=False, show_numedges=True ) - self._left_matrix.pack(side="bottom", padx=5, pady=5, expand=1, fill="both") - self._left_matrix.add_callback("select", self.select_edge) - self._left_matrix.add_callback("select_cell", self.select_cell) + self._left_matrix.pack(side='bottom', padx=5, pady=5, expand=1, fill='both') + self._left_matrix.add_callback('select', self.select_edge) + self._left_matrix.add_callback('select_cell', self.select_cell) self._left_matrix.inactivate() # The operator. 
self._op_label = Label( - frame, text=" ", width=3, background="#c0c0c0", font=opfont + frame, text=' ', width=3, background='#c0c0c0', font=opfont ) - self._op_label.pack(side="left", padx=5, pady=5) + self._op_label.pack(side='left', padx=5, pady=5) # The right matrix. - cv2_frame = Frame(frame, border=3, relief="groove") - cv2_frame.pack(side="left", padx=8, pady=7, expand=1, fill="both") + cv2_frame = Frame(frame, border=3, relief='groove') + cv2_frame.pack(side='left', padx=8, pady=7, expand=1, fill='both') self._right_selector = MutableOptionMenu( cv2_frame, list(self._charts.keys()), command=self._select_right ) - self._right_selector.pack(side="top", pady=5, fill="x") + self._right_selector.pack(side='top', pady=5, fill='x') self._right_matrix = ChartMatrixView( cv2_frame, self._emptychart, toplevel=False, show_numedges=True ) - self._right_matrix.pack(side="bottom", padx=5, pady=5, expand=1, fill="both") - self._right_matrix.add_callback("select", self.select_edge) - self._right_matrix.add_callback("select_cell", self.select_cell) + self._right_matrix.pack(side='bottom', padx=5, pady=5, expand=1, fill='both') + self._right_matrix.add_callback('select', self.select_edge) + self._right_matrix.add_callback('select_cell', self.select_cell) self._right_matrix.inactivate() # The equals sign - Label(frame, text="=", width=3, background="#c0c0c0", font=eqfont).pack( - side="left", padx=5, pady=5 + Label(frame, text='=', width=3, background='#c0c0c0', font=eqfont).pack( + side='left', padx=5, pady=5 ) # The output matrix. - out_frame = Frame(frame, border=3, relief="groove") - out_frame.pack(side="left", padx=8, pady=7, expand=1, fill="both") - self._out_label = Label(out_frame, text="Output") - self._out_label.pack(side="top", pady=9) + out_frame = Frame(frame, border=3, relief='groove') + out_frame.pack(side='left', padx=8, pady=7, expand=1, fill='both') + self._out_label = Label(out_frame, text='Output') + self._out_label.pack(side='top', pady=9) self._out_matrix = ChartMatrixView( out_frame, self._emptychart, toplevel=False, show_numedges=True ) - self._out_matrix.pack(side="bottom", padx=5, pady=5, expand=1, fill="both") - self._out_matrix.add_callback("select", self.select_edge) - self._out_matrix.add_callback("select_cell", self.select_cell) + self._out_matrix.pack(side='bottom', padx=5, pady=5, expand=1, fill='both') + self._out_matrix.add_callback('select', self.select_edge) + self._out_matrix.add_callback('select_cell', self.select_cell) self._out_matrix.inactivate() def _init_buttons(self, root): buttons = Frame(root) - buttons.pack(side="bottom", pady=5, fill="x", expand=0) - Button(buttons, text="Intersection", command=self._intersection).pack( - side="left" + buttons.pack(side='bottom', pady=5, fill='x', expand=0) + Button(buttons, text='Intersection', command=self._intersection).pack( + side='left' ) - Button(buttons, text="Union", command=self._union).pack(side="left") - Button(buttons, text="Difference", command=self._difference).pack(side="left") - Frame(buttons, width=20).pack(side="left") - Button(buttons, text="Swap Charts", command=self._swapcharts).pack(side="left") + Button(buttons, text='Union', command=self._union).pack(side='left') + Button(buttons, text='Difference', command=self._difference).pack(side='left') + Frame(buttons, width=20).pack(side='left') + Button(buttons, text='Swap Charts', command=self._swapcharts).pack(side='left') - Button(buttons, text="Detatch Output", command=self._detatch_out).pack( - side="right" + Button(buttons, text='Detatch Output', 
command=self._detatch_out).pack( + side='right' ) def _init_bindings(self, root): # root.bind('', self.save_chart) - root.bind("", self.load_chart_dialog) + root.bind('', self.load_chart_dialog) # root.bind('', self.reset) # //////////////////////////////////////////////////////////// @@ -763,7 +764,7 @@ class ChartComparer(object): self._left_name = name self._left_chart = self._charts[name] self._left_matrix.set_chart(self._left_chart) - if name == "None": + if name == 'None': self._left_matrix.inactivate() self._apply_op() @@ -771,40 +772,40 @@ class ChartComparer(object): self._right_name = name self._right_chart = self._charts[name] self._right_matrix.set_chart(self._right_chart) - if name == "None": + if name == 'None': self._right_matrix.inactivate() self._apply_op() def _apply_op(self): - if self._operator == "-": + if self._operator == '-': self._difference() - elif self._operator == "or": + elif self._operator == 'or': self._union() - elif self._operator == "and": + elif self._operator == 'and': self._intersection() # //////////////////////////////////////////////////////////// # File # //////////////////////////////////////////////////////////// - CHART_FILE_TYPES = [("Pickle file", ".pickle"), ("All files", "*")] + CHART_FILE_TYPES = [('Pickle file', '.pickle'), ('All files', '*')] def save_chart_dialog(self, *args): filename = asksaveasfilename( - filetypes=self.CHART_FILE_TYPES, defaultextension=".pickle" + filetypes=self.CHART_FILE_TYPES, defaultextension='.pickle' ) if not filename: return try: - with open(filename, "wb") as outfile: + with open(filename, 'wb') as outfile: pickle.dump(self._out_chart, outfile) except Exception as e: showerror( - "Error Saving Chart", "Unable to open file: %r\n%s" % (filename, e) + 'Error Saving Chart', 'Unable to open file: %r\n%s' % (filename, e) ) def load_chart_dialog(self, *args): filename = askopenfilename( - filetypes=self.CHART_FILE_TYPES, defaultextension=".pickle" + filetypes=self.CHART_FILE_TYPES, defaultextension='.pickle' ) if not filename: return @@ -812,16 +813,16 @@ class ChartComparer(object): self.load_chart(filename) except Exception as e: showerror( - "Error Loading Chart", "Unable to open file: %r\n%s" % (filename, e) + 'Error Loading Chart', 'Unable to open file: %r\n%s' % (filename, e) ) def load_chart(self, filename): - with open(filename, "rb") as infile: + with open(filename, 'rb') as infile: chart = pickle.load(infile) name = os.path.basename(filename) - if name.endswith(".pickle"): + if name.endswith('.pickle'): name = name[:-7] - if name.endswith(".chart"): + if name.endswith('.chart'): name = name[:-6] self._charts[name] = chart self._left_selector.add(name) @@ -875,7 +876,7 @@ class ChartComparer(object): if edge not in self._right_chart: out_chart.insert(edge, []) - self._update("-", out_chart) + self._update('-', out_chart) def _intersection(self): if not self._checkcompat(): @@ -886,7 +887,7 @@ class ChartComparer(object): if edge in self._right_chart: out_chart.insert(edge, []) - self._update("and", out_chart) + self._update('and', out_chart) def _union(self): if not self._checkcompat(): @@ -898,7 +899,7 @@ class ChartComparer(object): for edge in self._right_chart: out_chart.insert(edge, []) - self._update("or", out_chart) + self._update('or', out_chart) def _swapcharts(self): left, right = self._left_name, self._right_name @@ -916,7 +917,7 @@ class ChartComparer(object): self._out_chart = self._emptychart self._out_matrix.set_chart(self._out_chart) self._out_matrix.inactivate() - self._out_label["text"] = 
"Output" + self._out_label['text'] = 'Output' # Issue some other warning? return False else: @@ -924,10 +925,10 @@ class ChartComparer(object): def _update(self, operator, out_chart): self._operator = operator - self._op_label["text"] = self._OPSYMBOL[operator] + self._op_label['text'] = self._OPSYMBOL[operator] self._out_chart = out_chart self._out_matrix.set_chart(out_chart) - self._out_label["text"] = "%s %s %s" % ( + self._out_label['text'] = '%s %s %s' % ( self._left_name, self._operator, self._right_name, @@ -936,11 +937,11 @@ class ChartComparer(object): def _clear_out_chart(self): self._out_chart = self._emptychart self._out_matrix.set_chart(self._out_chart) - self._op_label["text"] = " " + self._op_label['text'] = ' ' self._out_matrix.inactivate() def _detatch_out(self): - ChartMatrixView(self._root, self._out_chart, title=self._out_label["text"]) + ChartMatrixView(self._root, self._out_chart, title=self._out_label['text']) ####################################################################### @@ -1005,9 +1006,9 @@ class ChartView(object): Construct a new ``Chart`` display. """ # Process keyword args. - draw_tree = kw.get("draw_tree", 0) - draw_sentence = kw.get("draw_sentence", 1) - self._fontsize = kw.get("fontsize", -12) + draw_tree = kw.get('draw_tree', 0) + draw_sentence = kw.get('draw_sentence', 1) + self._fontsize = kw.get('fontsize', -12) # The chart! self._chart = chart @@ -1037,7 +1038,7 @@ class ChartView(object): # If they didn't provide a main window, then set one up. if root is None: top = Tk() - top.title("Chart View") + top.title('Chart View') def destroy1(e, top=top): top.destroy() @@ -1045,9 +1046,9 @@ class ChartView(object): def destroy2(top=top): top.destroy() - top.bind("q", destroy1) - b = Button(top, text="Done", command=destroy2) - b.pack(side="bottom") + top.bind('q', destroy1) + b = Button(top, text='Done', command=destroy2) + b.pack(side='bottom') self._root = top else: self._root = root @@ -1057,25 +1058,25 @@ class ChartView(object): # Create the chart canvas. (self._chart_sb, self._chart_canvas) = self._sb_canvas(self._root) - self._chart_canvas["height"] = 300 - self._chart_canvas["closeenough"] = 15 + self._chart_canvas['height'] = 300 + self._chart_canvas['closeenough'] = 15 # Create the sentence canvas. if draw_sentence: - cframe = Frame(self._root, relief="sunk", border=2) - cframe.pack(fill="both", side="bottom") + cframe = Frame(self._root, relief='sunk', border=2) + cframe.pack(fill='both', side='bottom') self._sentence_canvas = Canvas(cframe, height=50) - self._sentence_canvas["background"] = "#e0e0e0" - self._sentence_canvas.pack(fill="both") + self._sentence_canvas['background'] = '#e0e0e0' + self._sentence_canvas.pack(fill='both') # self._sentence_canvas['height'] = self._sentence_height else: self._sentence_canvas = None # Create the tree canvas. if draw_tree: - (sb, canvas) = self._sb_canvas(self._root, "n", "x") + (sb, canvas) = self._sb_canvas(self._root, 'n', 'x') (self._tree_sb, self._tree_canvas) = (sb, canvas) - self._tree_canvas["height"] = 200 + self._tree_canvas['height'] = 200 else: self._tree_canvas = None @@ -1087,45 +1088,45 @@ class ChartView(object): # Set up the configure callback, which will be called whenever # the window is resized. 
- self._chart_canvas.bind("", self._configure) + self._chart_canvas.bind('', self._configure) def _init_fonts(self, root): - self._boldfont = Font(family="helvetica", weight="bold", size=self._fontsize) - self._font = Font(family="helvetica", size=self._fontsize) + self._boldfont = Font(family='helvetica', weight='bold', size=self._fontsize) + self._font = Font(family='helvetica', size=self._fontsize) # See: self._sysfont = Font(font=Button()["font"]) root.option_add("*Font", self._sysfont) - def _sb_canvas(self, root, expand="y", fill="both", side="bottom"): + def _sb_canvas(self, root, expand='y', fill='both', side='bottom'): """ Helper for __init__: construct a canvas with a scrollbar. """ - cframe = Frame(root, relief="sunk", border=2) + cframe = Frame(root, relief='sunk', border=2) cframe.pack(fill=fill, expand=expand, side=side) - canvas = Canvas(cframe, background="#e0e0e0") + canvas = Canvas(cframe, background='#e0e0e0') # Give the canvas a scrollbar. - sb = Scrollbar(cframe, orient="vertical") - sb.pack(side="right", fill="y") - canvas.pack(side="left", fill=fill, expand="yes") + sb = Scrollbar(cframe, orient='vertical') + sb.pack(side='right', fill='y') + canvas.pack(side='left', fill=fill, expand='yes') # Connect the scrollbars to the canvas. - sb["command"] = canvas.yview - canvas["yscrollcommand"] = sb.set + sb['command'] = canvas.yview + canvas['yscrollcommand'] = sb.set return (sb, canvas) def scroll_up(self, *e): - self._chart_canvas.yview("scroll", -1, "units") + self._chart_canvas.yview('scroll', -1, 'units') def scroll_down(self, *e): - self._chart_canvas.yview("scroll", 1, "units") + self._chart_canvas.yview('scroll', 1, 'units') def page_up(self, *e): - self._chart_canvas.yview("scroll", -1, "pages") + self._chart_canvas.yview('scroll', -1, 'pages') def page_down(self, *e): - self._chart_canvas.yview("scroll", 1, "pages") + self._chart_canvas.yview('scroll', 1, 'pages') def _grow(self): """ @@ -1134,19 +1135,19 @@ class ChartView(object): # Grow, if need-be N = self._chart.num_leaves() width = max( - int(self._chart_canvas["width"]), N * self._unitsize + ChartView._MARGIN * 2 + int(self._chart_canvas['width']), N * self._unitsize + ChartView._MARGIN * 2 ) # It won't resize without the second (height) line, but I # don't understand why not. self._chart_canvas.configure(width=width) - self._chart_canvas.configure(height=self._chart_canvas["height"]) + self._chart_canvas.configure(height=self._chart_canvas['height']) self._unitsize = (width - 2 * ChartView._MARGIN) / N # Reset the height for the sentence window. if self._sentence_canvas is not None: - self._sentence_canvas["height"] = self._sentence_height + self._sentence_canvas['height'] = self._sentence_height def set_font_size(self, size): self._font.configure(size=-abs(size)) @@ -1229,11 +1230,11 @@ class ChartView(object): rhs = " ".join(rhselts) else: lhs = edge.lhs() - rhs = "" + rhs = '' for s in (lhs, rhs): tag = c.create_text( - 0, 0, text=s, font=self._boldfont, anchor="nw", justify="left" + 0, 0, text=s, font=self._boldfont, anchor='nw', justify='left' ) bbox = c.bbox(tag) c.delete(tag) @@ -1296,9 +1297,9 @@ class ChartView(object): # Try to view the new edge.. 
y = (level + 1) * self._chart_level_size dy = self._text_height + 10 - self._chart_canvas.yview("moveto", 1.0) + self._chart_canvas.yview('moveto', 1.0) if self._chart_height != 0: - self._chart_canvas.yview("moveto", (y - dy) / self._chart_height) + self._chart_canvas.yview('moveto', (y - dy) / self._chart_height) def _draw_edge(self, edge, lvl): """ @@ -1312,7 +1313,7 @@ class ChartView(object): if x2 == x1: x2 += max(4, self._unitsize / 5) y = (lvl + 1) * self._chart_level_size - linetag = c.create_line(x1, y, x2, y, arrow="last", width=3) + linetag = c.create_line(x1, y, x2, y, arrow='last', width=3) # Draw a label for the edge. if isinstance(edge, TreeEdge): @@ -1329,13 +1330,13 @@ class ChartView(object): rhs1 = " ".join(rhs[:pos]) rhs2 = " ".join(rhs[pos:]) - rhstag1 = c.create_text(x1 + 3, y, text=rhs1, font=self._font, anchor="nw") + rhstag1 = c.create_text(x1 + 3, y, text=rhs1, font=self._font, anchor='nw') dotx = c.bbox(rhstag1)[2] + 6 doty = (c.bbox(rhstag1)[1] + c.bbox(rhstag1)[3]) / 2 dottag = c.create_oval(dotx - 2, doty - 2, dotx + 2, doty + 2) - rhstag2 = c.create_text(dotx + 6, y, text=rhs2, font=self._font, anchor="nw") + rhstag2 = c.create_text(dotx + 6, y, text=rhs2, font=self._font, anchor='nw') lhstag = c.create_text( - (x1 + x2) / 2, y, text=str(edge.lhs()), anchor="s", font=self._boldfont + (x1 + x2) / 2, y, text=str(edge.lhs()), anchor='s', font=self._boldfont ) # Keep track of the edge's tags. @@ -1343,13 +1344,13 @@ class ChartView(object): # Register a callback for clicking on the edge. def cb(event, self=self, edge=edge): - self._fire_callbacks("select", edge) + self._fire_callbacks('select', edge) - c.tag_bind(rhstag1, "", cb) - c.tag_bind(rhstag2, "", cb) - c.tag_bind(linetag, "", cb) - c.tag_bind(dottag, "", cb) - c.tag_bind(lhstag, "", cb) + c.tag_bind(rhstag1, '', cb) + c.tag_bind(rhstag2, '', cb) + c.tag_bind(linetag, '', cb) + c.tag_bind(dottag, '', cb) + c.tag_bind(lhstag, '', cb) self._color_edge(edge) @@ -1378,13 +1379,13 @@ class ChartView(object): if edge in self._marks: self._color_edge(self._marks[edge]) if edge.is_complete() and edge.span() == (0, N): - self._color_edge(edge, "#084", "#042") + self._color_edge(edge, '#084', '#042') elif isinstance(edge, LeafEdge): - self._color_edge(edge, "#48c", "#246") + self._color_edge(edge, '#48c', '#246') else: - self._color_edge(edge, "#00f", "#008") + self._color_edge(edge, '#00f', '#008') - def mark_edge(self, edge, mark="#0df"): + def mark_edge(self, edge, mark='#0df'): """ Mark an edge """ @@ -1404,7 +1405,7 @@ class ChartView(object): del self._marks[edge] self._color_edge(edge) - def markonly_edge(self, edge, mark="#0df"): + def markonly_edge(self, edge, mark='#0df'): self.unmark_edge() self.mark_edge(edge, mark) @@ -1421,7 +1422,7 @@ class ChartView(object): # Check against all tokens for leaf in self._chart.leaves(): tag = c.create_text( - 0, 0, text=repr(leaf), font=self._font, anchor="nw", justify="left" + 0, 0, text=repr(leaf), font=self._font, anchor='nw', justify='left' ) bbox = c.bbox(tag) c.delete(tag) @@ -1460,11 +1461,11 @@ class ChartView(object): levels = len(self._edgelevels) self._chart_height = (levels + 2) * self._chart_level_size - c["scrollregion"] = (0, 0, width, self._chart_height) + c['scrollregion'] = (0, 0, width, self._chart_height) # Reset the tree scroll region if self._tree_canvas: - self._tree_canvas["scrollregion"] = (0, 0, width, self._tree_height) + self._tree_canvas['scrollregion'] = (0, 0, width, self._tree_height) def _draw_loclines(self): """ @@ -1488,7 +1489,7 @@ 
class ChartView(object): c2.tag_lower(t2) t3 = c3.create_line(x, 0, x, BOTTOM) c3.tag_lower(t3) - t4 = c3.create_text(x + 2, 0, text=repr(i), anchor="nw", font=self._font) + t4 = c3.create_text(x + 2, 0, text=repr(i), anchor='nw', font=self._font) c3.tag_lower(t4) # if i % 4 == 0: # if c1: c1.itemconfig(t1, width=2, fill='gray60') @@ -1496,16 +1497,16 @@ class ChartView(object): # c3.itemconfig(t3, width=2, fill='gray60') if i % 2 == 0: if c1: - c1.itemconfig(t1, fill="gray60") + c1.itemconfig(t1, fill='gray60') if c2: - c2.itemconfig(t2, fill="gray60") - c3.itemconfig(t3, fill="gray60") + c2.itemconfig(t2, fill='gray60') + c3.itemconfig(t3, fill='gray60') else: if c1: - c1.itemconfig(t1, fill="gray80") + c1.itemconfig(t1, fill='gray80') if c2: - c2.itemconfig(t2, fill="gray80") - c3.itemconfig(t3, fill="gray80") + c2.itemconfig(t2, fill='gray80') + c3.itemconfig(t3, fill='gray80') def _draw_sentence(self): """Draw the sentence string.""" @@ -1520,7 +1521,7 @@ class ChartView(object): x2 = x1 + self._unitsize x = (x1 + x2) / 2 tag = c.create_text( - x, y, text=repr(leaf), font=self._font, anchor="n", justify="left" + x, y, text=repr(leaf), font=self._font, anchor='n', justify='left' ) bbox = c.bbox(tag) rt = c.create_rectangle( @@ -1528,8 +1529,8 @@ class ChartView(object): bbox[1] - (ChartView._LEAF_SPACING / 2), x2 - 2, bbox[3] + (ChartView._LEAF_SPACING / 2), - fill="#f0f0f0", - outline="#f0f0f0", + fill='#f0f0f0', + outline='#f0f0f0', ) c.tag_lower(rt) @@ -1570,7 +1571,7 @@ class ChartView(object): # Update the scroll region. w = self._chart.num_leaves() * self._unitsize + 2 * ChartView._MARGIN h = tree.height() * (ChartView._TREE_LEVEL_SIZE + self._text_height) - self._tree_canvas["scrollregion"] = (0, 0, w, h) + self._tree_canvas['scrollregion'] = (0, 0, w, h) def cycle_tree(self): self._treetoks_index = (self._treetoks_index + 1) % len(self._treetoks) @@ -1581,11 +1582,11 @@ class ChartView(object): return # Draw the label. 
- label = "%d Trees" % len(self._treetoks) + label = '%d Trees' % len(self._treetoks) c = self._tree_canvas margin = ChartView._MARGIN right = self._chart.num_leaves() * self._unitsize + margin - 2 - tag = c.create_text(right, 2, anchor="ne", text=label, font=self._boldfont) + tag = c.create_text(right, 2, anchor='ne', text=label, font=self._boldfont) self._tree_tags.append(tag) _, _, _, y = c.bbox(tag) @@ -1593,11 +1594,11 @@ class ChartView(object): for i in range(len(self._treetoks)): x = right - 20 * (len(self._treetoks) - i - 1) if i == self._treetoks_index: - fill = "#084" + fill = '#084' else: - fill = "#fff" + fill = '#fff' tag = c.create_polygon( - x, y + 10, x - 5, y, x - 10, y + 10, fill=fill, outline="black" + x, y + 10, x - 5, y, x - 10, y + 10, fill=fill, outline='black' ) self._tree_tags.append(tag) @@ -1607,7 +1608,7 @@ class ChartView(object): self._treetoks_index = i self.draw_tree() - c.tag_bind(tag, "", cb) + c.tag_bind(tag, '', cb) def _draw_treetok(self, treetok, index, depth=0): """ @@ -1641,10 +1642,10 @@ class ChartView(object): tag = c.create_text( nodex, nodey, - anchor="n", - justify="center", + anchor='n', + justify='center', text=str(treetok.label()), - fill="#042", + fill='#042', font=self._boldfont, ) self._tree_tags.append(tag) @@ -1660,7 +1661,7 @@ class ChartView(object): childx, childy, width=2, - fill="#084", + fill='#084', ) self._tree_tags.append(tag) if isinstance(child, Tree) and not child: @@ -1671,8 +1672,8 @@ class ChartView(object): childx, childy, width=2, - fill="#048", - dash="2 3", + fill='#048', + dash='2 3', ) self._tree_tags.append(tag) if not isinstance(child, Tree): @@ -1683,7 +1684,7 @@ class ChartView(object): childx, 10000, width=2, - fill="#084", + fill='#084', ) self._tree_tags.append(tag) @@ -1694,14 +1695,14 @@ class ChartView(object): Draw everything (from scratch). """ if self._tree_canvas: - self._tree_canvas.delete("all") + self._tree_canvas.delete('all') self.draw_tree() if self._sentence_canvas: - self._sentence_canvas.delete("all") + self._sentence_canvas.delete('all') self._draw_sentence() - self._chart_canvas.delete("all") + self._chart_canvas.delete('all') self._edgetags = {} # Redraw any edges we erased. @@ -1785,7 +1786,7 @@ class FundamentalEdgeRule(EdgeRule, SingleEdgeFundamentalRule): class ChartParserApp(object): - def __init__(self, grammar, tokens, title="Chart Parser Application"): + def __init__(self, grammar, tokens, title='Chart Parser Application'): # Initialize the parser self._init_parser(grammar, tokens) @@ -1794,15 +1795,15 @@ class ChartParserApp(object): # Create the root window. self._root = Tk() self._root.title(title) - self._root.bind("", self.destroy) + self._root.bind('', self.destroy) # Set up some frames. 
frame3 = Frame(self._root) frame2 = Frame(self._root) frame1 = Frame(self._root) - frame3.pack(side="bottom", fill="none") - frame2.pack(side="bottom", fill="x") - frame1.pack(side="bottom", fill="both", expand=1) + frame3.pack(side='bottom', fill='none') + frame2.pack(side='bottom', fill='x') + frame1.pack(side='bottom', fill='both', expand=1) self._init_fonts(self._root) self._init_animation() @@ -1818,7 +1819,7 @@ class ChartParserApp(object): self._init_bindings() except: - print("Error creating Tree View") + print('Error creating Tree View') self.destroy() raise @@ -1870,10 +1871,10 @@ class ChartParserApp(object): # TWhat's our font size (default=same as sysfont) self._size = IntVar(root) - self._size.set(self._sysfont.cget("size")) + self._size.set(self._sysfont.cget('size')) - self._boldfont = Font(family="helvetica", weight="bold", size=self._size.get()) - self._font = Font(family="helvetica", size=self._size.get()) + self._boldfont = Font(family='helvetica', weight='bold', size=self._size.get()) + self._font = Font(family='helvetica', size=self._size.get()) def _init_animation(self): # Are we stepping? (default=yes) @@ -1889,214 +1890,214 @@ class ChartParserApp(object): def _init_chartview(self, parent): self._cv = ChartView(self._chart, parent, draw_tree=1, draw_sentence=1) - self._cv.add_callback("select", self._click_cv_edge) + self._cv.add_callback('select', self._click_cv_edge) def _init_rulelabel(self, parent): - ruletxt = "Last edge generated by:" + ruletxt = 'Last edge generated by:' self._rulelabel1 = Label(parent, text=ruletxt, font=self._boldfont) self._rulelabel2 = Label( - parent, width=40, relief="groove", anchor="w", font=self._boldfont + parent, width=40, relief='groove', anchor='w', font=self._boldfont ) - self._rulelabel1.pack(side="left") - self._rulelabel2.pack(side="left") - step = Checkbutton(parent, variable=self._step, text="Step") - step.pack(side="right") + self._rulelabel1.pack(side='left') + self._rulelabel2.pack(side='left') + step = Checkbutton(parent, variable=self._step, text='Step') + step.pack(side='right') def _init_buttons(self, parent): frame1 = Frame(parent) frame2 = Frame(parent) - frame1.pack(side="bottom", fill="x") - frame2.pack(side="top", fill="none") + frame1.pack(side='bottom', fill='x') + frame2.pack(side='top', fill='none') Button( frame1, - text="Reset\nParser", - background="#90c0d0", - foreground="black", + text='Reset\nParser', + background='#90c0d0', + foreground='black', command=self.reset, - ).pack(side="right") + ).pack(side='right') # Button(frame1, text='Pause', # background='#90c0d0', foreground='black', # command=self.pause).pack(side='left') Button( frame1, - text="Top Down\nStrategy", - background="#90c0d0", - foreground="black", + text='Top Down\nStrategy', + background='#90c0d0', + foreground='black', command=self.top_down_strategy, - ).pack(side="left") + ).pack(side='left') Button( frame1, - text="Bottom Up\nStrategy", - background="#90c0d0", - foreground="black", + text='Bottom Up\nStrategy', + background='#90c0d0', + foreground='black', command=self.bottom_up_strategy, - ).pack(side="left") + ).pack(side='left') Button( frame1, - text="Bottom Up\nLeft-Corner Strategy", - background="#90c0d0", - foreground="black", + text='Bottom Up\nLeft-Corner Strategy', + background='#90c0d0', + foreground='black', command=self.bottom_up_leftcorner_strategy, - ).pack(side="left") + ).pack(side='left') Button( frame2, - text="Top Down Init\nRule", - background="#90f090", - foreground="black", + text='Top Down Init\nRule', + 
background='#90f090', + foreground='black', command=self.top_down_init, - ).pack(side="left") + ).pack(side='left') Button( frame2, - text="Top Down Predict\nRule", - background="#90f090", - foreground="black", + text='Top Down Predict\nRule', + background='#90f090', + foreground='black', command=self.top_down_predict, - ).pack(side="left") - Frame(frame2, width=20).pack(side="left") + ).pack(side='left') + Frame(frame2, width=20).pack(side='left') Button( frame2, - text="Bottom Up Predict\nRule", - background="#90f090", - foreground="black", + text='Bottom Up Predict\nRule', + background='#90f090', + foreground='black', command=self.bottom_up, - ).pack(side="left") - Frame(frame2, width=20).pack(side="left") + ).pack(side='left') + Frame(frame2, width=20).pack(side='left') Button( frame2, - text="Bottom Up Left-Corner\nPredict Rule", - background="#90f090", - foreground="black", + text='Bottom Up Left-Corner\nPredict Rule', + background='#90f090', + foreground='black', command=self.bottom_up_leftcorner, - ).pack(side="left") - Frame(frame2, width=20).pack(side="left") + ).pack(side='left') + Frame(frame2, width=20).pack(side='left') Button( frame2, - text="Fundamental\nRule", - background="#90f090", - foreground="black", + text='Fundamental\nRule', + background='#90f090', + foreground='black', command=self.fundamental, - ).pack(side="left") + ).pack(side='left') def _init_bindings(self): - self._root.bind("", self._cv.scroll_up) - self._root.bind("", self._cv.scroll_down) - self._root.bind("", self._cv.page_up) - self._root.bind("", self._cv.page_down) - self._root.bind("", self.destroy) - self._root.bind("", self.destroy) - self._root.bind("", self.help) - - self._root.bind("", self.save_chart) - self._root.bind("", self.load_chart) - self._root.bind("", self.reset) - - self._root.bind("t", self.top_down_strategy) - self._root.bind("b", self.bottom_up_strategy) - self._root.bind("c", self.bottom_up_leftcorner_strategy) - self._root.bind("", self._stop_animation) - - self._root.bind("", self.edit_grammar) - self._root.bind("", self.edit_sentence) + self._root.bind('', self._cv.scroll_up) + self._root.bind('', self._cv.scroll_down) + self._root.bind('', self._cv.page_up) + self._root.bind('', self._cv.page_down) + self._root.bind('', self.destroy) + self._root.bind('', self.destroy) + self._root.bind('', self.help) + + self._root.bind('', self.save_chart) + self._root.bind('', self.load_chart) + self._root.bind('', self.reset) + + self._root.bind('t', self.top_down_strategy) + self._root.bind('b', self.bottom_up_strategy) + self._root.bind('c', self.bottom_up_leftcorner_strategy) + self._root.bind('', self._stop_animation) + + self._root.bind('', self.edit_grammar) + self._root.bind('', self.edit_sentence) # Animation speed control - self._root.bind("-", lambda e, a=self._animate: a.set(1)) - self._root.bind("=", lambda e, a=self._animate: a.set(2)) - self._root.bind("+", lambda e, a=self._animate: a.set(3)) + self._root.bind('-', lambda e, a=self._animate: a.set(1)) + self._root.bind('=', lambda e, a=self._animate: a.set(2)) + self._root.bind('+', lambda e, a=self._animate: a.set(3)) # Step control - self._root.bind("s", lambda e, s=self._step: s.set(not s.get())) + self._root.bind('s', lambda e, s=self._step: s.set(not s.get())) def _init_menubar(self): menubar = Menu(self._root) filemenu = Menu(menubar, tearoff=0) filemenu.add_command( - label="Save Chart", + label='Save Chart', underline=0, command=self.save_chart, - accelerator="Ctrl-s", + accelerator='Ctrl-s', ) filemenu.add_command( 
- label="Load Chart", + label='Load Chart', underline=0, command=self.load_chart, - accelerator="Ctrl-o", + accelerator='Ctrl-o', ) filemenu.add_command( - label="Reset Chart", underline=0, command=self.reset, accelerator="Ctrl-r" + label='Reset Chart', underline=0, command=self.reset, accelerator='Ctrl-r' ) filemenu.add_separator() - filemenu.add_command(label="Save Grammar", command=self.save_grammar) - filemenu.add_command(label="Load Grammar", command=self.load_grammar) + filemenu.add_command(label='Save Grammar', command=self.save_grammar) + filemenu.add_command(label='Load Grammar', command=self.load_grammar) filemenu.add_separator() filemenu.add_command( - label="Exit", underline=1, command=self.destroy, accelerator="Ctrl-x" + label='Exit', underline=1, command=self.destroy, accelerator='Ctrl-x' ) - menubar.add_cascade(label="File", underline=0, menu=filemenu) + menubar.add_cascade(label='File', underline=0, menu=filemenu) editmenu = Menu(menubar, tearoff=0) editmenu.add_command( - label="Edit Grammar", + label='Edit Grammar', underline=5, command=self.edit_grammar, - accelerator="Ctrl-g", + accelerator='Ctrl-g', ) editmenu.add_command( - label="Edit Text", + label='Edit Text', underline=5, command=self.edit_sentence, - accelerator="Ctrl-t", + accelerator='Ctrl-t', ) - menubar.add_cascade(label="Edit", underline=0, menu=editmenu) + menubar.add_cascade(label='Edit', underline=0, menu=editmenu) viewmenu = Menu(menubar, tearoff=0) viewmenu.add_command( - label="Chart Matrix", underline=6, command=self.view_matrix + label='Chart Matrix', underline=6, command=self.view_matrix ) - viewmenu.add_command(label="Results", underline=0, command=self.view_results) - menubar.add_cascade(label="View", underline=0, menu=viewmenu) + viewmenu.add_command(label='Results', underline=0, command=self.view_results) + menubar.add_cascade(label='View', underline=0, menu=viewmenu) rulemenu = Menu(menubar, tearoff=0) rulemenu.add_command( - label="Top Down Strategy", + label='Top Down Strategy', underline=0, command=self.top_down_strategy, - accelerator="t", + accelerator='t', ) rulemenu.add_command( - label="Bottom Up Strategy", + label='Bottom Up Strategy', underline=0, command=self.bottom_up_strategy, - accelerator="b", + accelerator='b', ) rulemenu.add_command( - label="Bottom Up Left-Corner Strategy", + label='Bottom Up Left-Corner Strategy', underline=0, command=self.bottom_up_leftcorner_strategy, - accelerator="c", + accelerator='c', ) rulemenu.add_separator() - rulemenu.add_command(label="Bottom Up Rule", command=self.bottom_up) + rulemenu.add_command(label='Bottom Up Rule', command=self.bottom_up) rulemenu.add_command( - label="Bottom Up Left-Corner Rule", command=self.bottom_up_leftcorner + label='Bottom Up Left-Corner Rule', command=self.bottom_up_leftcorner ) - rulemenu.add_command(label="Top Down Init Rule", command=self.top_down_init) + rulemenu.add_command(label='Top Down Init Rule', command=self.top_down_init) rulemenu.add_command( - label="Top Down Predict Rule", command=self.top_down_predict + label='Top Down Predict Rule', command=self.top_down_predict ) - rulemenu.add_command(label="Fundamental Rule", command=self.fundamental) - menubar.add_cascade(label="Apply", underline=0, menu=rulemenu) + rulemenu.add_command(label='Fundamental Rule', command=self.fundamental) + menubar.add_cascade(label='Apply', underline=0, menu=rulemenu) animatemenu = Menu(menubar, tearoff=0) animatemenu.add_checkbutton( - label="Step", underline=0, variable=self._step, accelerator="s" + label="Step", underline=0, 
variable=self._step, accelerator='s' ) animatemenu.add_separator() animatemenu.add_radiobutton( @@ -2107,68 +2108,68 @@ class ChartParserApp(object): underline=0, variable=self._animate, value=1, - accelerator="-", + accelerator='-', ) animatemenu.add_radiobutton( label="Normal Animation", underline=0, variable=self._animate, value=2, - accelerator="=", + accelerator='=', ) animatemenu.add_radiobutton( label="Fast Animation", underline=0, variable=self._animate, value=3, - accelerator="+", + accelerator='+', ) menubar.add_cascade(label="Animate", underline=1, menu=animatemenu) zoommenu = Menu(menubar, tearoff=0) zoommenu.add_radiobutton( - label="Tiny", + label='Tiny', variable=self._size, underline=0, value=10, command=self.resize, ) zoommenu.add_radiobutton( - label="Small", + label='Small', variable=self._size, underline=0, value=12, command=self.resize, ) zoommenu.add_radiobutton( - label="Medium", + label='Medium', variable=self._size, underline=0, value=14, command=self.resize, ) zoommenu.add_radiobutton( - label="Large", + label='Large', variable=self._size, underline=0, value=18, command=self.resize, ) zoommenu.add_radiobutton( - label="Huge", + label='Huge', variable=self._size, underline=0, value=24, command=self.resize, ) - menubar.add_cascade(label="Zoom", underline=0, menu=zoommenu) + menubar.add_cascade(label='Zoom', underline=0, menu=zoommenu) helpmenu = Menu(menubar, tearoff=0) - helpmenu.add_command(label="About", underline=0, command=self.about) + helpmenu.add_command(label='About', underline=0, command=self.about) helpmenu.add_command( - label="Instructions", underline=0, command=self.help, accelerator="F1" + label='Instructions', underline=0, command=self.help, accelerator='F1' ) - menubar.add_cascade(label="Help", underline=0, menu=helpmenu) + menubar.add_cascade(label='Help', underline=0, menu=helpmenu) self._root.config(menu=menubar) @@ -2193,7 +2194,7 @@ class ChartParserApp(object): def _select_edge(self, edge): self._selection = edge # Update the chart view. - self._cv.markonly_edge(edge, "#f00") + self._cv.markonly_edge(edge, '#f00') self._cv.draw_tree(edge) # Update the matrix view. if self._matrix: @@ -2215,7 +2216,7 @@ class ChartParserApp(object): # Update the chart view. self._cv.update() self._cv.draw_tree(edge) - self._cv.markonly_edge(edge, "#0df") + self._cv.markonly_edge(edge, '#0df') self._cv.view_edge(edge) # Update the matrix view. 
if self._matrix: @@ -2238,43 +2239,43 @@ class ChartParserApp(object): try: ShowText( self._root, - "Help: Chart Parser Application", - (__doc__ or "").strip(), + 'Help: Chart Parser Application', + (__doc__ or '').strip(), width=75, - font="fixed", + font='fixed', ) except: ShowText( self._root, - "Help: Chart Parser Application", - (__doc__ or "").strip(), + 'Help: Chart Parser Application', + (__doc__ or '').strip(), width=75, ) def about(self, *e): ABOUT = "NLTK Chart Parser Application\n" + "Written by Edward Loper" - showinfo("About: Chart Parser Application", ABOUT) + showinfo('About: Chart Parser Application', ABOUT) # //////////////////////////////////////////////////////////// # File Menu # //////////////////////////////////////////////////////////// - CHART_FILE_TYPES = [("Pickle file", ".pickle"), ("All files", "*")] + CHART_FILE_TYPES = [('Pickle file', '.pickle'), ('All files', '*')] GRAMMAR_FILE_TYPES = [ - ("Plaintext grammar file", ".cfg"), - ("Pickle file", ".pickle"), - ("All files", "*"), + ('Plaintext grammar file', '.cfg'), + ('Pickle file', '.pickle'), + ('All files', '*'), ] def load_chart(self, *args): "Load a chart from a pickle file" filename = askopenfilename( - filetypes=self.CHART_FILE_TYPES, defaultextension=".pickle" + filetypes=self.CHART_FILE_TYPES, defaultextension='.pickle' ) if not filename: return try: - with open(filename, "rb") as infile: + with open(filename, 'rb') as infile: chart = pickle.load(infile) self._chart = chart self._cv.update(chart) @@ -2287,61 +2288,61 @@ class ChartParserApp(object): self._cp.set_chart(chart) except Exception as e: raise - showerror("Error Loading Chart", "Unable to open file: %r" % filename) + showerror('Error Loading Chart', 'Unable to open file: %r' % filename) def save_chart(self, *args): "Save a chart to a pickle file" filename = asksaveasfilename( - filetypes=self.CHART_FILE_TYPES, defaultextension=".pickle" + filetypes=self.CHART_FILE_TYPES, defaultextension='.pickle' ) if not filename: return try: - with open(filename, "wb") as outfile: + with open(filename, 'wb') as outfile: pickle.dump(self._chart, outfile) except Exception as e: raise - showerror("Error Saving Chart", "Unable to open file: %r" % filename) + showerror('Error Saving Chart', 'Unable to open file: %r' % filename) def load_grammar(self, *args): "Load a grammar from a pickle file" filename = askopenfilename( - filetypes=self.GRAMMAR_FILE_TYPES, defaultextension=".cfg" + filetypes=self.GRAMMAR_FILE_TYPES, defaultextension='.cfg' ) if not filename: return try: - if filename.endswith(".pickle"): - with open(filename, "rb") as infile: + if filename.endswith('.pickle'): + with open(filename, 'rb') as infile: grammar = pickle.load(infile) else: - with open(filename, "r") as infile: + with open(filename, 'r') as infile: grammar = CFG.fromstring(infile.read()) self.set_grammar(grammar) except Exception as e: - showerror("Error Loading Grammar", "Unable to open file: %r" % filename) + showerror('Error Loading Grammar', 'Unable to open file: %r' % filename) def save_grammar(self, *args): filename = asksaveasfilename( - filetypes=self.GRAMMAR_FILE_TYPES, defaultextension=".cfg" + filetypes=self.GRAMMAR_FILE_TYPES, defaultextension='.cfg' ) if not filename: return try: - if filename.endswith(".pickle"): - with open(filename, "wb") as outfile: + if filename.endswith('.pickle'): + with open(filename, 'wb') as outfile: pickle.dump((self._chart, self._tokens), outfile) else: - with open(filename, "w") as outfile: + with open(filename, 'w') as outfile: prods = 
self._grammar.productions() start = [p for p in prods if p.lhs() == self._grammar.start()] rest = [p for p in prods if p.lhs() != self._grammar.start()] for prod in start: - outfile.write("%s\n" % prod) + outfile.write('%s\n' % prod) for prod in rest: - outfile.write("%s\n" % prod) + outfile.write('%s\n' % prod) except Exception as e: - showerror("Error Saving Grammar", "Unable to open file: %r" % filename) + showerror('Error Saving Grammar', 'Unable to open file: %r' % filename) def reset(self, *args): self._animating = 0 @@ -2369,8 +2370,8 @@ class ChartParserApp(object): def edit_sentence(self, *e): sentence = " ".join(self._tokens) - title = "Edit Text" - instr = "Enter a new sentence to parse." + title = 'Edit Text' + instr = 'Enter a new sentence to parse.' EntryDialog(self._root, sentence, instr, self.set_sentence, title) def set_sentence(self, sentence): @@ -2385,7 +2386,7 @@ class ChartParserApp(object): if self._matrix is not None: self._matrix.destroy() self._matrix = ChartMatrixView(self._root, self._chart) - self._matrix.add_callback("select", self._select_matrix_edge) + self._matrix.add_callback('select', self._select_matrix_edge) def view_results(self, *e): if self._results is not None: @@ -2478,10 +2479,10 @@ class ChartParserApp(object): def _display_rule(self, rule): if rule is None: - self._rulelabel2["text"] = "" + self._rulelabel2['text'] = '' else: name = str(rule) - self._rulelabel2["text"] = name + self._rulelabel2['text'] = name size = self._cv.get_font_size() # //////////////////////////////////////////////////////////// @@ -2543,20 +2544,20 @@ def app(): """ ) - sent = "John ate the cake on the table with a fork" - sent = "John ate the cake on the table" + sent = 'John ate the cake on the table with a fork' + sent = 'John ate the cake on the table' tokens = list(sent.split()) - print("grammar= (") + print('grammar= (') for rule in grammar.productions(): - print((" ", repr(rule) + ",")) - print(")") - print(("tokens = %r" % tokens)) + print((' ', repr(rule) + ',')) + print(')') + print(('tokens = %r' % tokens)) print('Calling "ChartParserApp(grammar, tokens)"...') ChartParserApp(grammar, tokens).mainloop() -if __name__ == "__main__": +if __name__ == '__main__': app() # Chart comparer: @@ -2572,4 +2573,4 @@ if __name__ == "__main__": # p.strip_dirs().sort_stats('time', 'cum').print_stats(60) # p.strip_dirs().sort_stats('cum', 'time').print_stats(60) -__all__ = ["app"] +__all__ = ['app'] diff --git a/nlp_resource_data/nltk/app/chunkparser_app.py b/nlp_resource_data/nltk/app/chunkparser_app.py index 699b7d7..2aeca10 100644 --- a/nlp_resource_data/nltk/app/chunkparser_app.py +++ b/nlp_resource_data/nltk/app/chunkparser_app.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Regexp Chunk Parser Application # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # URL: # For license information, see LICENSE.TXT @@ -15,12 +15,13 @@ parser ``nltk.chunk.RegexpChunkParser``. # configuration parameters to select what's being chunked (eg VP vs NP) # and what part of the data is being used as the development set. 
+from __future__ import division import time import textwrap import re import random -from tkinter import ( +from six.moves.tkinter import ( Button, Canvas, Checkbutton, @@ -32,8 +33,8 @@ from tkinter import ( Text, Tk, ) -from tkinter.filedialog import askopenfilename, asksaveasfilename -from tkinter.font import Font +from six.moves.tkinter_tkfiledialog import askopenfilename, asksaveasfilename +from six.moves.tkinter_font import Font from nltk.tree import Tree from nltk.util import in_idle @@ -59,51 +60,51 @@ class RegexpChunkApp(object): #: which is used in the help text. (This should probably live with #: the conll and/or treebank corpus instead.) TAGSET = { - "CC": "Coordinating conjunction", - "PRP$": "Possessive pronoun", - "CD": "Cardinal number", - "RB": "Adverb", - "DT": "Determiner", - "RBR": "Adverb, comparative", - "EX": "Existential there", - "RBS": "Adverb, superlative", - "FW": "Foreign word", - "RP": "Particle", - "JJ": "Adjective", - "TO": "to", - "JJR": "Adjective, comparative", - "UH": "Interjection", - "JJS": "Adjective, superlative", - "VB": "Verb, base form", - "LS": "List item marker", - "VBD": "Verb, past tense", - "MD": "Modal", - "NNS": "Noun, plural", - "NN": "Noun, singular or masps", - "VBN": "Verb, past participle", - "VBZ": "Verb,3rd ps. sing. present", - "NNP": "Proper noun, singular", - "NNPS": "Proper noun plural", - "WDT": "wh-determiner", - "PDT": "Predeterminer", - "WP": "wh-pronoun", - "POS": "Possessive ending", - "WP$": "Possessive wh-pronoun", - "PRP": "Personal pronoun", - "WRB": "wh-adverb", - "(": "open parenthesis", - ")": "close parenthesis", - "``": "open quote", - ",": "comma", - "''": "close quote", - ".": "period", - "#": "pound sign (currency marker)", - "$": "dollar sign (currency marker)", - "IN": "Preposition/subord. conjunction", - "SYM": "Symbol (mathematical or scientific)", - "VBG": "Verb, gerund/present participle", - "VBP": "Verb, non-3rd ps. sing. present", - ":": "colon", + 'CC': 'Coordinating conjunction', + 'PRP$': 'Possessive pronoun', + 'CD': 'Cardinal number', + 'RB': 'Adverb', + 'DT': 'Determiner', + 'RBR': 'Adverb, comparative', + 'EX': 'Existential there', + 'RBS': 'Adverb, superlative', + 'FW': 'Foreign word', + 'RP': 'Particle', + 'JJ': 'Adjective', + 'TO': 'to', + 'JJR': 'Adjective, comparative', + 'UH': 'Interjection', + 'JJS': 'Adjective, superlative', + 'VB': 'Verb, base form', + 'LS': 'List item marker', + 'VBD': 'Verb, past tense', + 'MD': 'Modal', + 'NNS': 'Noun, plural', + 'NN': 'Noun, singular or masps', + 'VBN': 'Verb, past participle', + 'VBZ': 'Verb,3rd ps. sing. present', + 'NNP': 'Proper noun, singular', + 'NNPS': 'Proper noun plural', + 'WDT': 'wh-determiner', + 'PDT': 'Predeterminer', + 'WP': 'wh-pronoun', + 'POS': 'Possessive ending', + 'WP$': 'Possessive wh-pronoun', + 'PRP': 'Personal pronoun', + 'WRB': 'wh-adverb', + '(': 'open parenthesis', + ')': 'close parenthesis', + '``': 'open quote', + ',': 'comma', + "''": 'close quote', + '.': 'period', + '#': 'pound sign (currency marker)', + '$': 'dollar sign (currency marker)', + 'IN': 'Preposition/subord. conjunction', + 'SYM': 'Symbol (mathematical or scientific)', + 'VBG': 'Verb, gerund/present participle', + 'VBP': 'Verb, non-3rd ps. sing. present', + ':': 'colon', } #: Contents for the help box. This is a list of tuples, one for @@ -115,8 +116,8 @@ class RegexpChunkApp(object): #: for a list of tags you can use for colorizing. HELP = [ ( - "Help", - "20", + 'Help', + '20', "Welcome to the regular expression chunk-parser grammar editor. 
" "You can use this editor to develop and test chunk parser grammars " "based on NLTK's RegexpChunkParser class.\n\n" @@ -154,8 +155,8 @@ class RegexpChunkApp(object): "the status bar at the bottom of the window.", ), ( - "Rules", - "10", + 'Rules', + '10', "

<h1>{...regexp...}</h1>
" "\nChunk rule: creates new chunks from words matching " "regexp.\n\n" @@ -170,8 +171,8 @@ class RegexpChunkApp(object): "and regexp2\n", ), ( - "Regexps", - "10 60", + 'Regexps', + '10 60', # "Regular Expression Syntax Summary:\n\n" "

<h1>Pattern\t\tMatches...</h1>\n" "<hangindent>" @@ -191,42 +192,42 @@ class RegexpChunkApp(object): "</hangindent>" "\n<h1>Examples:</h1>\n" "<hangindent>" - "\t<regexp><NN></regexp>\n" + '\t<regexp><NN></regexp>\n' '\t\tMatches <match>"cow/NN"</match>\n' '\t\tMatches <match>"green/NN"</match>\n' - "\t<regexp><VB.*></regexp>\n" + '\t<regexp><VB.*></regexp>\n' '\t\tMatches <match>"eating/VBG"</match>\n' '\t\tMatches <match>"ate/VBD"</match>\n' - "\t<regexp><IN><DT><NN></regexp>\n" + '\t<regexp><IN><DT><NN></regexp>\n' '\t\tMatches <match>"on/IN the/DT car/NN"</match>\n' - "\t<regexp><RB>?<VBD></regexp>\n" + '\t<regexp><RB>?<VBD></regexp>\n' '\t\tMatches <match>"ran/VBD"</match>\n' '\t\tMatches <match>"slowly/RB ate/VBD"</match>\n' - "\t<regexp><\#> # This is a comment...</regexp>\n" + '\t<regexp><\#> # This is a comment...</regexp>\n' '\t\tMatches <match>"#/# 100/CD"</match>\n' "</hangindent>", ), ( - "Tags", - "10 60", + 'Tags', + '10 60', "

<h1>Part of Speech Tags:</h1>
\n" - + "" - + "<>" - + "\n", # this gets auto-substituted w/ self.TAGSET + + '' + + '<>' + + '\n', # this gets auto-substituted w/ self.TAGSET ), ] HELP_AUTOTAG = [ - ("red", dict(foreground="#a00")), - ("green", dict(foreground="#080")), - ("highlight", dict(background="#ddd")), - ("underline", dict(underline=True)), - ("h1", dict(underline=True)), - ("indent", dict(lmargin1=20, lmargin2=20)), - ("hangindent", dict(lmargin1=0, lmargin2=60)), - ("var", dict(foreground="#88f")), - ("regexp", dict(foreground="#ba7")), - ("match", dict(foreground="#6a6")), + ('red', dict(foreground='#a00')), + ('green', dict(foreground='#080')), + ('highlight', dict(background='#ddd')), + ('underline', dict(underline=True)), + ('h1', dict(underline=True)), + ('indent', dict(lmargin1=20, lmargin2=20)), + ('hangindent', dict(lmargin1=0, lmargin2=60)), + ('var', dict(foreground='#88f')), + ('regexp', dict(foreground='#ba7')), + ('match', dict(foreground='#6a6')), ] ##///////////////////////////////////////////////////////////////// @@ -255,74 +256,74 @@ class RegexpChunkApp(object): _GRAMMARBOX_PARAMS = dict( width=40, height=12, - background="#efe", - highlightbackground="#efe", + background='#efe', + highlightbackground='#efe', highlightthickness=1, - relief="groove", + relief='groove', border=2, - wrap="word", + wrap='word', ) _HELPBOX_PARAMS = dict( width=15, height=15, - background="#efe", - highlightbackground="#efe", - foreground="#555", + background='#efe', + highlightbackground='#efe', + foreground='#555', highlightthickness=1, - relief="groove", + relief='groove', border=2, - wrap="word", + wrap='word', ) _DEVSETBOX_PARAMS = dict( width=70, height=10, - background="#eef", - highlightbackground="#eef", + background='#eef', + highlightbackground='#eef', highlightthickness=1, - relief="groove", + relief='groove', border=2, - wrap="word", + wrap='word', tabs=(30,), ) - _STATUS_PARAMS = dict(background="#9bb", relief="groove", border=2) - _FONT_PARAMS = dict(family="helvetica", size=-20) - _FRAME_PARAMS = dict(background="#777", padx=2, pady=2, border=3) + _STATUS_PARAMS = dict(background='#9bb', relief='groove', border=2) + _FONT_PARAMS = dict(family='helvetica', size=-20) + _FRAME_PARAMS = dict(background='#777', padx=2, pady=2, border=3) _EVALBOX_PARAMS = dict( - background="#eef", - highlightbackground="#eef", + background='#eef', + highlightbackground='#eef', highlightthickness=1, - relief="groove", + relief='groove', border=2, width=300, height=280, ) _BUTTON_PARAMS = dict( - background="#777", activebackground="#777", highlightbackground="#777" + background='#777', activebackground='#777', highlightbackground='#777' ) - _HELPTAB_BG_COLOR = "#aba" - _HELPTAB_FG_COLOR = "#efe" + _HELPTAB_BG_COLOR = '#aba' + _HELPTAB_FG_COLOR = '#efe' - _HELPTAB_FG_PARAMS = dict(background="#efe") - _HELPTAB_BG_PARAMS = dict(background="#aba") + _HELPTAB_FG_PARAMS = dict(background='#efe') + _HELPTAB_BG_PARAMS = dict(background='#aba') _HELPTAB_SPACER = 6 def normalize_grammar(self, grammar): # Strip comments - grammar = re.sub(r"((\\.|[^#])*)(#.*)?", r"\1", grammar) + grammar = re.sub(r'((\\.|[^#])*)(#.*)?', r'\1', grammar) # Normalize whitespace - grammar = re.sub(" +", " ", grammar) - grammar = re.sub("\n\s+", "\n", grammar) + grammar = re.sub(' +', ' ', grammar) + grammar = re.sub('\n\s+', '\n', grammar) grammar = grammar.strip() # [xx] Hack: automatically backslash $! 
- grammar = re.sub(r"([^\\])\$", r"\1\\$", grammar) + grammar = re.sub(r'([^\\])\$', r'\1\\$', grammar) return grammar def __init__( self, - devset_name="conll2000", + devset_name='conll2000', devset=None, - grammar="", - chunk_label="NP", + grammar='', + chunk_label='NP', tagset=None, ): """ @@ -343,12 +344,12 @@ class RegexpChunkApp(object): # Named development sets: if devset is None: - if devset_name == "conll2000": - devset = conll2000.chunked_sents("train.txt") # [:100] - elif devset == "treebank": + if devset_name == 'conll2000': + devset = conll2000.chunked_sents('train.txt') # [:100] + elif devset == 'treebank': devset = treebank_chunk.chunked_sents() # [:100] else: - raise ValueError("Unknown development set %s" % devset_name) + raise ValueError('Unknown development set %s' % devset_name) self.chunker = None """The chunker built from the grammar string""" @@ -400,9 +401,9 @@ class RegexpChunkApp(object): # Set up the main window. top = self.top = Tk() - top.geometry("+50+50") - top.title("Regexp Chunk Parser App") - top.bind("", self.destroy) + top.geometry('+50+50') + top.title('Regexp Chunk Parser App') + top.bind('', self.destroy) # Varaible that restricts how much of the devset we look at. self._devset_size = IntVar(top) @@ -417,131 +418,131 @@ class RegexpChunkApp(object): # If a grammar was given, then display it. if grammar: - self.grammarbox.insert("end", grammar + "\n") - self.grammarbox.mark_set("insert", "1.0") + self.grammarbox.insert('end', grammar + '\n') + self.grammarbox.mark_set('insert', '1.0') # Display the first item in the development set self.show_devset(0) self.update() def _init_bindings(self, top): - top.bind("", self._devset_next) - top.bind("", self._devset_prev) - top.bind("", self.toggle_show_trace) - top.bind("", self.update) - top.bind("", lambda e: self.save_grammar()) - top.bind("", lambda e: self.load_grammar()) - self.grammarbox.bind("", self.toggle_show_trace) - self.grammarbox.bind("", self._devset_next) - self.grammarbox.bind("", self._devset_prev) + top.bind('', self._devset_next) + top.bind('', self._devset_prev) + top.bind('', self.toggle_show_trace) + top.bind('', self.update) + top.bind('', lambda e: self.save_grammar()) + top.bind('', lambda e: self.load_grammar()) + self.grammarbox.bind('', self.toggle_show_trace) + self.grammarbox.bind('', self._devset_next) + self.grammarbox.bind('', self._devset_prev) # Redraw the eval graph when the window size changes - self.evalbox.bind("", self._eval_plot) + self.evalbox.bind('', self._eval_plot) def _init_fonts(self, top): # TWhat's our font size (default=same as sysfont) self._size = IntVar(top) self._size.set(20) - self._font = Font(family="helvetica", size=-self._size.get()) + self._font = Font(family='helvetica', size=-self._size.get()) self._smallfont = Font( - family="helvetica", size=-(int(self._size.get() * 14 // 20)) + family='helvetica', size=-(int(self._size.get() * 14 // 20)) ) def _init_menubar(self, parent): menubar = Menu(parent) filemenu = Menu(menubar, tearoff=0) - filemenu.add_command(label="Reset Application", underline=0, command=self.reset) + filemenu.add_command(label='Reset Application', underline=0, command=self.reset) filemenu.add_command( - label="Save Current Grammar", + label='Save Current Grammar', underline=0, - accelerator="Ctrl-s", + accelerator='Ctrl-s', command=self.save_grammar, ) filemenu.add_command( - label="Load Grammar", + label='Load Grammar', underline=0, - accelerator="Ctrl-o", + accelerator='Ctrl-o', command=self.load_grammar, ) filemenu.add_command( 
- label="Save Grammar History", underline=13, command=self.save_history + label='Save Grammar History', underline=13, command=self.save_history ) filemenu.add_command( - label="Exit", underline=1, command=self.destroy, accelerator="Ctrl-q" + label='Exit', underline=1, command=self.destroy, accelerator='Ctrl-q' ) - menubar.add_cascade(label="File", underline=0, menu=filemenu) + menubar.add_cascade(label='File', underline=0, menu=filemenu) viewmenu = Menu(menubar, tearoff=0) viewmenu.add_radiobutton( - label="Tiny", + label='Tiny', variable=self._size, underline=0, value=10, command=self.resize, ) viewmenu.add_radiobutton( - label="Small", + label='Small', variable=self._size, underline=0, value=16, command=self.resize, ) viewmenu.add_radiobutton( - label="Medium", + label='Medium', variable=self._size, underline=0, value=20, command=self.resize, ) viewmenu.add_radiobutton( - label="Large", + label='Large', variable=self._size, underline=0, value=24, command=self.resize, ) viewmenu.add_radiobutton( - label="Huge", + label='Huge', variable=self._size, underline=0, value=34, command=self.resize, ) - menubar.add_cascade(label="View", underline=0, menu=viewmenu) + menubar.add_cascade(label='View', underline=0, menu=viewmenu) devsetmenu = Menu(menubar, tearoff=0) devsetmenu.add_radiobutton( - label="50 sentences", + label='50 sentences', variable=self._devset_size, value=50, command=self.set_devset_size, ) devsetmenu.add_radiobutton( - label="100 sentences", + label='100 sentences', variable=self._devset_size, value=100, command=self.set_devset_size, ) devsetmenu.add_radiobutton( - label="200 sentences", + label='200 sentences', variable=self._devset_size, value=200, command=self.set_devset_size, ) devsetmenu.add_radiobutton( - label="500 sentences", + label='500 sentences', variable=self._devset_size, value=500, command=self.set_devset_size, ) - menubar.add_cascade(label="Development-Set", underline=0, menu=devsetmenu) + menubar.add_cascade(label='Development-Set', underline=0, menu=devsetmenu) helpmenu = Menu(menubar, tearoff=0) - helpmenu.add_command(label="About", underline=0, command=self.about) - menubar.add_cascade(label="Help", underline=0, menu=helpmenu) + helpmenu.add_command(label='About', underline=0, command=self.about) + menubar.add_cascade(label='Help', underline=0, menu=helpmenu) parent.config(menu=menubar) @@ -550,34 +551,34 @@ class RegexpChunkApp(object): self.show_devset() else: self.show_trace() - return "break" + return 'break' _SCALE_N = 5 # center on the last 5 examples. _DRAW_LINES = False def _eval_plot(self, *e, **config): - width = config.get("width", self.evalbox.winfo_width()) - height = config.get("height", self.evalbox.winfo_height()) + width = config.get('width', self.evalbox.winfo_width()) + height = config.get('height', self.evalbox.winfo_height()) # Clear the canvas - self.evalbox.delete("all") + self.evalbox.delete('all') # Draw the precision & recall labels. tag = self.evalbox.create_text( - 10, height // 2 - 10, justify="left", anchor="w", text="Precision" + 10, height // 2 - 10, justify='left', anchor='w', text='Precision' ) left, right = self.evalbox.bbox(tag)[2] + 5, width - 10 tag = self.evalbox.create_text( left + (width - left) // 2, height - 10, - anchor="s", - text="Recall", - justify="center", + anchor='s', + text='Recall', + justify='center', ) top, bot = 10, self.evalbox.bbox(tag)[1] - 10 # Draw masks for clipping the plot. 
- bg = self._EVALBOX_PARAMS["background"] + bg = self._EVALBOX_PARAMS['background'] self.evalbox.lower( self.evalbox.create_rectangle(0, 0, left - 1, 5000, fill=bg, outline=bg) ) @@ -624,9 +625,9 @@ class RegexpChunkApp(object): (i / 10.0 - min_precision) / (max_precision - min_precision) ) if left < x < right: - self.evalbox.create_line(x, top, x, bot, fill="#888") + self.evalbox.create_line(x, top, x, bot, fill='#888') if top < y < bot: - self.evalbox.create_line(left, y, right, y, fill="#888") + self.evalbox.create_line(left, y, right, y, fill='#888') self.evalbox.create_line(left, top, left, bot) self.evalbox.create_line(left, bot, right, bot) @@ -634,30 +635,30 @@ class RegexpChunkApp(object): self.evalbox.create_text( left - 3, bot, - justify="right", - anchor="se", - text="%d%%" % (100 * min_precision), + justify='right', + anchor='se', + text='%d%%' % (100 * min_precision), ) self.evalbox.create_text( left - 3, top, - justify="right", - anchor="ne", - text="%d%%" % (100 * max_precision), + justify='right', + anchor='ne', + text='%d%%' % (100 * max_precision), ) self.evalbox.create_text( left, bot + 3, - justify="center", - anchor="nw", - text="%d%%" % (100 * min_recall), + justify='center', + anchor='nw', + text='%d%%' % (100 * min_recall), ) self.evalbox.create_text( right, bot + 3, - justify="center", - anchor="ne", - text="%d%%" % (100 * max_recall), + justify='center', + anchor='ne', + text='%d%%' % (100 * max_recall), ) # Display the scores. @@ -671,22 +672,22 @@ class RegexpChunkApp(object): ) if i == self._history_index: self.evalbox.create_oval( - x - 2, y - 2, x + 2, y + 2, fill="#0f0", outline="#000" + x - 2, y - 2, x + 2, y + 2, fill='#0f0', outline='#000' ) - self.status["text"] = ( - "Precision: %.2f%%\t" % (precision * 100) - + "Recall: %.2f%%\t" % (recall * 100) - + "F-score: %.2f%%" % (fscore * 100) + self.status['text'] = ( + 'Precision: %.2f%%\t' % (precision * 100) + + 'Recall: %.2f%%\t' % (recall * 100) + + 'F-score: %.2f%%' % (fscore * 100) ) else: self.evalbox.lower( self.evalbox.create_oval( - x - 2, y - 2, x + 2, y + 2, fill="#afa", outline="#8c8" + x - 2, y - 2, x + 2, y + 2, fill='#afa', outline='#8c8' ) ) if prev_x is not None and self._eval_lines.get(): self.evalbox.lower( - self.evalbox.create_line(prev_x, prev_y, x, y, fill="#8c8") + self.evalbox.create_line(prev_x, prev_y, x, y, fill='#8c8') ) prev_x, prev_y = x, y @@ -729,7 +730,7 @@ class RegexpChunkApp(object): # If the grammar is empty, the don't bother evaluating it, or # recording it in history -- the score will just be 0. 
- if self.normalized_grammar.strip() == "": + if self.normalized_grammar.strip() == '': # self._eval_index = self._devset_size.get() self._eval_demon_running = False return @@ -762,7 +763,7 @@ class RegexpChunkApp(object): self._eval_normalized_grammar = None else: progress = 100 * self._eval_index / self._devset_size.get() - self.status["text"] = "Evaluating on Development Set (%d%%)" % progress + self.status['text'] = 'Evaluating on Development Set (%d%%)' % progress self._eval_demon_running = True self._adaptively_modify_eval_chunk(time.time() - t0) self.top.after(int(self._EVAL_FREQ * 1000), self._eval_demon) @@ -803,210 +804,210 @@ class RegexpChunkApp(object): self.grammarlabel = Label( frame0, font=self._font, - text="Grammar:", - highlightcolor="black", - background=self._GRAMMARBOX_PARAMS["background"], + text='Grammar:', + highlightcolor='black', + background=self._GRAMMARBOX_PARAMS['background'], ) - self.grammarlabel.grid(column=0, row=0, sticky="SW") - self.grammarbox.grid(column=0, row=1, sticky="NEWS") + self.grammarlabel.grid(column=0, row=0, sticky='SW') + self.grammarbox.grid(column=0, row=1, sticky='NEWS') # Scroll bar for grammar grammar_scrollbar = Scrollbar(frame0, command=self.grammarbox.yview) - grammar_scrollbar.grid(column=1, row=1, sticky="NWS") + grammar_scrollbar.grid(column=1, row=1, sticky='NWS') self.grammarbox.config(yscrollcommand=grammar_scrollbar.set) # grammar buttons - bg = self._FRAME_PARAMS["background"] + bg = self._FRAME_PARAMS['background'] frame3 = Frame(frame0, background=bg) - frame3.grid(column=0, row=2, sticky="EW") + frame3.grid(column=0, row=2, sticky='EW') Button( frame3, - text="Prev Grammar", + text='Prev Grammar', command=self._history_prev, **self._BUTTON_PARAMS - ).pack(side="left") + ).pack(side='left') Button( frame3, - text="Next Grammar", + text='Next Grammar', command=self._history_next, **self._BUTTON_PARAMS - ).pack(side="left") + ).pack(side='left') # Help box self.helpbox = Text(frame0, font=self._smallfont, **self._HELPBOX_PARAMS) - self.helpbox.grid(column=3, row=1, sticky="NEWS") + self.helpbox.grid(column=3, row=1, sticky='NEWS') self.helptabs = {} - bg = self._FRAME_PARAMS["background"] + bg = self._FRAME_PARAMS['background'] helptab_frame = Frame(frame0, background=bg) - helptab_frame.grid(column=3, row=0, sticky="SW") + helptab_frame.grid(column=3, row=0, sticky='SW') for i, (tab, tabstops, text) in enumerate(self.HELP): label = Label(helptab_frame, text=tab, font=self._smallfont) - label.grid(column=i * 2, row=0, sticky="S") + label.grid(column=i * 2, row=0, sticky='S') # help_frame.grid_columnconfigure(i, weight=1) # label.pack(side='left') - label.bind("", lambda e, tab=tab: self.show_help(tab)) + label.bind('', lambda e, tab=tab: self.show_help(tab)) self.helptabs[tab] = label Frame( helptab_frame, height=1, width=self._HELPTAB_SPACER, background=bg ).grid(column=i * 2 + 1, row=0) self.helptabs[self.HELP[0][0]].configure(font=self._font) - self.helpbox.tag_config("elide", elide=True) + self.helpbox.tag_config('elide', elide=True) for (tag, params) in self.HELP_AUTOTAG: - self.helpbox.tag_config("tag-%s" % tag, **params) + self.helpbox.tag_config('tag-%s' % tag, **params) self.show_help(self.HELP[0][0]) # Scroll bar for helpbox help_scrollbar = Scrollbar(frame0, command=self.helpbox.yview) self.helpbox.config(yscrollcommand=help_scrollbar.set) - help_scrollbar.grid(column=4, row=1, sticky="NWS") + help_scrollbar.grid(column=4, row=1, sticky='NWS') # The dev set - frame4 = Frame(frame0, 
background=self._FRAME_PARAMS["background"]) + frame4 = Frame(frame0, background=self._FRAME_PARAMS['background']) self.devsetbox = Text(frame4, font=self._font, **self._DEVSETBOX_PARAMS) - self.devsetbox.pack(expand=True, fill="both") + self.devsetbox.pack(expand=True, fill='both') self.devsetlabel = Label( frame0, font=self._font, - text="Development Set:", - justify="right", - background=self._DEVSETBOX_PARAMS["background"], + text='Development Set:', + justify='right', + background=self._DEVSETBOX_PARAMS['background'], ) - self.devsetlabel.grid(column=0, row=4, sticky="SW") - frame4.grid(column=0, row=5, sticky="NEWS") + self.devsetlabel.grid(column=0, row=4, sticky='SW') + frame4.grid(column=0, row=5, sticky='NEWS') # dev set scrollbars self.devset_scroll = Scrollbar(frame0, command=self._devset_scroll) - self.devset_scroll.grid(column=1, row=5, sticky="NWS") + self.devset_scroll.grid(column=1, row=5, sticky='NWS') self.devset_xscroll = Scrollbar( - frame4, command=self.devsetbox.xview, orient="horiz" + frame4, command=self.devsetbox.xview, orient='horiz' ) - self.devsetbox["xscrollcommand"] = self.devset_xscroll.set - self.devset_xscroll.pack(side="bottom", fill="x") + self.devsetbox['xscrollcommand'] = self.devset_xscroll.set + self.devset_xscroll.pack(side='bottom', fill='x') # dev set buttons - bg = self._FRAME_PARAMS["background"] + bg = self._FRAME_PARAMS['background'] frame1 = Frame(frame0, background=bg) - frame1.grid(column=0, row=7, sticky="EW") + frame1.grid(column=0, row=7, sticky='EW') Button( frame1, - text="Prev Example (Ctrl-p)", + text='Prev Example (Ctrl-p)', command=self._devset_prev, **self._BUTTON_PARAMS - ).pack(side="left") + ).pack(side='left') Button( frame1, - text="Next Example (Ctrl-n)", + text='Next Example (Ctrl-n)', command=self._devset_next, **self._BUTTON_PARAMS - ).pack(side="left") + ).pack(side='left') self.devset_button = Button( frame1, - text="Show example", + text='Show example', command=self.show_devset, - state="disabled", + state='disabled', **self._BUTTON_PARAMS ) - self.devset_button.pack(side="right") + self.devset_button.pack(side='right') self.trace_button = Button( - frame1, text="Show trace", command=self.show_trace, **self._BUTTON_PARAMS + frame1, text='Show trace', command=self.show_trace, **self._BUTTON_PARAMS ) - self.trace_button.pack(side="right") + self.trace_button.pack(side='right') # evaluation box self.evalbox = Canvas(frame0, **self._EVALBOX_PARAMS) label = Label( frame0, font=self._font, - text="Evaluation:", - justify="right", - background=self._EVALBOX_PARAMS["background"], + text='Evaluation:', + justify='right', + background=self._EVALBOX_PARAMS['background'], ) - label.grid(column=3, row=4, sticky="SW") - self.evalbox.grid(column=3, row=5, sticky="NEWS", columnspan=2) + label.grid(column=3, row=4, sticky='SW') + self.evalbox.grid(column=3, row=5, sticky='NEWS', columnspan=2) # evaluation box buttons - bg = self._FRAME_PARAMS["background"] + bg = self._FRAME_PARAMS['background'] frame2 = Frame(frame0, background=bg) - frame2.grid(column=3, row=7, sticky="EW") + frame2.grid(column=3, row=7, sticky='EW') self._autoscale = IntVar(self.top) self._autoscale.set(False) Checkbutton( frame2, variable=self._autoscale, command=self._eval_plot, - text="Zoom", + text='Zoom', **self._BUTTON_PARAMS - ).pack(side="left") + ).pack(side='left') self._eval_lines = IntVar(self.top) self._eval_lines.set(False) Checkbutton( frame2, variable=self._eval_lines, command=self._eval_plot, - text="Lines", + text='Lines', **self._BUTTON_PARAMS - 
).pack(side="left") - Button(frame2, text="History", **self._BUTTON_PARAMS).pack(side="right") + ).pack(side='left') + Button(frame2, text='History', **self._BUTTON_PARAMS).pack(side='right') # The status label self.status = Label(frame0, font=self._font, **self._STATUS_PARAMS) - self.status.grid(column=0, row=9, sticky="NEW", padx=3, pady=2, columnspan=5) + self.status.grid(column=0, row=9, sticky='NEW', padx=3, pady=2, columnspan=5) # Help box & devset box can't be edited. - self.helpbox["state"] = "disabled" - self.devsetbox["state"] = "disabled" + self.helpbox['state'] = 'disabled' + self.devsetbox['state'] = 'disabled' # Spacers - bg = self._FRAME_PARAMS["background"] + bg = self._FRAME_PARAMS['background'] Frame(frame0, height=10, width=0, background=bg).grid(column=0, row=3) Frame(frame0, height=0, width=10, background=bg).grid(column=2, row=0) Frame(frame0, height=6, width=0, background=bg).grid(column=0, row=8) # pack the frame. - frame0.pack(fill="both", expand=True) + frame0.pack(fill='both', expand=True) # Set up colors for the devset box - self.devsetbox.tag_config("true-pos", background="#afa", underline="True") - self.devsetbox.tag_config("false-neg", underline="True", foreground="#800") - self.devsetbox.tag_config("false-pos", background="#faa") - self.devsetbox.tag_config("trace", foreground="#666", wrap="none") - self.devsetbox.tag_config("wrapindent", lmargin2=30, wrap="none") - self.devsetbox.tag_config("error", foreground="#800") + self.devsetbox.tag_config('true-pos', background='#afa', underline='True') + self.devsetbox.tag_config('false-neg', underline='True', foreground='#800') + self.devsetbox.tag_config('false-pos', background='#faa') + self.devsetbox.tag_config('trace', foreground='#666', wrap='none') + self.devsetbox.tag_config('wrapindent', lmargin2=30, wrap='none') + self.devsetbox.tag_config('error', foreground='#800') # And for the grammarbox - self.grammarbox.tag_config("error", background="#fec") - self.grammarbox.tag_config("comment", foreground="#840") - self.grammarbox.tag_config("angle", foreground="#00f") - self.grammarbox.tag_config("brace", foreground="#0a0") - self.grammarbox.tag_config("hangindent", lmargin1=0, lmargin2=40) + self.grammarbox.tag_config('error', background='#fec') + self.grammarbox.tag_config('comment', foreground='#840') + self.grammarbox.tag_config('angle', foreground='#00f') + self.grammarbox.tag_config('brace', foreground='#0a0') + self.grammarbox.tag_config('hangindent', lmargin1=0, lmargin2=40) _showing_trace = False def show_trace(self, *e): self._showing_trace = True - self.trace_button["state"] = "disabled" - self.devset_button["state"] = "normal" + self.trace_button['state'] = 'disabled' + self.devset_button['state'] = 'normal' - self.devsetbox["state"] = "normal" + self.devsetbox['state'] = 'normal' # self.devsetbox['wrap'] = 'none' - self.devsetbox.delete("1.0", "end") - self.devsetlabel["text"] = "Development Set (%d/%d)" % ( + self.devsetbox.delete('1.0', 'end') + self.devsetlabel['text'] = 'Development Set (%d/%d)' % ( (self.devset_index + 1, self._devset_size.get()) ) if self.chunker is None: - self.devsetbox.insert("1.0", "Trace: waiting for a valid grammar.") - self.devsetbox.tag_add("error", "1.0", "end") + self.devsetbox.insert('1.0', 'Trace: waiting for a valid grammar.') + self.devsetbox.tag_add('error', '1.0', 'end') return # can't do anything more gold_tree = self.devset[self.devset_index] rules = self.chunker.rules() # Calculate the tag sequence - tagseq = "\t" + tagseq = '\t' charnum = [1] for wordnum, 
(word, pos) in enumerate(gold_tree.leaves()): - tagseq += "%s " % pos + tagseq += '%s ' % pos charnum.append(len(tagseq)) self.charnum = dict( ((i, j), charnum[j]) @@ -1017,14 +1018,14 @@ class RegexpChunkApp(object): for i in range(len(rules) + 1): if i == 0: - self.devsetbox.insert("end", "Start:\n") - self.devsetbox.tag_add("trace", "end -2c linestart", "end -2c") + self.devsetbox.insert('end', 'Start:\n') + self.devsetbox.tag_add('trace', 'end -2c linestart', 'end -2c') else: - self.devsetbox.insert("end", "Apply %s:\n" % rules[i - 1]) - self.devsetbox.tag_add("trace", "end -2c linestart", "end -2c") + self.devsetbox.insert('end', 'Apply %s:\n' % rules[i - 1]) + self.devsetbox.tag_add('trace', 'end -2c linestart', 'end -2c') # Display the tag sequence. - self.devsetbox.insert("end", tagseq + "\n") - self.devsetbox.tag_add("wrapindent", "end -2c linestart", "end -2c") + self.devsetbox.insert('end', tagseq + '\n') + self.devsetbox.tag_add('wrapindent', 'end -2c linestart', 'end -2c') # Run a partial parser, and extract gold & test chunks chunker = RegexpChunkParser(rules[:i]) test_tree = self._chunkparse(gold_tree.leaves()) @@ -1032,13 +1033,13 @@ class RegexpChunkApp(object): test_chunks = self._chunks(test_tree) # Compare them. for chunk in gold_chunks.intersection(test_chunks): - self._color_chunk(i, chunk, "true-pos") + self._color_chunk(i, chunk, 'true-pos') for chunk in gold_chunks - test_chunks: - self._color_chunk(i, chunk, "false-neg") + self._color_chunk(i, chunk, 'false-neg') for chunk in test_chunks - gold_chunks: - self._color_chunk(i, chunk, "false-pos") - self.devsetbox.insert("end", "Finished.\n") - self.devsetbox.tag_add("trace", "end -2c linestart", "end -2c") + self._color_chunk(i, chunk, 'false-pos') + self.devsetbox.insert('end', 'Finished.\n') + self.devsetbox.tag_add('trace', 'end -2c linestart', 'end -2c') # This is a hack, because the x-scrollbar isn't updating its # position right -- I'm not sure what the underlying cause is @@ -1046,18 +1047,18 @@ class RegexpChunkApp(object): self.top.after(100, self.devset_xscroll.set, 0, 0.3) def show_help(self, tab): - self.helpbox["state"] = "normal" - self.helpbox.delete("1.0", "end") + self.helpbox['state'] = 'normal' + self.helpbox.delete('1.0', 'end') for (name, tabstops, text) in self.HELP: if name == tab: text = text.replace( - "<>", - "\n".join( + '<>', + '\n'.join( ( - "\t%s\t%s" % item + '\t%s\t%s' % item for item in sorted( list(self.tagset.items()), - key=lambda t_w: re.match("\w+", t_w[0]) + key=lambda t_w: re.match('\w+', t_w[0]) and (0, t_w[0]) or (1, t_w[0]), ) @@ -1067,27 +1068,27 @@ class RegexpChunkApp(object): self.helptabs[name].config(**self._HELPTAB_FG_PARAMS) self.helpbox.config(tabs=tabstops) - self.helpbox.insert("1.0", text + "\n" * 20) - C = "1.0 + %d chars" + self.helpbox.insert('1.0', text + '\n' * 20) + C = '1.0 + %d chars' for (tag, params) in self.HELP_AUTOTAG: - pattern = "(?s)(<%s>)(.*?)()" % (tag, tag) + pattern = '(?s)(<%s>)(.*?)()' % (tag, tag) for m in re.finditer(pattern, text): - self.helpbox.tag_add("elide", C % m.start(1), C % m.end(1)) + self.helpbox.tag_add('elide', C % m.start(1), C % m.end(1)) self.helpbox.tag_add( - "tag-%s" % tag, C % m.start(2), C % m.end(2) + 'tag-%s' % tag, C % m.start(2), C % m.end(2) ) - self.helpbox.tag_add("elide", C % m.start(3), C % m.end(3)) + self.helpbox.tag_add('elide', C % m.start(3), C % m.end(3)) else: self.helptabs[name].config(**self._HELPTAB_BG_PARAMS) - self.helpbox["state"] = "disabled" + self.helpbox['state'] = 'disabled' def 
_history_prev(self, *e): self._view_history(self._history_index - 1) - return "break" + return 'break' def _history_next(self, *e): self._view_history(self._history_index + 1) - return "break" + return 'break' def _view_history(self, index): # Bounds & sanity checking: @@ -1099,10 +1100,10 @@ class RegexpChunkApp(object): return # Show the requested grammar. It will get added to _history # only if they edit it (causing self.update() to get run.) - self.grammarbox["state"] = "normal" - self.grammarbox.delete("1.0", "end") - self.grammarbox.insert("end", self._history[index][0]) - self.grammarbox.mark_set("insert", "1.0") + self.grammarbox['state'] = 'normal' + self.grammarbox.delete('1.0', 'end') + self.grammarbox.insert('end', self._history[index][0]) + self.grammarbox.mark_set('insert', '1.0') self._history_index = index self._syntax_highlight_grammar(self._history[index][0]) # Record the normalized grammar & regenerate the chunker. @@ -1110,7 +1111,7 @@ class RegexpChunkApp(object): if self.normalized_grammar: rules = [ RegexpChunkRule.fromstring(line) - for line in self.normalized_grammar.split("\n") + for line in self.normalized_grammar.split('\n') ] else: rules = [] @@ -1123,20 +1124,20 @@ class RegexpChunkApp(object): self.show_trace() # Update the grammar label if self._history_index < len(self._history) - 1: - self.grammarlabel["text"] = "Grammar %s/%s:" % ( + self.grammarlabel['text'] = 'Grammar %s/%s:' % ( self._history_index + 1, len(self._history), ) else: - self.grammarlabel["text"] = "Grammar:" + self.grammarlabel['text'] = 'Grammar:' def _devset_next(self, *e): - self._devset_scroll("scroll", 1, "page") - return "break" + self._devset_scroll('scroll', 1, 'page') + return 'break' def _devset_prev(self, *e): - self._devset_scroll("scroll", -1, "page") - return "break" + self._devset_scroll('scroll', -1, 'page') + return 'break' def destroy(self, *e): if self.top is None: @@ -1147,14 +1148,14 @@ class RegexpChunkApp(object): def _devset_scroll(self, command, *args): N = 1 # size of a page -- one sentence. showing_trace = self._showing_trace - if command == "scroll" and args[1].startswith("unit"): + if command == 'scroll' and args[1].startswith('unit'): self.show_devset(self.devset_index + int(args[0])) - elif command == "scroll" and args[1].startswith("page"): + elif command == 'scroll' and args[1].startswith('page'): self.show_devset(self.devset_index + N * int(args[0])) - elif command == "moveto": + elif command == 'moveto': self.show_devset(int(float(args[0]) * self._devset_size.get())) else: - assert 0, "bad scroll command %s %s" % (command, args) + assert 0, 'bad scroll command %s %s' % (command, args) if showing_trace: self.show_trace() @@ -1170,14 +1171,14 @@ class RegexpChunkApp(object): self.devset_index = index self._showing_trace = False - self.trace_button["state"] = "normal" - self.devset_button["state"] = "disabled" + self.trace_button['state'] = 'normal' + self.devset_button['state'] = 'disabled' # Clear the text box. 
- self.devsetbox["state"] = "normal" - self.devsetbox["wrap"] = "word" - self.devsetbox.delete("1.0", "end") - self.devsetlabel["text"] = "Development Set (%d/%d)" % ( + self.devsetbox['state'] = 'normal' + self.devsetbox['wrap'] = 'word' + self.devsetbox.delete('1.0', 'end') + self.devsetlabel['text'] = 'Development Set (%d/%d)' % ( (self.devset_index + 1, self._devset_size.get()) ) @@ -1186,17 +1187,17 @@ class RegexpChunkApp(object): self.charnum = {} self.linenum = {0: 1} for sentnum, sent in enumerate(sample): - linestr = "" + linestr = '' for wordnum, (word, pos) in enumerate(sent.leaves()): self.charnum[sentnum, wordnum] = len(linestr) - linestr += "%s/%s " % (word, pos) + linestr += '%s/%s ' % (word, pos) self.charnum[sentnum, wordnum + 1] = len(linestr) - self.devsetbox.insert("end", linestr[:-1] + "\n\n") + self.devsetbox.insert('end', linestr[:-1] + '\n\n') # Highlight chunks in the dev set if self.chunker is not None: self._highlight_devset() - self.devsetbox["state"] = "disabled" + self.devsetbox['state'] = 'disabled' # Update the scrollbar first = self.devset_index / self._devset_size.get() @@ -1218,46 +1219,46 @@ class RegexpChunkApp(object): def _syntax_highlight_grammar(self, grammar): if self.top is None: return - self.grammarbox.tag_remove("comment", "1.0", "end") - self.grammarbox.tag_remove("angle", "1.0", "end") - self.grammarbox.tag_remove("brace", "1.0", "end") - self.grammarbox.tag_add("hangindent", "1.0", "end") - for lineno, line in enumerate(grammar.split("\n")): + self.grammarbox.tag_remove('comment', '1.0', 'end') + self.grammarbox.tag_remove('angle', '1.0', 'end') + self.grammarbox.tag_remove('brace', '1.0', 'end') + self.grammarbox.tag_add('hangindent', '1.0', 'end') + for lineno, line in enumerate(grammar.split('\n')): if not line.strip(): continue - m = re.match(r"(\\.|[^#])*(#.*)?", line) + m = re.match(r'(\\.|[^#])*(#.*)?', line) comment_start = None if m.group(2): comment_start = m.start(2) - s = "%d.%d" % (lineno + 1, m.start(2)) - e = "%d.%d" % (lineno + 1, m.end(2)) - self.grammarbox.tag_add("comment", s, e) - for m in re.finditer("[<>{}]", line): + s = '%d.%d' % (lineno + 1, m.start(2)) + e = '%d.%d' % (lineno + 1, m.end(2)) + self.grammarbox.tag_add('comment', s, e) + for m in re.finditer('[<>{}]', line): if comment_start is not None and m.start() >= comment_start: break - s = "%d.%d" % (lineno + 1, m.start()) - e = "%d.%d" % (lineno + 1, m.end()) - if m.group() in "<>": - self.grammarbox.tag_add("angle", s, e) + s = '%d.%d' % (lineno + 1, m.start()) + e = '%d.%d' % (lineno + 1, m.end()) + if m.group() in '<>': + self.grammarbox.tag_add('angle', s, e) else: - self.grammarbox.tag_add("brace", s, e) + self.grammarbox.tag_add('brace', s, e) def _grammarcheck(self, grammar): if self.top is None: return - self.grammarbox.tag_remove("error", "1.0", "end") + self.grammarbox.tag_remove('error', '1.0', 'end') self._grammarcheck_errs = [] - for lineno, line in enumerate(grammar.split("\n")): - line = re.sub(r"((\\.|[^#])*)(#.*)?", r"\1", line) + for lineno, line in enumerate(grammar.split('\n')): + line = re.sub(r'((\\.|[^#])*)(#.*)?', r'\1', line) line = line.strip() if line: try: RegexpChunkRule.fromstring(line) except ValueError as e: self.grammarbox.tag_add( - "error", "%s.0" % (lineno + 1), "%s.0 lineend" % (lineno + 1) + 'error', '%s.0' % (lineno + 1), '%s.0 lineend' % (lineno + 1) ) - self.status["text"] = "" + self.status['text'] = '' def update(self, *event): # Record when update was called (for grammarcheck) @@ -1265,7 +1266,7 @@ class 
RegexpChunkApp(object): self._last_keypress = time.time() # Read the grammar from the Text box. - self.grammar = grammar = self.grammarbox.get("1.0", "end") + self.grammar = grammar = self.grammarbox.get('1.0', 'end') # If the grammar hasn't changed, do nothing: normalized_grammar = self.normalize_grammar(grammar) @@ -1277,7 +1278,7 @@ class RegexpChunkApp(object): # If the grammar has changed, and we're looking at history, # then stop looking at history. if self._history_index < len(self._history) - 1: - self.grammarlabel["text"] = "Grammar:" + self.grammarlabel['text'] = 'Grammar:' self._syntax_highlight_grammar(grammar) @@ -1288,7 +1289,7 @@ class RegexpChunkApp(object): if normalized_grammar: rules = [ RegexpChunkRule.fromstring(line) - for line in normalized_grammar.split("\n") + for line in normalized_grammar.split('\n') ] else: rules = [] @@ -1299,7 +1300,7 @@ class RegexpChunkApp(object): return self.chunker = RegexpChunkParser(rules) - self.grammarbox.tag_remove("error", "1.0", "end") + self.grammarbox.tag_remove('error', '1.0', 'end') self.grammar_changed = time.time() # Display the results if self._showing_trace: @@ -1314,9 +1315,9 @@ class RegexpChunkApp(object): if sample is None: sample = self.devset[self.devset_index : self.devset_index + 1] - self.devsetbox.tag_remove("true-pos", "1.0", "end") - self.devsetbox.tag_remove("false-neg", "1.0", "end") - self.devsetbox.tag_remove("false-pos", "1.0", "end") + self.devsetbox.tag_remove('true-pos', '1.0', 'end') + self.devsetbox.tag_remove('false-neg', '1.0', 'end') + self.devsetbox.tag_remove('false-pos', '1.0', 'end') # Run the grammar on the test cases. for sentnum, gold_tree in enumerate(sample): @@ -1327,11 +1328,11 @@ class RegexpChunkApp(object): test_chunks = self._chunks(test_tree) # Compare them. for chunk in gold_chunks.intersection(test_chunks): - self._color_chunk(sentnum, chunk, "true-pos") + self._color_chunk(sentnum, chunk, 'true-pos') for chunk in gold_chunks - test_chunks: - self._color_chunk(sentnum, chunk, "false-neg") + self._color_chunk(sentnum, chunk, 'false-neg') for chunk in test_chunks - gold_chunks: - self._color_chunk(sentnum, chunk, "false-pos") + self._color_chunk(sentnum, chunk, 'false-pos') def _chunkparse(self, words): try: @@ -1340,7 +1341,7 @@ class RegexpChunkApp(object): # There's an error somewhere in the grammar, but we're not sure # exactly where, so just mark the whole grammar as bad. # E.g., this is caused by: "({})" - self.grammarbox.tag_add("error", "1.0", "end") + self.grammarbox.tag_add('error', '1.0', 'end') # Treat it as tagging nothing: return words @@ -1348,8 +1349,8 @@ class RegexpChunkApp(object): start, end = chunk self.devsetbox.tag_add( tag, - "%s.%s" % (self.linenum[sentnum], self.charnum[sentnum, start]), - "%s.%s" % (self.linenum[sentnum], self.charnum[sentnum, end] - 1), + '%s.%s' % (self.linenum[sentnum], self.charnum[sentnum, start]), + '%s.%s' % (self.linenum[sentnum], self.charnum[sentnum, end] - 1), ) def reset(self): @@ -1361,40 +1362,40 @@ class RegexpChunkApp(object): self._history = [] self._history_index = 0 # Update the on-screen display. 
- self.grammarbox.delete("1.0", "end") + self.grammarbox.delete('1.0', 'end') self.show_devset(0) self.update() # self._eval_plot() SAVE_GRAMMAR_TEMPLATE = ( - "# Regexp Chunk Parsing Grammar\n" - "# Saved %(date)s\n" - "#\n" - "# Development set: %(devset)s\n" - "# Precision: %(precision)s\n" - "# Recall: %(recall)s\n" - "# F-score: %(fscore)s\n\n" - "%(grammar)s\n" + '# Regexp Chunk Parsing Grammar\n' + '# Saved %(date)s\n' + '#\n' + '# Development set: %(devset)s\n' + '# Precision: %(precision)s\n' + '# Recall: %(recall)s\n' + '# F-score: %(fscore)s\n\n' + '%(grammar)s\n' ) def save_grammar(self, filename=None): if not filename: - ftypes = [("Chunk Gramamr", ".chunk"), ("All files", "*")] - filename = asksaveasfilename(filetypes=ftypes, defaultextension=".chunk") + ftypes = [('Chunk Gramamr', '.chunk'), ('All files', '*')] + filename = asksaveasfilename(filetypes=ftypes, defaultextension='.chunk') if not filename: return if self._history and self.normalized_grammar == self.normalize_grammar( self._history[-1][0] ): precision, recall, fscore = [ - "%.2f%%" % (100 * v) for v in self._history[-1][1:] + '%.2f%%' % (100 * v) for v in self._history[-1][1:] ] elif self.chunker is None: - precision = recall = fscore = "Grammar not well formed" + precision = recall = fscore = 'Grammar not well formed' else: - precision = recall = fscore = "Not finished evaluation yet" + precision = recall = fscore = 'Not finished evaluation yet' - with open(filename, "w") as outfile: + with open(filename, 'w') as outfile: outfile.write( self.SAVE_GRAMMAR_TEMPLATE % dict( @@ -1409,39 +1410,39 @@ class RegexpChunkApp(object): def load_grammar(self, filename=None): if not filename: - ftypes = [("Chunk Gramamr", ".chunk"), ("All files", "*")] - filename = askopenfilename(filetypes=ftypes, defaultextension=".chunk") + ftypes = [('Chunk Gramamr', '.chunk'), ('All files', '*')] + filename = askopenfilename(filetypes=ftypes, defaultextension='.chunk') if not filename: return - self.grammarbox.delete("1.0", "end") + self.grammarbox.delete('1.0', 'end') self.update() - with open(filename, "r") as infile: + with open(filename, 'r') as infile: grammar = infile.read() grammar = re.sub( - "^\# Regexp Chunk Parsing Grammar[\s\S]*" "F-score:.*\n", "", grammar + '^\# Regexp Chunk Parsing Grammar[\s\S]*' 'F-score:.*\n', '', grammar ).lstrip() - self.grammarbox.insert("1.0", grammar) + self.grammarbox.insert('1.0', grammar) self.update() def save_history(self, filename=None): if not filename: - ftypes = [("Chunk Gramamr History", ".txt"), ("All files", "*")] - filename = asksaveasfilename(filetypes=ftypes, defaultextension=".txt") + ftypes = [('Chunk Gramamr History', '.txt'), ('All files', '*')] + filename = asksaveasfilename(filetypes=ftypes, defaultextension='.txt') if not filename: return - with open(filename, "w") as outfile: - outfile.write("# Regexp Chunk Parsing Grammar History\n") - outfile.write("# Saved %s\n" % time.ctime()) - outfile.write("# Development set: %s\n" % self.devset_name) + with open(filename, 'w') as outfile: + outfile.write('# Regexp Chunk Parsing Grammar History\n') + outfile.write('# Saved %s\n' % time.ctime()) + outfile.write('# Development set: %s\n' % self.devset_name) for i, (g, p, r, f) in enumerate(self._history): hdr = ( - "Grammar %d/%d (precision=%.2f%%, recall=%.2f%%, " - "fscore=%.2f%%)" + 'Grammar %d/%d (precision=%.2f%%, recall=%.2f%%, ' + 'fscore=%.2f%%)' % (i + 1, len(self._history), p * 100, r * 100, f * 100) ) - outfile.write("\n%s\n" % hdr) - outfile.write("".join(" %s\n" % line for 
line in g.strip().split())) + outfile.write('\n%s\n' % hdr) + outfile.write(''.join(' %s\n' % line for line in g.strip().split())) if not ( self._history @@ -1449,18 +1450,18 @@ class RegexpChunkApp(object): == self.normalize_grammar(self._history[-1][0]) ): if self.chunker is None: - outfile.write("\nCurrent Grammar (not well-formed)\n") + outfile.write('\nCurrent Grammar (not well-formed)\n') else: - outfile.write("\nCurrent Grammar (not evaluated)\n") + outfile.write('\nCurrent Grammar (not evaluated)\n') outfile.write( - "".join(" %s\n" % line for line in self.grammar.strip().split()) + ''.join(' %s\n' % line for line in self.grammar.strip().split()) ) def about(self, *e): ABOUT = "NLTK RegExp Chunk Parser Application\n" + "Written by Edward Loper" - TITLE = "About: Regular Expression Chunk Parser Application" + TITLE = 'About: Regular Expression Chunk Parser Application' try: - from tkinter.messagebox import Message + from six.moves.tkinter_messagebox import Message Message(message=ABOUT, title=TITLE).show() except: @@ -1497,7 +1498,7 @@ def app(): RegexpChunkApp().mainloop() -if __name__ == "__main__": +if __name__ == '__main__': app() -__all__ = ["app"] +__all__ = ['app'] diff --git a/nlp_resource_data/nltk/app/collocations_app.py b/nlp_resource_data/nltk/app/collocations_app.py index 36362a8..b2165e9 100644 --- a/nlp_resource_data/nltk/app/collocations_app.py +++ b/nlp_resource_data/nltk/app/collocations_app.py @@ -1,17 +1,19 @@ # Natural Language Toolkit: Collocations Application # Much of the GUI code is imported from concordance.py; We intend to merge these tools together -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Sumukh Ghodke # URL: # For license information, see LICENSE.TXT # +from __future__ import division + import threading -import queue as q -from tkinter.font import Font -from tkinter import ( +from six.moves import queue as q +from six.moves.tkinter_font import Font +from six.moves.tkinter import ( Button, END, Frame, @@ -44,38 +46,38 @@ from nltk.util import in_idle from nltk.probability import FreqDist -CORPUS_LOADED_EVENT = "<>" -ERROR_LOADING_CORPUS_EVENT = "<>" +CORPUS_LOADED_EVENT = '<>' +ERROR_LOADING_CORPUS_EVENT = '<>' POLL_INTERVAL = 100 -_DEFAULT = "English: Brown Corpus (Humor)" +_DEFAULT = 'English: Brown Corpus (Humor)' _CORPORA = { - "Catalan: CESS-CAT Corpus": lambda: cess_cat.words(), - "English: Brown Corpus": lambda: brown.words(), - "English: Brown Corpus (Press)": lambda: brown.words( - categories=["news", "editorial", "reviews"] + 'Catalan: CESS-CAT Corpus': lambda: cess_cat.words(), + 'English: Brown Corpus': lambda: brown.words(), + 'English: Brown Corpus (Press)': lambda: brown.words( + categories=['news', 'editorial', 'reviews'] ), - "English: Brown Corpus (Religion)": lambda: brown.words(categories="religion"), - "English: Brown Corpus (Learned)": lambda: brown.words(categories="learned"), - "English: Brown Corpus (Science Fiction)": lambda: brown.words( - categories="science_fiction" + 'English: Brown Corpus (Religion)': lambda: brown.words(categories='religion'), + 'English: Brown Corpus (Learned)': lambda: brown.words(categories='learned'), + 'English: Brown Corpus (Science Fiction)': lambda: brown.words( + categories='science_fiction' ), - "English: Brown Corpus (Romance)": lambda: brown.words(categories="romance"), - "English: Brown Corpus (Humor)": lambda: brown.words(categories="humor"), - "English: NPS Chat Corpus": lambda: nps_chat.words(), - "English: Wall Street Journal Corpus": lambda: 
treebank.words(), - "Chinese: Sinica Corpus": lambda: sinica_treebank.words(), - "Dutch: Alpino Corpus": lambda: alpino.words(), - "Hindi: Indian Languages Corpus": lambda: indian.words(files="hindi.pos"), - "Portuguese: Floresta Corpus (Portugal)": lambda: floresta.words(), - "Portuguese: MAC-MORPHO Corpus (Brazil)": lambda: mac_morpho.words(), - "Portuguese: Machado Corpus (Brazil)": lambda: machado.words(), - "Spanish: CESS-ESP Corpus": lambda: cess_esp.words(), + 'English: Brown Corpus (Romance)': lambda: brown.words(categories='romance'), + 'English: Brown Corpus (Humor)': lambda: brown.words(categories='humor'), + 'English: NPS Chat Corpus': lambda: nps_chat.words(), + 'English: Wall Street Journal Corpus': lambda: treebank.words(), + 'Chinese: Sinica Corpus': lambda: sinica_treebank.words(), + 'Dutch: Alpino Corpus': lambda: alpino.words(), + 'Hindi: Indian Languages Corpus': lambda: indian.words(files='hindi.pos'), + 'Portuguese: Floresta Corpus (Portugal)': lambda: floresta.words(), + 'Portuguese: MAC-MORPHO Corpus (Brazil)': lambda: mac_morpho.words(), + 'Portuguese: Machado Corpus (Brazil)': lambda: machado.words(), + 'Spanish: CESS-ESP Corpus': lambda: cess_esp.words(), } class CollocationsView: - _BACKGROUND_COLOUR = "#FFF" # white + _BACKGROUND_COLOUR = '#FFF' # white def __init__(self): self.queue = q.Queue() @@ -88,10 +90,10 @@ class CollocationsView: self.after = self.top.after(POLL_INTERVAL, self._poll) def _init_top(self, top): - top.geometry("550x650+50+50") - top.title("NLTK Collocations List") - top.bind("", self.destroy) - top.protocol("WM_DELETE_WINDOW", self.destroy) + top.geometry('550x650+50+50') + top.title('NLTK Collocations List') + top.bind('', self.destroy) + top.protocol('WM_DELETE_WINDOW', self.destroy) top.minsize(550, 650) def _init_widgets(self, parent): @@ -102,7 +104,7 @@ class CollocationsView: self._init_results_box(self.main_frame) self._init_paging(self.main_frame) self._init_status(self.main_frame) - self.main_frame.pack(fill="both", expand=True) + self.main_frame.pack(fill='both', expand=True) def _init_corpus_select(self, parent): innerframe = Frame(parent, background=self._BACKGROUND_COLOUR) @@ -111,12 +113,12 @@ class CollocationsView: Label( innerframe, justify=LEFT, - text=" Corpus: ", + text=' Corpus: ', background=self._BACKGROUND_COLOUR, padx=2, pady=1, border=0, - ).pack(side="left") + ).pack(side='left') other_corpora = list(self.model.CORPORA.keys()).remove( self.model.DEFAULT_CORPUS @@ -128,10 +130,10 @@ class CollocationsView: command=self.corpus_selected, *self.model.non_default_corpora() ) - om["borderwidth"] = 0 - om["highlightthickness"] = 1 - om.pack(side="left") - innerframe.pack(side="top", fill="x", anchor="n") + om['borderwidth'] = 0 + om['highlightthickness'] = 1 + om.pack(side='left') + innerframe.pack(side='top', fill='x', anchor='n') def _init_status(self, parent): self.status = Label( @@ -143,7 +145,7 @@ class CollocationsView: padx=1, pady=0, ) - self.status.pack(side="top", anchor="sw") + self.status.pack(side='top', anchor='sw') def _init_menubar(self): self._result_size = IntVar(self.top) @@ -151,37 +153,37 @@ class CollocationsView: filemenu = Menu(menubar, tearoff=0, borderwidth=0) filemenu.add_command( - label="Exit", underline=1, command=self.destroy, accelerator="Ctrl-q" + label='Exit', underline=1, command=self.destroy, accelerator='Ctrl-q' ) - menubar.add_cascade(label="File", underline=0, menu=filemenu) + menubar.add_cascade(label='File', underline=0, menu=filemenu) editmenu = Menu(menubar, tearoff=0) 
rescntmenu = Menu(editmenu, tearoff=0) rescntmenu.add_radiobutton( - label="20", + label='20', variable=self._result_size, underline=0, value=20, command=self.set_result_size, ) rescntmenu.add_radiobutton( - label="50", + label='50', variable=self._result_size, underline=0, value=50, command=self.set_result_size, ) rescntmenu.add_radiobutton( - label="100", + label='100', variable=self._result_size, underline=0, value=100, command=self.set_result_size, ) rescntmenu.invoke(1) - editmenu.add_cascade(label="Result Count", underline=0, menu=rescntmenu) + editmenu.add_cascade(label='Result Count', underline=0, menu=rescntmenu) - menubar.add_cascade(label="Edit", underline=0, menu=editmenu) + menubar.add_cascade(label='Edit', underline=0, menu=editmenu) self.top.config(menu=menubar) def set_result_size(self, **kwargs): @@ -192,55 +194,55 @@ class CollocationsView: i1 = Frame(innerframe) i2 = Frame(innerframe) vscrollbar = Scrollbar(i1, borderwidth=1) - hscrollbar = Scrollbar(i2, borderwidth=1, orient="horiz") + hscrollbar = Scrollbar(i2, borderwidth=1, orient='horiz') self.results_box = Text( i1, - font=Font(family="courier", size="16"), - state="disabled", + font=Font(family='courier', size='16'), + state='disabled', borderwidth=1, yscrollcommand=vscrollbar.set, xscrollcommand=hscrollbar.set, - wrap="none", - width="40", - height="20", + wrap='none', + width='40', + height='20', exportselection=1, ) - self.results_box.pack(side="left", fill="both", expand=True) - vscrollbar.pack(side="left", fill="y", anchor="e") + self.results_box.pack(side='left', fill='both', expand=True) + vscrollbar.pack(side='left', fill='y', anchor='e') vscrollbar.config(command=self.results_box.yview) - hscrollbar.pack(side="left", fill="x", expand=True, anchor="w") + hscrollbar.pack(side='left', fill='x', expand=True, anchor='w') hscrollbar.config(command=self.results_box.xview) # there is no other way of avoiding the overlap of scrollbars while using pack layout manager!!! 
- Label(i2, text=" ", background=self._BACKGROUND_COLOUR).pack( - side="left", anchor="e" + Label(i2, text=' ', background=self._BACKGROUND_COLOUR).pack( + side='left', anchor='e' ) - i1.pack(side="top", fill="both", expand=True, anchor="n") - i2.pack(side="bottom", fill="x", anchor="s") - innerframe.pack(side="top", fill="both", expand=True) + i1.pack(side='top', fill='both', expand=True, anchor='n') + i2.pack(side='bottom', fill='x', anchor='s') + innerframe.pack(side='top', fill='both', expand=True) def _init_paging(self, parent): innerframe = Frame(parent, background=self._BACKGROUND_COLOUR) self.prev = prev = Button( innerframe, - text="Previous", + text='Previous', command=self.previous, - width="10", + width='10', borderwidth=1, highlightthickness=1, - state="disabled", + state='disabled', ) - prev.pack(side="left", anchor="center") + prev.pack(side='left', anchor='center') self.next = next = Button( innerframe, - text="Next", + text='Next', command=self.__next__, - width="10", + width='10', borderwidth=1, highlightthickness=1, - state="disabled", + state='disabled', ) - next.pack(side="right", anchor="center") - innerframe.pack(side="top", fill="y") + next.pack(side='right', anchor='center') + innerframe.pack(side='top', fill='y') self.reset_current_page() def reset_current_page(self): @@ -259,14 +261,14 @@ class CollocationsView: self.after = self.top.after(POLL_INTERVAL, self._poll) def handle_error_loading_corpus(self, event): - self.status["text"] = "Error in loading " + self.var.get() + self.status['text'] = 'Error in loading ' + self.var.get() self.unfreeze_editable() self.clear_results_box() self.freeze_editable() self.reset_current_page() def handle_corpus_loaded(self, event): - self.status["text"] = self.var.get() + " is loaded" + self.status['text'] = self.var.get() + ' is loaded' self.unfreeze_editable() self.clear_results_box() self.reset_current_page() @@ -297,22 +299,22 @@ class CollocationsView: def load_corpus(self, selection): if self.model.selected_corpus != selection: - self.status["text"] = "Loading " + selection + "..." + self.status['text'] = 'Loading ' + selection + '...' 
self.freeze_editable() self.model.load_corpus(selection) def freeze_editable(self): - self.prev["state"] = "disabled" - self.next["state"] = "disabled" + self.prev['state'] = 'disabled' + self.next['state'] = 'disabled' def clear_results_box(self): - self.results_box["state"] = "normal" + self.results_box['state'] = 'normal' self.results_box.delete("1.0", END) - self.results_box["state"] = "disabled" + self.results_box['state'] = 'disabled' def fire_event(self, event): # Firing an event so that rendering of widgets happen in the mainloop thread - self.top.event_generate(event, when="tail") + self.top.event_generate(event, when='tail') def destroy(self, *e): if self.top is None: @@ -331,21 +333,21 @@ class CollocationsView: def set_paging_button_states(self): if self.current_page == -1 or self.current_page == 0: - self.prev["state"] = "disabled" + self.prev['state'] = 'disabled' else: - self.prev["state"] = "normal" + self.prev['state'] = 'normal' if self.model.is_last_page(self.current_page): - self.next["state"] = "disabled" + self.next['state'] = 'disabled' else: - self.next["state"] = "normal" + self.next['state'] = 'normal' def write_results(self, results): - self.results_box["state"] = "normal" + self.results_box['state'] = 'normal' row = 1 for each in results: - self.results_box.insert(str(row) + ".0", each[0] + " " + each[1] + "\n") + self.results_box.insert(str(row) + '.0', each[0] + " " + each[1] + "\n") row += 1 - self.results_box["state"] = "disabled" + self.results_box['state'] = 'disabled' class CollocationsModel: @@ -434,7 +436,7 @@ def app(): c.mainloop() -if __name__ == "__main__": +if __name__ == '__main__': app() -__all__ = ["app"] +__all__ = ['app'] diff --git a/nlp_resource_data/nltk/app/concordance_app.py b/nlp_resource_data/nltk/app/concordance_app.py index afdef61..a7f55d3 100644 --- a/nlp_resource_data/nltk/app/concordance_app.py +++ b/nlp_resource_data/nltk/app/concordance_app.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Concordance Application # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Sumukh Ghodke # URL: # For license information, see LICENSE.TXT @@ -8,9 +8,9 @@ import re import threading -import queue as q -from tkinter.font import Font -from tkinter import ( +from six.moves import queue as q +from six.moves.tkinter_font import Font +from six.moves.tkinter import ( Tk, Button, END, @@ -27,6 +27,7 @@ from tkinter import ( Text, ) +import nltk.compat from nltk.corpus import ( cess_cat, brown, @@ -42,89 +43,89 @@ from nltk.corpus import ( from nltk.util import in_idle from nltk.draw.util import ShowText -WORD_OR_TAG = "[^/ ]+" -BOUNDARY = r"\b" +WORD_OR_TAG = '[^/ ]+' +BOUNDARY = r'\b' -CORPUS_LOADED_EVENT = "<>" -SEARCH_TERMINATED_EVENT = "<>" -SEARCH_ERROR_EVENT = "<>" -ERROR_LOADING_CORPUS_EVENT = "<>" +CORPUS_LOADED_EVENT = '<>' +SEARCH_TERMINATED_EVENT = '<>' +SEARCH_ERROR_EVENT = '<>' +ERROR_LOADING_CORPUS_EVENT = '<>' POLL_INTERVAL = 50 # NB All corpora must be specified in a lambda expression so as not to be # loaded when the module is imported. 
-_DEFAULT = "English: Brown Corpus (Humor, simplified)" +_DEFAULT = 'English: Brown Corpus (Humor, simplified)' _CORPORA = { - "Catalan: CESS-CAT Corpus (simplified)": lambda: cess_cat.tagged_sents( - tagset="universal" + 'Catalan: CESS-CAT Corpus (simplified)': lambda: cess_cat.tagged_sents( + tagset='universal' ), - "English: Brown Corpus": lambda: brown.tagged_sents(), - "English: Brown Corpus (simplified)": lambda: brown.tagged_sents( - tagset="universal" + 'English: Brown Corpus': lambda: brown.tagged_sents(), + 'English: Brown Corpus (simplified)': lambda: brown.tagged_sents( + tagset='universal' ), - "English: Brown Corpus (Press, simplified)": lambda: brown.tagged_sents( - categories=["news", "editorial", "reviews"], tagset="universal" + 'English: Brown Corpus (Press, simplified)': lambda: brown.tagged_sents( + categories=['news', 'editorial', 'reviews'], tagset='universal' ), - "English: Brown Corpus (Religion, simplified)": lambda: brown.tagged_sents( - categories="religion", tagset="universal" + 'English: Brown Corpus (Religion, simplified)': lambda: brown.tagged_sents( + categories='religion', tagset='universal' ), - "English: Brown Corpus (Learned, simplified)": lambda: brown.tagged_sents( - categories="learned", tagset="universal" + 'English: Brown Corpus (Learned, simplified)': lambda: brown.tagged_sents( + categories='learned', tagset='universal' ), - "English: Brown Corpus (Science Fiction, simplified)": lambda: brown.tagged_sents( - categories="science_fiction", tagset="universal" + 'English: Brown Corpus (Science Fiction, simplified)': lambda: brown.tagged_sents( + categories='science_fiction', tagset='universal' ), - "English: Brown Corpus (Romance, simplified)": lambda: brown.tagged_sents( - categories="romance", tagset="universal" + 'English: Brown Corpus (Romance, simplified)': lambda: brown.tagged_sents( + categories='romance', tagset='universal' ), - "English: Brown Corpus (Humor, simplified)": lambda: brown.tagged_sents( - categories="humor", tagset="universal" + 'English: Brown Corpus (Humor, simplified)': lambda: brown.tagged_sents( + categories='humor', tagset='universal' ), - "English: NPS Chat Corpus": lambda: nps_chat.tagged_posts(), - "English: NPS Chat Corpus (simplified)": lambda: nps_chat.tagged_posts( - tagset="universal" + 'English: NPS Chat Corpus': lambda: nps_chat.tagged_posts(), + 'English: NPS Chat Corpus (simplified)': lambda: nps_chat.tagged_posts( + tagset='universal' ), - "English: Wall Street Journal Corpus": lambda: treebank.tagged_sents(), - "English: Wall Street Journal Corpus (simplified)": lambda: treebank.tagged_sents( - tagset="universal" + 'English: Wall Street Journal Corpus': lambda: treebank.tagged_sents(), + 'English: Wall Street Journal Corpus (simplified)': lambda: treebank.tagged_sents( + tagset='universal' ), - "Chinese: Sinica Corpus": lambda: sinica_treebank.tagged_sents(), - "Chinese: Sinica Corpus (simplified)": lambda: sinica_treebank.tagged_sents( - tagset="universal" + 'Chinese: Sinica Corpus': lambda: sinica_treebank.tagged_sents(), + 'Chinese: Sinica Corpus (simplified)': lambda: sinica_treebank.tagged_sents( + tagset='universal' ), - "Dutch: Alpino Corpus": lambda: alpino.tagged_sents(), - "Dutch: Alpino Corpus (simplified)": lambda: alpino.tagged_sents( - tagset="universal" + 'Dutch: Alpino Corpus': lambda: alpino.tagged_sents(), + 'Dutch: Alpino Corpus (simplified)': lambda: alpino.tagged_sents( + tagset='universal' ), - "Hindi: Indian Languages Corpus": lambda: indian.tagged_sents(files="hindi.pos"), - "Hindi: 
Indian Languages Corpus (simplified)": lambda: indian.tagged_sents( - files="hindi.pos", tagset="universal" + 'Hindi: Indian Languages Corpus': lambda: indian.tagged_sents(files='hindi.pos'), + 'Hindi: Indian Languages Corpus (simplified)': lambda: indian.tagged_sents( + files='hindi.pos', tagset='universal' ), - "Portuguese: Floresta Corpus (Portugal)": lambda: floresta.tagged_sents(), - "Portuguese: Floresta Corpus (Portugal, simplified)": lambda: floresta.tagged_sents( - tagset="universal" + 'Portuguese: Floresta Corpus (Portugal)': lambda: floresta.tagged_sents(), + 'Portuguese: Floresta Corpus (Portugal, simplified)': lambda: floresta.tagged_sents( + tagset='universal' ), - "Portuguese: MAC-MORPHO Corpus (Brazil)": lambda: mac_morpho.tagged_sents(), - "Portuguese: MAC-MORPHO Corpus (Brazil, simplified)": lambda: mac_morpho.tagged_sents( - tagset="universal" + 'Portuguese: MAC-MORPHO Corpus (Brazil)': lambda: mac_morpho.tagged_sents(), + 'Portuguese: MAC-MORPHO Corpus (Brazil, simplified)': lambda: mac_morpho.tagged_sents( + tagset='universal' ), - "Spanish: CESS-ESP Corpus (simplified)": lambda: cess_esp.tagged_sents( - tagset="universal" + 'Spanish: CESS-ESP Corpus (simplified)': lambda: cess_esp.tagged_sents( + tagset='universal' ), } class ConcordanceSearchView(object): - _BACKGROUND_COLOUR = "#FFF" # white + _BACKGROUND_COLOUR = '#FFF' # white # Colour of highlighted results - _HIGHLIGHT_WORD_COLOUR = "#F00" # red - _HIGHLIGHT_WORD_TAG = "HL_WRD_TAG" + _HIGHLIGHT_WORD_COLOUR = '#F00' # red + _HIGHLIGHT_WORD_TAG = 'HL_WRD_TAG' - _HIGHLIGHT_LABEL_COLOUR = "#C0C0C0" # dark grey - _HIGHLIGHT_LABEL_TAG = "HL_LBL_TAG" + _HIGHLIGHT_LABEL_COLOUR = '#C0C0C0' # dark grey + _HIGHLIGHT_LABEL_TAG = 'HL_LBL_TAG' # Percentage of text left of the scrollbar position _FRACTION_LEFT_TEXT = 0.30 @@ -140,10 +141,10 @@ class ConcordanceSearchView(object): self.after = self.top.after(POLL_INTERVAL, self._poll) def _init_top(self, top): - top.geometry("950x680+50+50") - top.title("NLTK Concordance Search") - top.bind("", self.destroy) - top.protocol("WM_DELETE_WINDOW", self.destroy) + top.geometry('950x680+50+50') + top.title('NLTK Concordance Search') + top.bind('', self.destroy) + top.protocol('WM_DELETE_WINDOW', self.destroy) top.minsize(950, 680) def _init_widgets(self, parent): @@ -155,7 +156,7 @@ class ConcordanceSearchView(object): self._init_results_box(self.main_frame) self._init_paging(self.main_frame) self._init_status(self.main_frame) - self.main_frame.pack(fill="both", expand=True) + self.main_frame.pack(fill='both', expand=True) def _init_menubar(self): self._result_size = IntVar(self.top) @@ -165,90 +166,90 @@ class ConcordanceSearchView(object): filemenu = Menu(menubar, tearoff=0, borderwidth=0) filemenu.add_command( - label="Exit", underline=1, command=self.destroy, accelerator="Ctrl-q" + label='Exit', underline=1, command=self.destroy, accelerator='Ctrl-q' ) - menubar.add_cascade(label="File", underline=0, menu=filemenu) + menubar.add_cascade(label='File', underline=0, menu=filemenu) editmenu = Menu(menubar, tearoff=0) rescntmenu = Menu(editmenu, tearoff=0) rescntmenu.add_radiobutton( - label="20", + label='20', variable=self._result_size, underline=0, value=20, command=self.set_result_size, ) rescntmenu.add_radiobutton( - label="50", + label='50', variable=self._result_size, underline=0, value=50, command=self.set_result_size, ) rescntmenu.add_radiobutton( - label="100", + label='100', variable=self._result_size, underline=0, value=100, command=self.set_result_size, ) 
rescntmenu.invoke(1) - editmenu.add_cascade(label="Result Count", underline=0, menu=rescntmenu) + editmenu.add_cascade(label='Result Count', underline=0, menu=rescntmenu) cntxmenu = Menu(editmenu, tearoff=0) cntxbfmenu = Menu(cntxmenu, tearoff=0) cntxbfmenu.add_radiobutton( - label="60 characters", + label='60 characters', variable=self._cntx_bf_len, underline=0, value=60, command=self.set_cntx_bf_len, ) cntxbfmenu.add_radiobutton( - label="80 characters", + label='80 characters', variable=self._cntx_bf_len, underline=0, value=80, command=self.set_cntx_bf_len, ) cntxbfmenu.add_radiobutton( - label="100 characters", + label='100 characters', variable=self._cntx_bf_len, underline=0, value=100, command=self.set_cntx_bf_len, ) cntxbfmenu.invoke(1) - cntxmenu.add_cascade(label="Before", underline=0, menu=cntxbfmenu) + cntxmenu.add_cascade(label='Before', underline=0, menu=cntxbfmenu) cntxafmenu = Menu(cntxmenu, tearoff=0) cntxafmenu.add_radiobutton( - label="70 characters", + label='70 characters', variable=self._cntx_af_len, underline=0, value=70, command=self.set_cntx_af_len, ) cntxafmenu.add_radiobutton( - label="90 characters", + label='90 characters', variable=self._cntx_af_len, underline=0, value=90, command=self.set_cntx_af_len, ) cntxafmenu.add_radiobutton( - label="110 characters", + label='110 characters', variable=self._cntx_af_len, underline=0, value=110, command=self.set_cntx_af_len, ) cntxafmenu.invoke(1) - cntxmenu.add_cascade(label="After", underline=0, menu=cntxafmenu) + cntxmenu.add_cascade(label='After', underline=0, menu=cntxafmenu) - editmenu.add_cascade(label="Context", underline=0, menu=cntxmenu) + editmenu.add_cascade(label='Context', underline=0, menu=cntxmenu) - menubar.add_cascade(label="Edit", underline=0, menu=editmenu) + menubar.add_cascade(label='Edit', underline=0, menu=editmenu) self.top.config(menu=menubar) @@ -268,12 +269,12 @@ class ConcordanceSearchView(object): Label( innerframe, justify=LEFT, - text=" Corpus: ", + text=' Corpus: ', background=self._BACKGROUND_COLOUR, padx=2, pady=1, border=0, - ).pack(side="left") + ).pack(side='left') other_corpora = list(self.model.CORPORA.keys()).remove( self.model.DEFAULT_CORPUS @@ -285,10 +286,10 @@ class ConcordanceSearchView(object): command=self.corpus_selected, *self.model.non_default_corpora() ) - om["borderwidth"] = 0 - om["highlightthickness"] = 1 - om.pack(side="left") - innerframe.pack(side="top", fill="x", anchor="n") + om['borderwidth'] = 0 + om['highlightthickness'] = 1 + om.pack(side='left') + innerframe.pack(side='top', fill='x', anchor='n') def _init_status(self, parent): self.status = Label( @@ -300,24 +301,24 @@ class ConcordanceSearchView(object): padx=1, pady=0, ) - self.status.pack(side="top", anchor="sw") + self.status.pack(side='top', anchor='sw') def _init_query_box(self, parent): innerframe = Frame(parent, background=self._BACKGROUND_COLOUR) another = Frame(innerframe, background=self._BACKGROUND_COLOUR) self.query_box = Entry(another, width=60) - self.query_box.pack(side="left", fill="x", pady=25, anchor="center") + self.query_box.pack(side='left', fill='x', pady=25, anchor='center') self.search_button = Button( another, - text="Search", + text='Search', command=self.search, borderwidth=1, highlightthickness=1, ) - self.search_button.pack(side="left", fill="x", pady=25, anchor="center") - self.query_box.bind("", self.search_enter_keypress_handler) + self.search_button.pack(side='left', fill='x', pady=25, anchor='center') + self.query_box.bind('', self.search_enter_keypress_handler) 
another.pack() - innerframe.pack(side="top", fill="x", anchor="n") + innerframe.pack(side='top', fill='x', anchor='n') def search_enter_keypress_handler(self, *event): self.search() @@ -327,61 +328,61 @@ class ConcordanceSearchView(object): i1 = Frame(innerframe) i2 = Frame(innerframe) vscrollbar = Scrollbar(i1, borderwidth=1) - hscrollbar = Scrollbar(i2, borderwidth=1, orient="horiz") + hscrollbar = Scrollbar(i2, borderwidth=1, orient='horiz') self.results_box = Text( i1, - font=Font(family="courier", size="16"), - state="disabled", + font=Font(family='courier', size='16'), + state='disabled', borderwidth=1, yscrollcommand=vscrollbar.set, xscrollcommand=hscrollbar.set, - wrap="none", - width="40", - height="20", + wrap='none', + width='40', + height='20', exportselection=1, ) - self.results_box.pack(side="left", fill="both", expand=True) + self.results_box.pack(side='left', fill='both', expand=True) self.results_box.tag_config( self._HIGHLIGHT_WORD_TAG, foreground=self._HIGHLIGHT_WORD_COLOUR ) self.results_box.tag_config( self._HIGHLIGHT_LABEL_TAG, foreground=self._HIGHLIGHT_LABEL_COLOUR ) - vscrollbar.pack(side="left", fill="y", anchor="e") + vscrollbar.pack(side='left', fill='y', anchor='e') vscrollbar.config(command=self.results_box.yview) - hscrollbar.pack(side="left", fill="x", expand=True, anchor="w") + hscrollbar.pack(side='left', fill='x', expand=True, anchor='w') hscrollbar.config(command=self.results_box.xview) # there is no other way of avoiding the overlap of scrollbars while using pack layout manager!!! - Label(i2, text=" ", background=self._BACKGROUND_COLOUR).pack( - side="left", anchor="e" + Label(i2, text=' ', background=self._BACKGROUND_COLOUR).pack( + side='left', anchor='e' ) - i1.pack(side="top", fill="both", expand=True, anchor="n") - i2.pack(side="bottom", fill="x", anchor="s") - innerframe.pack(side="top", fill="both", expand=True) + i1.pack(side='top', fill='both', expand=True, anchor='n') + i2.pack(side='bottom', fill='x', anchor='s') + innerframe.pack(side='top', fill='both', expand=True) def _init_paging(self, parent): innerframe = Frame(parent, background=self._BACKGROUND_COLOUR) self.prev = prev = Button( innerframe, - text="Previous", + text='Previous', command=self.previous, - width="10", + width='10', borderwidth=1, highlightthickness=1, - state="disabled", + state='disabled', ) - prev.pack(side="left", anchor="center") + prev.pack(side='left', anchor='center') self.next = next = Button( innerframe, - text="Next", + text='Next', command=self.__next__, - width="10", + width='10', borderwidth=1, highlightthickness=1, - state="disabled", + state='disabled', ) - next.pack(side="right", anchor="center") - innerframe.pack(side="top", fill="y") + next.pack(side='right', anchor='center') + innerframe.pack(side='top', fill='y') self.current_page = 0 def previous(self): @@ -396,9 +397,9 @@ class ConcordanceSearchView(object): def about(self, *e): ABOUT = "NLTK Concordance Search Demo\n" - TITLE = "About: NLTK Concordance Search Demo" + TITLE = 'About: NLTK Concordance Search Demo' try: - from tkinter.messagebox import Message + from six.moves.tkinter_messagebox import Message Message(message=ABOUT, title=TITLE, parent=self.main_frame).show() except: @@ -427,13 +428,13 @@ class ConcordanceSearchView(object): self.after = self.top.after(POLL_INTERVAL, self._poll) def handle_error_loading_corpus(self, event): - self.status["text"] = "Error in loading " + self.var.get() + self.status['text'] = 'Error in loading ' + self.var.get() self.unfreeze_editable() self.clear_all() 
self.freeze_editable() def handle_corpus_loaded(self, event): - self.status["text"] = self.var.get() + " is loaded" + self.status['text'] = self.var.get() + ' is loaded' self.unfreeze_editable() self.clear_all() self.query_box.focus_set() @@ -442,16 +443,16 @@ class ConcordanceSearchView(object): # todo: refactor the model such that it is less state sensitive results = self.model.get_results() self.write_results(results) - self.status["text"] = "" + self.status['text'] = '' if len(results) == 0: - self.status["text"] = "No results found for " + self.model.query + self.status['text'] = 'No results found for ' + self.model.query else: self.current_page = self.model.last_requested_page self.unfreeze_editable() self.results_box.xview_moveto(self._FRACTION_LEFT_TEXT) def handle_search_error(self, event): - self.status["text"] = "Error in query " + self.model.query + self.status['text'] = 'Error in query ' + self.model.query self.unfreeze_editable() def corpus_selected(self, *args): @@ -460,7 +461,7 @@ class ConcordanceSearchView(object): def load_corpus(self, selection): if self.model.selected_corpus != selection: - self.status["text"] = "Loading " + selection + "..." + self.status['text'] = 'Loading ' + selection + '...' self.freeze_editable() self.model.load_corpus(selection) @@ -471,12 +472,12 @@ class ConcordanceSearchView(object): query = self.query_box.get() if len(query.strip()) == 0: return - self.status["text"] = "Searching for " + query + self.status['text'] = 'Searching for ' + query self.freeze_editable() self.model.search(query, self.current_page + 1) def write_results(self, results): - self.results_box["state"] = "normal" + self.results_box['state'] = 'normal' row = 1 for each in results: sent, pos1, pos2 = each[0].strip(), each[1], each[2] @@ -485,34 +486,34 @@ class ConcordanceSearchView(object): sent, pos1, pos2 = self.pad(sent, pos1, pos2) sentence = sent[pos1 - self._char_before : pos1 + self._char_after] if not row == len(results): - sentence += "\n" - self.results_box.insert(str(row) + ".0", sentence) + sentence += '\n' + self.results_box.insert(str(row) + '.0', sentence) word_markers, label_markers = self.words_and_labels(sent, pos1, pos2) for marker in word_markers: self.results_box.tag_add( self._HIGHLIGHT_WORD_TAG, - str(row) + "." + str(marker[0]), - str(row) + "." + str(marker[1]), + str(row) + '.' + str(marker[0]), + str(row) + '.' + str(marker[1]), ) for marker in label_markers: self.results_box.tag_add( self._HIGHLIGHT_LABEL_TAG, - str(row) + "." + str(marker[0]), - str(row) + "." + str(marker[1]), + str(row) + '.' + str(marker[0]), + str(row) + '.' 
+ str(marker[1]), ) row += 1 - self.results_box["state"] = "disabled" + self.results_box['state'] = 'disabled' def words_and_labels(self, sentence, pos1, pos2): search_exp = sentence[pos1:pos2] words, labels = [], [] - labeled_words = search_exp.split(" ") + labeled_words = search_exp.split(' ') index = 0 for each in labeled_words: - if each == "": + if each == '': index += 1 else: - word, label = each.split("/") + word, label = each.split('/') words.append( (self._char_before + index, self._char_before + index + len(word)) ) @@ -528,7 +529,7 @@ class ConcordanceSearchView(object): if hstart >= self._char_before: return sent, hstart, hend d = self._char_before - hstart - sent = "".join([" "] * d) + sent + sent = ''.join([' '] * d) + sent return sent, hstart + d, hend + d def destroy(self, *e): @@ -544,34 +545,34 @@ class ConcordanceSearchView(object): self.clear_results_box() def clear_results_box(self): - self.results_box["state"] = "normal" + self.results_box['state'] = 'normal' self.results_box.delete("1.0", END) - self.results_box["state"] = "disabled" + self.results_box['state'] = 'disabled' def freeze_editable(self): - self.query_box["state"] = "disabled" - self.search_button["state"] = "disabled" - self.prev["state"] = "disabled" - self.next["state"] = "disabled" + self.query_box['state'] = 'disabled' + self.search_button['state'] = 'disabled' + self.prev['state'] = 'disabled' + self.next['state'] = 'disabled' def unfreeze_editable(self): - self.query_box["state"] = "normal" - self.search_button["state"] = "normal" + self.query_box['state'] = 'normal' + self.search_button['state'] = 'normal' self.set_paging_button_states() def set_paging_button_states(self): if self.current_page == 0 or self.current_page == 1: - self.prev["state"] = "disabled" + self.prev['state'] = 'disabled' else: - self.prev["state"] = "normal" + self.prev['state'] = 'normal' if self.model.has_more_pages(self.current_page): - self.next["state"] = "normal" + self.next['state'] = 'normal' else: - self.next["state"] = "disabled" + self.next['state'] = 'disabled' def fire_event(self, event): # Firing an event so that rendering of widgets happen in the mainloop thread - self.top.event_generate(event, when="tail") + self.top.event_generate(event, when='tail') def mainloop(self, *args, **kwargs): if in_idle(): @@ -649,7 +650,7 @@ class ConcordanceSearchModel(object): try: ts = self.model.CORPORA[self.name]() self.model.tagged_sents = [ - " ".join(w + "/" + t for (w, t) in sent) for sent in ts + ' '.join(w + '/' + t for (w, t) in sent) for sent in ts ] self.model.queue.put(CORPUS_LOADED_EVENT) except Exception as e: @@ -689,14 +690,14 @@ class ConcordanceSearchModel(object): def processed_query(self): new = [] for term in self.model.query.split(): - term = re.sub(r"\.", r"[^/ ]", term) - if re.match("[A-Z]+$", term): - new.append(BOUNDARY + WORD_OR_TAG + "/" + term + BOUNDARY) - elif "/" in term: + term = re.sub(r'\.', r'[^/ ]', term) + if re.match('[A-Z]+$', term): + new.append(BOUNDARY + WORD_OR_TAG + '/' + term + BOUNDARY) + elif '/' in term: new.append(BOUNDARY + term + BOUNDARY) else: - new.append(BOUNDARY + term + "/" + WORD_OR_TAG + BOUNDARY) - return " ".join(new) + new.append(BOUNDARY + term + '/' + WORD_OR_TAG + BOUNDARY) + return ' '.join(new) def app(): @@ -704,7 +705,7 @@ def app(): d.mainloop() -if __name__ == "__main__": +if __name__ == '__main__': app() -__all__ = ["app"] +__all__ = ['app'] diff --git a/nlp_resource_data/nltk/app/nemo_app.py b/nlp_resource_data/nltk/app/nemo_app.py index e653ea1..639f767 
100644 --- a/nlp_resource_data/nltk/app/nemo_app.py +++ b/nlp_resource_data/nltk/app/nemo_app.py @@ -10,7 +10,7 @@ Created by Aristide Grange import re import itertools -from tkinter import ( +from six.moves.tkinter import ( Frame, Label, PhotoImage, @@ -62,8 +62,8 @@ class Zone: def __init__(self, image, initialField, initialText): frm = Frame(root) frm.config(background="white") - self.image = PhotoImage(format="gif", data=images[image.upper()]) - self.imageDimmed = PhotoImage(format="gif", data=images[image]) + self.image = PhotoImage(format='gif', data=images[image.upper()]) + self.imageDimmed = PhotoImage(format='gif', data=images[image]) self.img = Label(frm) self.img.config(borderwidth=0) self.img.pack(side="left") @@ -168,7 +168,7 @@ def app(): root.mainloop() -if __name__ == "__main__": +if __name__ == '__main__': app() -__all__ = ["app"] +__all__ = ['app'] diff --git a/nlp_resource_data/nltk/app/rdparser_app.py b/nlp_resource_data/nltk/app/rdparser_app.py index 9437bff..5d3054d 100644 --- a/nlp_resource_data/nltk/app/rdparser_app.py +++ b/nlp_resource_data/nltk/app/rdparser_app.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Recursive Descent Parser Application # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # URL: # For license information, see LICENSE.TXT @@ -63,9 +63,10 @@ Keyboard Shortcuts:: [Ctrl-p]\t Print [q]\t Quit """ +from __future__ import division -from tkinter.font import Font -from tkinter import Listbox, IntVar, Button, Frame, Label, Menu, Scrollbar, Tk +from six.moves.tkinter_font import Font +from six.moves.tkinter import Listbox, IntVar, Button, Frame, Label, Menu, Scrollbar, Tk from nltk.tree import Tree from nltk.util import in_idle @@ -91,7 +92,7 @@ class RecursiveDescentApp(object): # Set up the main window. self._top = Tk() - self._top.title("Recursive Descent Parser Application") + self._top.title('Recursive Descent Parser Application') # Set up key bindings. self._init_bindings() @@ -121,7 +122,7 @@ class RecursiveDescentApp(object): self._parser.initialize(self._sent) # Resize callback - self._canvas.bind("", self._configure) + self._canvas.bind('', self._configure) ######################################### ## Initialization Helpers @@ -134,127 +135,127 @@ class RecursiveDescentApp(object): # TWhat's our font size (default=same as sysfont) self._size = IntVar(root) - self._size.set(self._sysfont.cget("size")) + self._size.set(self._sysfont.cget('size')) - self._boldfont = Font(family="helvetica", weight="bold", size=self._size.get()) - self._font = Font(family="helvetica", size=self._size.get()) + self._boldfont = Font(family='helvetica', weight='bold', size=self._size.get()) + self._font = Font(family='helvetica', size=self._size.get()) if self._size.get() < 0: big = self._size.get() - 2 else: big = self._size.get() + 2 - self._bigfont = Font(family="helvetica", weight="bold", size=big) + self._bigfont = Font(family='helvetica', weight='bold', size=big) def _init_grammar(self, parent): # Grammar view. 
self._prodframe = listframe = Frame(parent) - self._prodframe.pack(fill="both", side="left", padx=2) + self._prodframe.pack(fill='both', side='left', padx=2) self._prodlist_label = Label( - self._prodframe, font=self._boldfont, text="Available Expansions" + self._prodframe, font=self._boldfont, text='Available Expansions' ) self._prodlist_label.pack() self._prodlist = Listbox( self._prodframe, - selectmode="single", - relief="groove", - background="white", - foreground="#909090", + selectmode='single', + relief='groove', + background='white', + foreground='#909090', font=self._font, - selectforeground="#004040", - selectbackground="#c0f0c0", + selectforeground='#004040', + selectbackground='#c0f0c0', ) - self._prodlist.pack(side="right", fill="both", expand=1) + self._prodlist.pack(side='right', fill='both', expand=1) self._productions = list(self._parser.grammar().productions()) for production in self._productions: - self._prodlist.insert("end", (" %s" % production)) + self._prodlist.insert('end', (' %s' % production)) self._prodlist.config(height=min(len(self._productions), 25)) # Add a scrollbar if there are more than 25 productions. if len(self._productions) > 25: - listscroll = Scrollbar(self._prodframe, orient="vertical") + listscroll = Scrollbar(self._prodframe, orient='vertical') self._prodlist.config(yscrollcommand=listscroll.set) listscroll.config(command=self._prodlist.yview) - listscroll.pack(side="left", fill="y") + listscroll.pack(side='left', fill='y') # If they select a production, apply it. - self._prodlist.bind("<>", self._prodlist_select) + self._prodlist.bind('<>', self._prodlist_select) def _init_bindings(self): # Key bindings are a good thing. - self._top.bind("", self.destroy) - self._top.bind("", self.destroy) - self._top.bind("", self.destroy) - self._top.bind("e", self.expand) + self._top.bind('', self.destroy) + self._top.bind('', self.destroy) + self._top.bind('', self.destroy) + self._top.bind('e', self.expand) # self._top.bind('', self.expand) # self._top.bind('', self.expand) - self._top.bind("m", self.match) - self._top.bind("", self.match) - self._top.bind("", self.match) - self._top.bind("b", self.backtrack) - self._top.bind("", self.backtrack) - self._top.bind("", self.backtrack) - self._top.bind("", self.backtrack) - self._top.bind("", self.backtrack) - self._top.bind("a", self.autostep) + self._top.bind('m', self.match) + self._top.bind('', self.match) + self._top.bind('', self.match) + self._top.bind('b', self.backtrack) + self._top.bind('', self.backtrack) + self._top.bind('', self.backtrack) + self._top.bind('', self.backtrack) + self._top.bind('', self.backtrack) + self._top.bind('a', self.autostep) # self._top.bind('', self.autostep) - self._top.bind("", self.autostep) - self._top.bind("", self.cancel_autostep) - self._top.bind("", self.step) - self._top.bind("", self.reset) - self._top.bind("", self.postscript) + self._top.bind('', self.autostep) + self._top.bind('', self.cancel_autostep) + self._top.bind('', self.step) + self._top.bind('', self.reset) + self._top.bind('', self.postscript) # self._top.bind('', self.help) # self._top.bind('', self.help) - self._top.bind("", self.help) - self._top.bind("", self.help) + self._top.bind('', self.help) + self._top.bind('', self.help) # self._top.bind('', self.toggle_grammar) # self._top.bind('', self.toggle_grammar) # self._top.bind('', self.toggle_grammar) - self._top.bind("", self.edit_grammar) - self._top.bind("", self.edit_sentence) + self._top.bind('', self.edit_grammar) + self._top.bind('', 
self.edit_sentence) def _init_buttons(self, parent): # Set up the frames. self._buttonframe = buttonframe = Frame(parent) - buttonframe.pack(fill="none", side="bottom", padx=3, pady=2) + buttonframe.pack(fill='none', side='bottom', padx=3, pady=2) Button( buttonframe, - text="Step", - background="#90c0d0", - foreground="black", + text='Step', + background='#90c0d0', + foreground='black', command=self.step, - ).pack(side="left") + ).pack(side='left') Button( buttonframe, - text="Autostep", - background="#90c0d0", - foreground="black", + text='Autostep', + background='#90c0d0', + foreground='black', command=self.autostep, - ).pack(side="left") + ).pack(side='left') Button( buttonframe, - text="Expand", + text='Expand', underline=0, - background="#90f090", - foreground="black", + background='#90f090', + foreground='black', command=self.expand, - ).pack(side="left") + ).pack(side='left') Button( buttonframe, - text="Match", + text='Match', underline=0, - background="#90f090", - foreground="black", + background='#90f090', + foreground='black', command=self.match, - ).pack(side="left") + ).pack(side='left') Button( buttonframe, - text="Backtrack", + text='Backtrack', underline=0, - background="#f0a0a0", - foreground="black", + background='#f0a0a0', + foreground='black', command=self.backtrack, - ).pack(side="left") + ).pack(side='left') # Replace autostep... # self._autostep_button = Button(buttonframe, text='Autostep', @@ -265,42 +266,42 @@ class RecursiveDescentApp(object): self._autostep = 0 (x1, y1, x2, y2) = self._cframe.scrollregion() y2 = event.height - 6 - self._canvas["scrollregion"] = "%d %d %d %d" % (x1, y1, x2, y2) + self._canvas['scrollregion'] = '%d %d %d %d' % (x1, y1, x2, y2) self._redraw() def _init_feedback(self, parent): self._feedbackframe = feedbackframe = Frame(parent) - feedbackframe.pack(fill="x", side="bottom", padx=3, pady=3) + feedbackframe.pack(fill='x', side='bottom', padx=3, pady=3) self._lastoper_label = Label( - feedbackframe, text="Last Operation:", font=self._font + feedbackframe, text='Last Operation:', font=self._font ) - self._lastoper_label.pack(side="left") - lastoperframe = Frame(feedbackframe, relief="sunken", border=1) - lastoperframe.pack(fill="x", side="right", expand=1, padx=5) + self._lastoper_label.pack(side='left') + lastoperframe = Frame(feedbackframe, relief='sunken', border=1) + lastoperframe.pack(fill='x', side='right', expand=1, padx=5) self._lastoper1 = Label( - lastoperframe, foreground="#007070", background="#f0f0f0", font=self._font + lastoperframe, foreground='#007070', background='#f0f0f0', font=self._font ) self._lastoper2 = Label( lastoperframe, - anchor="w", + anchor='w', width=30, - foreground="#004040", - background="#f0f0f0", + foreground='#004040', + background='#f0f0f0', font=self._font, ) - self._lastoper1.pack(side="left") - self._lastoper2.pack(side="left", fill="x", expand=1) + self._lastoper1.pack(side='left') + self._lastoper2.pack(side='left', fill='x', expand=1) def _init_canvas(self, parent): self._cframe = CanvasFrame( parent, - background="white", + background='white', # width=525, height=250, closeenough=10, border=2, - relief="sunken", + relief='sunken', ) - self._cframe.pack(expand=1, fill="both", side="top", pady=2) + self._cframe.pack(expand=1, fill='both', side='top', pady=2) canvas = self._canvas = self._cframe.canvas() # Initially, there's no tree or text @@ -313,50 +314,50 @@ class RecursiveDescentApp(object): filemenu = Menu(menubar, tearoff=0) filemenu.add_command( - label="Reset Parser", underline=0, 
command=self.reset, accelerator="Del" + label='Reset Parser', underline=0, command=self.reset, accelerator='Del' ) filemenu.add_command( - label="Print to Postscript", + label='Print to Postscript', underline=0, command=self.postscript, - accelerator="Ctrl-p", + accelerator='Ctrl-p', ) filemenu.add_command( - label="Exit", underline=1, command=self.destroy, accelerator="Ctrl-x" + label='Exit', underline=1, command=self.destroy, accelerator='Ctrl-x' ) - menubar.add_cascade(label="File", underline=0, menu=filemenu) + menubar.add_cascade(label='File', underline=0, menu=filemenu) editmenu = Menu(menubar, tearoff=0) editmenu.add_command( - label="Edit Grammar", + label='Edit Grammar', underline=5, command=self.edit_grammar, - accelerator="Ctrl-g", + accelerator='Ctrl-g', ) editmenu.add_command( - label="Edit Text", + label='Edit Text', underline=5, command=self.edit_sentence, - accelerator="Ctrl-t", + accelerator='Ctrl-t', ) - menubar.add_cascade(label="Edit", underline=0, menu=editmenu) + menubar.add_cascade(label='Edit', underline=0, menu=editmenu) rulemenu = Menu(menubar, tearoff=0) rulemenu.add_command( - label="Step", underline=1, command=self.step, accelerator="Space" + label='Step', underline=1, command=self.step, accelerator='Space' ) rulemenu.add_separator() rulemenu.add_command( - label="Match", underline=0, command=self.match, accelerator="Ctrl-m" + label='Match', underline=0, command=self.match, accelerator='Ctrl-m' ) rulemenu.add_command( - label="Expand", underline=0, command=self.expand, accelerator="Ctrl-e" + label='Expand', underline=0, command=self.expand, accelerator='Ctrl-e' ) rulemenu.add_separator() rulemenu.add_command( - label="Backtrack", underline=0, command=self.backtrack, accelerator="Ctrl-b" + label='Backtrack', underline=0, command=self.backtrack, accelerator='Ctrl-b' ) - menubar.add_cascade(label="Apply", underline=0, menu=rulemenu) + menubar.add_cascade(label='Apply', underline=0, menu=rulemenu) viewmenu = Menu(menubar, tearoff=0) viewmenu.add_checkbutton( @@ -367,41 +368,41 @@ class RecursiveDescentApp(object): ) viewmenu.add_separator() viewmenu.add_radiobutton( - label="Tiny", + label='Tiny', variable=self._size, underline=0, value=10, command=self.resize, ) viewmenu.add_radiobutton( - label="Small", + label='Small', variable=self._size, underline=0, value=12, command=self.resize, ) viewmenu.add_radiobutton( - label="Medium", + label='Medium', variable=self._size, underline=0, value=14, command=self.resize, ) viewmenu.add_radiobutton( - label="Large", + label='Large', variable=self._size, underline=0, value=18, command=self.resize, ) viewmenu.add_radiobutton( - label="Huge", + label='Huge', variable=self._size, underline=0, value=24, command=self.resize, ) - menubar.add_cascade(label="View", underline=0, menu=viewmenu) + menubar.add_cascade(label='View', underline=0, menu=viewmenu) animatemenu = Menu(menubar, tearoff=0) animatemenu.add_radiobutton( @@ -412,30 +413,30 @@ class RecursiveDescentApp(object): underline=0, variable=self._animation_frames, value=10, - accelerator="-", + accelerator='-', ) animatemenu.add_radiobutton( label="Normal Animation", underline=0, variable=self._animation_frames, value=5, - accelerator="=", + accelerator='=', ) animatemenu.add_radiobutton( label="Fast Animation", underline=0, variable=self._animation_frames, value=2, - accelerator="+", + accelerator='+', ) menubar.add_cascade(label="Animate", underline=1, menu=animatemenu) helpmenu = Menu(menubar, tearoff=0) - helpmenu.add_command(label="About", underline=0, 
command=self.about) + helpmenu.add_command(label='About', underline=0, command=self.about) helpmenu.add_command( - label="Instructions", underline=0, command=self.help, accelerator="F1" + label='Instructions', underline=0, command=self.help, accelerator='F1' ) - menubar.add_cascade(label="Help", underline=0, menu=helpmenu) + menubar.add_cascade(label='Help', underline=0, menu=helpmenu) parent.config(menu=menubar) @@ -466,20 +467,20 @@ class RecursiveDescentApp(object): self._canvas.delete(self._textline) # Draw the tree. - helv = ("helvetica", -self._size.get()) - bold = ("helvetica", -self._size.get(), "bold") + helv = ('helvetica', -self._size.get()) + bold = ('helvetica', -self._size.get(), 'bold') attribs = { - "tree_color": "#000000", - "tree_width": 2, - "node_font": bold, - "leaf_font": helv, + 'tree_color': '#000000', + 'tree_width': 2, + 'node_font': bold, + 'leaf_font': helv, } tree = self._parser.tree() self._tree = tree_to_treesegment(canvas, tree, **attribs) self._cframe.add_widget(self._tree, 30, 5) # Draw the text. - helv = ("helvetica", -self._size.get()) + helv = ('helvetica', -self._size.get()) bottom = y = self._cframe.scrollregion()[3] self._textwidgets = [ TextWidget(canvas, word, font=self._font) for word in self._sent @@ -490,7 +491,7 @@ class RecursiveDescentApp(object): y = min(y, twidget.bbox()[1]) # Draw a line over the text, to separate it from the tree. - self._textline = canvas.create_line(-5000, y - 5, 5000, y - 5, dash=".") + self._textline = canvas.create_line(-5000, y - 5, 5000, y - 5, dash='.') # Highlight appropriate nodes. self._highlight_nodes() @@ -507,30 +508,30 @@ class RecursiveDescentApp(object): def _highlight_nodes(self): # Highlight the list of nodes to be checked. - bold = ("helvetica", -self._size.get(), "bold") + bold = ('helvetica', -self._size.get(), 'bold') for treeloc in self._parser.frontier()[:1]: - self._get(self._tree, treeloc)["color"] = "#20a050" - self._get(self._tree, treeloc)["font"] = bold + self._get(self._tree, treeloc)['color'] = '#20a050' + self._get(self._tree, treeloc)['font'] = bold for treeloc in self._parser.frontier()[1:]: - self._get(self._tree, treeloc)["color"] = "#008080" + self._get(self._tree, treeloc)['color'] = '#008080' def _highlight_prodlist(self): # Highlight the productions that can be expanded. # Boy, too bad tkinter doesn't implement Listbox.itemconfig; # that would be pretty useful here. 
- self._prodlist.delete(0, "end") + self._prodlist.delete(0, 'end') expandable = self._parser.expandable_productions() untried = self._parser.untried_expandable_productions() productions = self._productions for index in range(len(productions)): if productions[index] in expandable: if productions[index] in untried: - self._prodlist.insert(index, " %s" % productions[index]) + self._prodlist.insert(index, ' %s' % productions[index]) else: - self._prodlist.insert(index, " %s (TRIED)" % productions[index]) + self._prodlist.insert(index, ' %s (TRIED)' % productions[index]) self._prodlist.selection_set(index) else: - self._prodlist.insert(index, " %s" % productions[index]) + self._prodlist.insert(index, ' %s' % productions[index]) def _position_text(self): # Line up the text widgets that are matched against the tree @@ -541,22 +542,22 @@ class RecursiveDescentApp(object): for i in range(0, len(leaves)): widget = self._textwidgets[i] leaf = leaves[i] - widget["color"] = "#006040" - leaf["color"] = "#006040" + widget['color'] = '#006040' + leaf['color'] = '#006040' widget.move(leaf.bbox()[0] - widget.bbox()[0], 0) xmax = widget.bbox()[2] + 10 # Line up the text widgets that are not matched against the tree. for i in range(len(leaves), numwords): widget = self._textwidgets[i] - widget["color"] = "#a0a0a0" + widget['color'] = '#a0a0a0' widget.move(xmax - widget.bbox()[0], 0) xmax = widget.bbox()[2] + 10 # If we have a complete parse, make everything green :) if self._parser.currently_complete(): for twidget in self._textwidgets: - twidget["color"] = "#00a000" + twidget['color'] = '#00a000' # Move the matched leaves down to the text. for i in range(0, len(leaves)): @@ -591,8 +592,8 @@ class RecursiveDescentApp(object): def reset(self, *e): self._autostep = 0 self._parser.initialize(self._sent) - self._lastoper1["text"] = "Reset Application" - self._lastoper2["text"] = "" + self._lastoper1['text'] = 'Reset Application' + self._lastoper2['text'] = '' self._redraw() def autostep(self, *e): @@ -637,14 +638,14 @@ class RecursiveDescentApp(object): elif self._backtrack(): pass else: - self._lastoper1["text"] = "Finished" - self._lastoper2["text"] = "" + self._lastoper1['text'] = 'Finished' + self._lastoper2['text'] = '' self._autostep = 0 # Check if we just completed a parse. 
if self._parser.currently_complete(): self._autostep = 0 - self._lastoper2["text"] += " [COMPLETE PARSE]" + self._lastoper2['text'] += ' [COMPLETE PARSE]' def _expand(self, *e): if self._animating_lock: @@ -652,16 +653,16 @@ class RecursiveDescentApp(object): old_frontier = self._parser.frontier() rv = self._parser.expand() if rv is not None: - self._lastoper1["text"] = "Expand:" - self._lastoper2["text"] = rv - self._prodlist.selection_clear(0, "end") + self._lastoper1['text'] = 'Expand:' + self._lastoper2['text'] = rv + self._prodlist.selection_clear(0, 'end') index = self._productions.index(rv) self._prodlist.selection_set(index) self._animate_expand(old_frontier[0]) return True else: - self._lastoper1["text"] = "Expand:" - self._lastoper2["text"] = "(all expansions tried)" + self._lastoper1['text'] = 'Expand:' + self._lastoper2['text'] = '(all expansions tried)' return False def _match(self, *e): @@ -670,13 +671,13 @@ class RecursiveDescentApp(object): old_frontier = self._parser.frontier() rv = self._parser.match() if rv is not None: - self._lastoper1["text"] = "Match:" - self._lastoper2["text"] = rv + self._lastoper1['text'] = 'Match:' + self._lastoper2['text'] = rv self._animate_match(old_frontier[0]) return True else: - self._lastoper1["text"] = "Match:" - self._lastoper2["text"] = "(failed)" + self._lastoper1['text'] = 'Match:' + self._lastoper2['text'] = '(failed)' return False def _backtrack(self, *e): @@ -686,8 +687,8 @@ class RecursiveDescentApp(object): elt = self._parser.tree() for i in self._parser.frontier()[0]: elt = elt[i] - self._lastoper1["text"] = "Backtrack" - self._lastoper2["text"] = "" + self._lastoper1['text'] = 'Backtrack' + self._lastoper2['text'] = '' if isinstance(elt, Tree): self._animate_backtrack(self._parser.frontier()[0]) else: @@ -695,17 +696,17 @@ class RecursiveDescentApp(object): return True else: self._autostep = 0 - self._lastoper1["text"] = "Finished" - self._lastoper2["text"] = "" + self._lastoper1['text'] = 'Finished' + self._lastoper2['text'] = '' return False def about(self, *e): ABOUT = ( "NLTK Recursive Descent Parser Application\n" + "Written by Edward Loper" ) - TITLE = "About: Recursive Descent Parser Application" + TITLE = 'About: Recursive Descent Parser Application' try: - from tkinter.messagebox import Message + from six.moves.tkinter_messagebox import Message Message(message=ABOUT, title=TITLE).show() except: @@ -717,16 +718,16 @@ class RecursiveDescentApp(object): try: ShowText( self._top, - "Help: Recursive Descent Parser Application", - (__doc__ or "").strip(), + 'Help: Recursive Descent Parser Application', + (__doc__ or '').strip(), width=75, - font="fixed", + font='fixed', ) except: ShowText( self._top, - "Help: Recursive Descent Parser Application", - (__doc__ or "").strip(), + 'Help: Recursive Descent Parser Application', + (__doc__ or '').strip(), width=75, ) @@ -762,13 +763,13 @@ class RecursiveDescentApp(object): def _toggle_grammar(self, *e): if self._show_grammar.get(): self._prodframe.pack( - fill="both", side="left", padx=2, after=self._feedbackframe + fill='both', side='left', padx=2, after=self._feedbackframe ) - self._lastoper1["text"] = "Show Grammar" + self._lastoper1['text'] = 'Show Grammar' else: self._prodframe.pack_forget() - self._lastoper1["text"] = "Hide Grammar" - self._lastoper2["text"] = "" + self._lastoper1['text'] = 'Hide Grammar' + self._lastoper2['text'] = '' # def toggle_grammar(self, *e): # self._show_grammar = not self._show_grammar @@ -790,14 +791,14 @@ class RecursiveDescentApp(object): 
production = self._parser.expand(self._productions[index]) if production: - self._lastoper1["text"] = "Expand:" - self._lastoper2["text"] = production - self._prodlist.selection_clear(0, "end") + self._lastoper1['text'] = 'Expand:' + self._lastoper2['text'] = production + self._prodlist.selection_clear(0, 'end') self._prodlist.selection_set(index) self._animate_expand(old_frontier[0]) else: # Reset the production selections. - self._prodlist.selection_clear(0, "end") + self._prodlist.selection_clear(0, 'end') for prod in self._parser.expandable_productions(): index = self._productions.index(prod) self._prodlist.selection_set(index) @@ -819,13 +820,13 @@ class RecursiveDescentApp(object): self._canvas, tree, node_font=self._boldfont, - leaf_color="white", + leaf_color='white', tree_width=2, - tree_color="white", - node_color="white", + tree_color='white', + node_color='white', leaf_font=self._font, ) - widget.label()["color"] = "#20a050" + widget.label()['color'] = '#20a050' (oldx, oldy) = oldtree.label().bbox()[:2] (newx, newy) = widget.label().bbox()[:2] @@ -858,7 +859,7 @@ class RecursiveDescentApp(object): oldtree.destroy() colors = [ - "gray%d" % (10 * int(10 * x / self._animation_frames.get())) + 'gray%d' % (10 * int(10 * x / self._animation_frames.get())) for x in range(self._animation_frames.get(), 0, -1) ] @@ -900,22 +901,22 @@ class RecursiveDescentApp(object): def _animate_expand_frame(self, widget, colors): if len(colors) > 0: self._animating_lock = 1 - widget["color"] = colors[0] + widget['color'] = colors[0] for subtree in widget.subtrees(): if isinstance(subtree, TreeSegmentWidget): - subtree.label()["color"] = colors[0] + subtree.label()['color'] = colors[0] else: - subtree["color"] = colors[0] + subtree['color'] = colors[0] self._top.after(50, self._animate_expand_frame, widget, colors[1:]) else: - widget["color"] = "black" + widget['color'] = 'black' for subtree in widget.subtrees(): if isinstance(subtree, TreeSegmentWidget): - subtree.label()["color"] = "black" + subtree.label()['color'] = 'black' else: - subtree["color"] = "black" + subtree['color'] = 'black' self._redraw_quick() - widget.label()["color"] = "black" + widget.label()['color'] = 'black' self._animating_lock = 0 if self._autostep: self._step() @@ -925,9 +926,9 @@ class RecursiveDescentApp(object): if self._animation_frames.get() == 0: colors = [] else: - colors = ["#a00000", "#000000", "#a00000"] + colors = ['#a00000', '#000000', '#a00000'] colors += [ - "gray%d" % (10 * int(10 * x / (self._animation_frames.get()))) + 'gray%d' % (10 * int(10 * x / (self._animation_frames.get()))) for x in range(1, self._animation_frames.get() + 1) ] @@ -944,7 +945,7 @@ class RecursiveDescentApp(object): if len(colors) > 0: self._animating_lock = 1 for widget in widgets: - widget["color"] = colors[0] + widget['color'] = colors[0] self._top.after(50, self._animate_backtrack_frame, widgets, colors[1:]) else: for widget in widgets[0].subtrees(): @@ -977,7 +978,7 @@ class RecursiveDescentApp(object): widget.move(0, dy) self._top.after(10, self._animate_match_frame, frame - 1, widget, dy) else: - widget["color"] = "#006040" + widget['color'] = '#006040' self._redraw_quick() self._animating_lock = 0 if self._autostep: @@ -1003,14 +1004,14 @@ class RecursiveDescentApp(object): def set_grammar(self, grammar): self._parser.set_grammar(grammar) self._productions = list(grammar.productions()) - self._prodlist.delete(0, "end") + self._prodlist.delete(0, 'end') for production in self._productions: - self._prodlist.insert("end", (" %s" % 
production)) + self._prodlist.insert('end', (' %s' % production)) def edit_sentence(self, *e): sentence = " ".join(self._sent) - title = "Edit Text" - instr = "Enter a new sentence to parse." + title = 'Edit Text' + instr = 'Enter a new sentence to parse.' EntryDialog(self._top, sentence, instr, self.set_sentence, title) def set_sentence(self, sentence): @@ -1041,12 +1042,12 @@ def app(): """ ) - sent = "the dog saw a man in the park".split() + sent = 'the dog saw a man in the park'.split() RecursiveDescentApp(grammar, sent).mainloop() -if __name__ == "__main__": +if __name__ == '__main__': app() -__all__ = ["app"] +__all__ = ['app'] diff --git a/nlp_resource_data/nltk/app/srparser_app.py b/nlp_resource_data/nltk/app/srparser_app.py index 1db15ab..1f11427 100644 --- a/nlp_resource_data/nltk/app/srparser_app.py +++ b/nlp_resource_data/nltk/app/srparser_app.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Shift-Reduce Parser Application # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # URL: # For license information, see LICENSE.TXT @@ -62,8 +62,8 @@ Keyboard Shortcuts:: """ -from tkinter.font import Font -from tkinter import IntVar, Listbox, Button, Frame, Label, Menu, Scrollbar, Tk +from six.moves.tkinter_font import Font +from six.moves.tkinter import IntVar, Listbox, Button, Frame, Label, Menu, Scrollbar, Tk from nltk.tree import Tree from nltk.parse import SteppingShiftReduceParser @@ -101,7 +101,7 @@ class ShiftReduceApp(object): # Set up the main window. self._top = Tk() - self._top.title("Shift Reduce Parser Application") + self._top.title('Shift Reduce Parser Application') # Animations. animating_lock is a lock to prevent the demo # from performing new operations while it's animating. @@ -131,7 +131,7 @@ class ShiftReduceApp(object): # Reset the demo, and set the feedback frame to empty. self.reset() - self._lastoper1["text"] = "" + self._lastoper1['text'] = '' ######################################### ## Initialization Helpers @@ -144,171 +144,171 @@ class ShiftReduceApp(object): # TWhat's our font size (default=same as sysfont) self._size = IntVar(root) - self._size.set(self._sysfont.cget("size")) + self._size.set(self._sysfont.cget('size')) - self._boldfont = Font(family="helvetica", weight="bold", size=self._size.get()) - self._font = Font(family="helvetica", size=self._size.get()) + self._boldfont = Font(family='helvetica', weight='bold', size=self._size.get()) + self._font = Font(family='helvetica', size=self._size.get()) def _init_grammar(self, parent): # Grammar view. 
self._prodframe = listframe = Frame(parent) - self._prodframe.pack(fill="both", side="left", padx=2) + self._prodframe.pack(fill='both', side='left', padx=2) self._prodlist_label = Label( - self._prodframe, font=self._boldfont, text="Available Reductions" + self._prodframe, font=self._boldfont, text='Available Reductions' ) self._prodlist_label.pack() self._prodlist = Listbox( self._prodframe, - selectmode="single", - relief="groove", - background="white", - foreground="#909090", + selectmode='single', + relief='groove', + background='white', + foreground='#909090', font=self._font, - selectforeground="#004040", - selectbackground="#c0f0c0", + selectforeground='#004040', + selectbackground='#c0f0c0', ) - self._prodlist.pack(side="right", fill="both", expand=1) + self._prodlist.pack(side='right', fill='both', expand=1) self._productions = list(self._parser.grammar().productions()) for production in self._productions: - self._prodlist.insert("end", (" %s" % production)) + self._prodlist.insert('end', (' %s' % production)) self._prodlist.config(height=min(len(self._productions), 25)) # Add a scrollbar if there are more than 25 productions. if 1: # len(self._productions) > 25: - listscroll = Scrollbar(self._prodframe, orient="vertical") + listscroll = Scrollbar(self._prodframe, orient='vertical') self._prodlist.config(yscrollcommand=listscroll.set) listscroll.config(command=self._prodlist.yview) - listscroll.pack(side="left", fill="y") + listscroll.pack(side='left', fill='y') # If they select a production, apply it. - self._prodlist.bind("<>", self._prodlist_select) + self._prodlist.bind('<>', self._prodlist_select) # When they hover over a production, highlight it. self._hover = -1 - self._prodlist.bind("", self._highlight_hover) - self._prodlist.bind("", self._clear_hover) + self._prodlist.bind('', self._highlight_hover) + self._prodlist.bind('', self._clear_hover) def _init_bindings(self): # Quit - self._top.bind("", self.destroy) - self._top.bind("", self.destroy) - self._top.bind("", self.destroy) - self._top.bind("", self.destroy) + self._top.bind('', self.destroy) + self._top.bind('', self.destroy) + self._top.bind('', self.destroy) + self._top.bind('', self.destroy) # Ops (step, shift, reduce, undo) - self._top.bind("", self.step) - self._top.bind("", self.shift) - self._top.bind("", self.shift) - self._top.bind("", self.shift) - self._top.bind("", self.reduce) - self._top.bind("", self.reduce) - self._top.bind("", self.reduce) - self._top.bind("", self.reset) - self._top.bind("", self.undo) - self._top.bind("", self.undo) - self._top.bind("", self.undo) - self._top.bind("", self.undo) - self._top.bind("", self.undo) + self._top.bind('', self.step) + self._top.bind('', self.shift) + self._top.bind('', self.shift) + self._top.bind('', self.shift) + self._top.bind('', self.reduce) + self._top.bind('', self.reduce) + self._top.bind('', self.reduce) + self._top.bind('', self.reset) + self._top.bind('', self.undo) + self._top.bind('', self.undo) + self._top.bind('', self.undo) + self._top.bind('', self.undo) + self._top.bind('', self.undo) # Misc - self._top.bind("", self.postscript) - self._top.bind("", self.help) - self._top.bind("", self.help) - self._top.bind("", self.edit_grammar) - self._top.bind("", self.edit_sentence) + self._top.bind('', self.postscript) + self._top.bind('', self.help) + self._top.bind('', self.help) + self._top.bind('', self.edit_grammar) + self._top.bind('', self.edit_sentence) # Animation speed control - self._top.bind("-", lambda e, a=self._animate: a.set(20)) 
- self._top.bind("=", lambda e, a=self._animate: a.set(10)) - self._top.bind("+", lambda e, a=self._animate: a.set(4)) + self._top.bind('-', lambda e, a=self._animate: a.set(20)) + self._top.bind('=', lambda e, a=self._animate: a.set(10)) + self._top.bind('+', lambda e, a=self._animate: a.set(4)) def _init_buttons(self, parent): # Set up the frames. self._buttonframe = buttonframe = Frame(parent) - buttonframe.pack(fill="none", side="bottom") + buttonframe.pack(fill='none', side='bottom') Button( buttonframe, - text="Step", - background="#90c0d0", - foreground="black", + text='Step', + background='#90c0d0', + foreground='black', command=self.step, - ).pack(side="left") + ).pack(side='left') Button( buttonframe, - text="Shift", + text='Shift', underline=0, - background="#90f090", - foreground="black", + background='#90f090', + foreground='black', command=self.shift, - ).pack(side="left") + ).pack(side='left') Button( buttonframe, - text="Reduce", + text='Reduce', underline=0, - background="#90f090", - foreground="black", + background='#90f090', + foreground='black', command=self.reduce, - ).pack(side="left") + ).pack(side='left') Button( buttonframe, - text="Undo", + text='Undo', underline=0, - background="#f0a0a0", - foreground="black", + background='#f0a0a0', + foreground='black', command=self.undo, - ).pack(side="left") + ).pack(side='left') def _init_menubar(self, parent): menubar = Menu(parent) filemenu = Menu(menubar, tearoff=0) filemenu.add_command( - label="Reset Parser", underline=0, command=self.reset, accelerator="Del" + label='Reset Parser', underline=0, command=self.reset, accelerator='Del' ) filemenu.add_command( - label="Print to Postscript", + label='Print to Postscript', underline=0, command=self.postscript, - accelerator="Ctrl-p", + accelerator='Ctrl-p', ) filemenu.add_command( - label="Exit", underline=1, command=self.destroy, accelerator="Ctrl-x" + label='Exit', underline=1, command=self.destroy, accelerator='Ctrl-x' ) - menubar.add_cascade(label="File", underline=0, menu=filemenu) + menubar.add_cascade(label='File', underline=0, menu=filemenu) editmenu = Menu(menubar, tearoff=0) editmenu.add_command( - label="Edit Grammar", + label='Edit Grammar', underline=5, command=self.edit_grammar, - accelerator="Ctrl-g", + accelerator='Ctrl-g', ) editmenu.add_command( - label="Edit Text", + label='Edit Text', underline=5, command=self.edit_sentence, - accelerator="Ctrl-t", + accelerator='Ctrl-t', ) - menubar.add_cascade(label="Edit", underline=0, menu=editmenu) + menubar.add_cascade(label='Edit', underline=0, menu=editmenu) rulemenu = Menu(menubar, tearoff=0) rulemenu.add_command( - label="Step", underline=1, command=self.step, accelerator="Space" + label='Step', underline=1, command=self.step, accelerator='Space' ) rulemenu.add_separator() rulemenu.add_command( - label="Shift", underline=0, command=self.shift, accelerator="Ctrl-s" + label='Shift', underline=0, command=self.shift, accelerator='Ctrl-s' ) rulemenu.add_command( - label="Reduce", underline=0, command=self.reduce, accelerator="Ctrl-r" + label='Reduce', underline=0, command=self.reduce, accelerator='Ctrl-r' ) rulemenu.add_separator() rulemenu.add_command( - label="Undo", underline=0, command=self.undo, accelerator="Ctrl-u" + label='Undo', underline=0, command=self.undo, accelerator='Ctrl-u' ) - menubar.add_cascade(label="Apply", underline=0, menu=rulemenu) + menubar.add_cascade(label='Apply', underline=0, menu=rulemenu) viewmenu = Menu(menubar, tearoff=0) viewmenu.add_checkbutton( @@ -319,41 +319,41 @@ class 
ShiftReduceApp(object): ) viewmenu.add_separator() viewmenu.add_radiobutton( - label="Tiny", + label='Tiny', variable=self._size, underline=0, value=10, command=self.resize, ) viewmenu.add_radiobutton( - label="Small", + label='Small', variable=self._size, underline=0, value=12, command=self.resize, ) viewmenu.add_radiobutton( - label="Medium", + label='Medium', variable=self._size, underline=0, value=14, command=self.resize, ) viewmenu.add_radiobutton( - label="Large", + label='Large', variable=self._size, underline=0, value=18, command=self.resize, ) viewmenu.add_radiobutton( - label="Huge", + label='Huge', variable=self._size, underline=0, value=24, command=self.resize, ) - menubar.add_cascade(label="View", underline=0, menu=viewmenu) + menubar.add_cascade(label='View', underline=0, menu=viewmenu) animatemenu = Menu(menubar, tearoff=0) animatemenu.add_radiobutton( @@ -364,81 +364,81 @@ class ShiftReduceApp(object): underline=0, variable=self._animate, value=20, - accelerator="-", + accelerator='-', ) animatemenu.add_radiobutton( label="Normal Animation", underline=0, variable=self._animate, value=10, - accelerator="=", + accelerator='=', ) animatemenu.add_radiobutton( label="Fast Animation", underline=0, variable=self._animate, value=4, - accelerator="+", + accelerator='+', ) menubar.add_cascade(label="Animate", underline=1, menu=animatemenu) helpmenu = Menu(menubar, tearoff=0) - helpmenu.add_command(label="About", underline=0, command=self.about) + helpmenu.add_command(label='About', underline=0, command=self.about) helpmenu.add_command( - label="Instructions", underline=0, command=self.help, accelerator="F1" + label='Instructions', underline=0, command=self.help, accelerator='F1' ) - menubar.add_cascade(label="Help", underline=0, menu=helpmenu) + menubar.add_cascade(label='Help', underline=0, menu=helpmenu) parent.config(menu=menubar) def _init_feedback(self, parent): self._feedbackframe = feedbackframe = Frame(parent) - feedbackframe.pack(fill="x", side="bottom", padx=3, pady=3) + feedbackframe.pack(fill='x', side='bottom', padx=3, pady=3) self._lastoper_label = Label( - feedbackframe, text="Last Operation:", font=self._font + feedbackframe, text='Last Operation:', font=self._font ) - self._lastoper_label.pack(side="left") - lastoperframe = Frame(feedbackframe, relief="sunken", border=1) - lastoperframe.pack(fill="x", side="right", expand=1, padx=5) + self._lastoper_label.pack(side='left') + lastoperframe = Frame(feedbackframe, relief='sunken', border=1) + lastoperframe.pack(fill='x', side='right', expand=1, padx=5) self._lastoper1 = Label( - lastoperframe, foreground="#007070", background="#f0f0f0", font=self._font + lastoperframe, foreground='#007070', background='#f0f0f0', font=self._font ) self._lastoper2 = Label( lastoperframe, - anchor="w", + anchor='w', width=30, - foreground="#004040", - background="#f0f0f0", + foreground='#004040', + background='#f0f0f0', font=self._font, ) - self._lastoper1.pack(side="left") - self._lastoper2.pack(side="left", fill="x", expand=1) + self._lastoper1.pack(side='left') + self._lastoper2.pack(side='left', fill='x', expand=1) def _init_canvas(self, parent): self._cframe = CanvasFrame( parent, - background="white", + background='white', width=525, closeenough=10, border=2, - relief="sunken", + relief='sunken', ) - self._cframe.pack(expand=1, fill="both", side="top", pady=2) + self._cframe.pack(expand=1, fill='both', side='top', pady=2) canvas = self._canvas = self._cframe.canvas() self._stackwidgets = [] self._rtextwidgets = [] self._titlebar = 
canvas.create_rectangle( - 0, 0, 0, 0, fill="#c0f0f0", outline="black" + 0, 0, 0, 0, fill='#c0f0f0', outline='black' ) - self._exprline = canvas.create_line(0, 0, 0, 0, dash=".") - self._stacktop = canvas.create_line(0, 0, 0, 0, fill="#408080") + self._exprline = canvas.create_line(0, 0, 0, 0, dash='.') + self._stacktop = canvas.create_line(0, 0, 0, 0, fill='#408080') size = self._size.get() + 4 self._stacklabel = TextWidget( - canvas, "Stack", color="#004040", font=self._boldfont + canvas, 'Stack', color='#004040', font=self._boldfont ) self._rtextlabel = TextWidget( - canvas, "Remaining Text", color="#004040", font=self._boldfont + canvas, 'Remaining Text', color='#004040', font=self._boldfont ) self._cframe.add_widget(self._stacklabel) self._cframe.add_widget(self._rtextlabel) @@ -448,7 +448,7 @@ class ShiftReduceApp(object): ######################################### def _redraw(self): - scrollregion = self._canvas["scrollregion"].split() + scrollregion = self._canvas['scrollregion'].split() (cx1, cy1, cx2, cy2) = [int(c) for c in scrollregion] # Delete the old stack & rtext widgets. @@ -476,17 +476,17 @@ class ShiftReduceApp(object): for tok in self._parser.stack(): if isinstance(tok, Tree): attribs = { - "tree_color": "#4080a0", - "tree_width": 2, - "node_font": self._boldfont, - "node_color": "#006060", - "leaf_color": "#006060", - "leaf_font": self._font, + 'tree_color': '#4080a0', + 'tree_width': 2, + 'node_font': self._boldfont, + 'node_color': '#006060', + 'leaf_color': '#006060', + 'leaf_font': self._font, } widget = tree_to_treesegment(self._canvas, tok, **attribs) - widget.label()["color"] = "#000000" + widget.label()['color'] = '#000000' else: - widget = TextWidget(self._canvas, tok, color="#000000", font=self._font) + widget = TextWidget(self._canvas, tok, color='#000000', font=self._font) widget.bind_click(self._popup_reduce) self._stackwidgets.append(widget) self._cframe.add_widget(widget, stackx, y) @@ -495,7 +495,7 @@ class ShiftReduceApp(object): # Draw the remaining text. rtextwidth = 0 for tok in self._parser.remaining_text(): - widget = TextWidget(self._canvas, tok, color="#000000", font=self._font) + widget = TextWidget(self._canvas, tok, color='#000000', font=self._font) self._rtextwidgets.append(widget) self._cframe.add_widget(widget, rtextwidth, y) rtextwidth = widget.bbox()[2] + 4 @@ -541,7 +541,7 @@ class ShiftReduceApp(object): def _highlight_productions(self): # Highlight the productions that can be reduced. 
- self._prodlist.selection_clear(0, "end") + self._prodlist.selection_clear(0, 'end') for prod in self._parser.reducible_productions(): index = self._productions.index(prod) self._prodlist.selection_set(index) @@ -558,8 +558,8 @@ class ShiftReduceApp(object): def reset(self, *e): self._parser.initialize(self._sent) - self._lastoper1["text"] = "Reset App" - self._lastoper2["text"] = "" + self._lastoper1['text'] = 'Reset App' + self._lastoper2['text'] = '' self._redraw() def step(self, *e): @@ -569,19 +569,19 @@ class ShiftReduceApp(object): return True else: if list(self._parser.parses()): - self._lastoper1["text"] = "Finished:" - self._lastoper2["text"] = "Success" + self._lastoper1['text'] = 'Finished:' + self._lastoper2['text'] = 'Success' else: - self._lastoper1["text"] = "Finished:" - self._lastoper2["text"] = "Failure" + self._lastoper1['text'] = 'Finished:' + self._lastoper2['text'] = 'Failure' def shift(self, *e): if self._animating_lock: return if self._parser.shift(): tok = self._parser.stack()[-1] - self._lastoper1["text"] = "Shift:" - self._lastoper2["text"] = "%r" % tok + self._lastoper1['text'] = 'Shift:' + self._lastoper2['text'] = '%r' % tok if self._animate.get(): self._animate_shift() else: @@ -594,8 +594,8 @@ class ShiftReduceApp(object): return production = self._parser.reduce() if production: - self._lastoper1["text"] = "Reduce:" - self._lastoper2["text"] = "%s" % production + self._lastoper1['text'] = 'Reduce:' + self._lastoper2['text'] = '%s' % production if self._animate.get(): self._animate_reduce() else: @@ -648,24 +648,24 @@ class ShiftReduceApp(object): try: ShowText( self._top, - "Help: Shift-Reduce Parser Application", - (__doc__ or "").strip(), + 'Help: Shift-Reduce Parser Application', + (__doc__ or '').strip(), width=75, - font="fixed", + font='fixed', ) except: ShowText( self._top, - "Help: Shift-Reduce Parser Application", - (__doc__ or "").strip(), + 'Help: Shift-Reduce Parser Application', + (__doc__ or '').strip(), width=75, ) def about(self, *e): ABOUT = "NLTK Shift-Reduce Parser Application\n" + "Written by Edward Loper" - TITLE = "About: Shift-Reduce Parser Application" + TITLE = 'About: Shift-Reduce Parser Application' try: - from tkinter.messagebox import Message + from six.moves.tkinter_messagebox import Message Message(message=ABOUT, title=TITLE).show() except: @@ -677,14 +677,14 @@ class ShiftReduceApp(object): def set_grammar(self, grammar): self._parser.set_grammar(grammar) self._productions = list(grammar.productions()) - self._prodlist.delete(0, "end") + self._prodlist.delete(0, 'end') for production in self._productions: - self._prodlist.insert("end", (" %s" % production)) + self._prodlist.insert('end', (' %s' % production)) def edit_sentence(self, *e): sentence = " ".join(self._sent) - title = "Edit Text" - instr = "Enter a new sentence to parse." + title = 'Edit Text' + instr = 'Enter a new sentence to parse.' 
EntryDialog(self._top, sentence, instr, self.set_sentence, title) def set_sentence(self, sent): @@ -698,13 +698,13 @@ class ShiftReduceApp(object): def _toggle_grammar(self, *e): if self._show_grammar.get(): self._prodframe.pack( - fill="both", side="left", padx=2, after=self._feedbackframe + fill='both', side='left', padx=2, after=self._feedbackframe ) - self._lastoper1["text"] = "Show Grammar" + self._lastoper1['text'] = 'Show Grammar' else: self._prodframe.pack_forget() - self._lastoper1["text"] = "Hide Grammar" - self._lastoper2["text"] = "" + self._lastoper1['text'] = 'Hide Grammar' + self._lastoper2['text'] = '' def _prodlist_select(self, event): selection = self._prodlist.curselection() @@ -713,15 +713,15 @@ class ShiftReduceApp(object): index = int(selection[0]) production = self._parser.reduce(self._productions[index]) if production: - self._lastoper1["text"] = "Reduce:" - self._lastoper2["text"] = "%s" % production + self._lastoper1['text'] = 'Reduce:' + self._lastoper2['text'] = '%s' % production if self._animate.get(): self._animate_reduce() else: self._redraw() else: # Reset the production selections. - self._prodlist.selection_clear(0, "end") + self._prodlist.selection_clear(0, 'end') for prod in self._parser.reducible_productions(): index = self._productions.index(prod) self._prodlist.selection_set(index) @@ -732,7 +732,7 @@ class ShiftReduceApp(object): if len(productions) == 0: return - self._reduce_menu.delete(0, "end") + self._reduce_menu.delete(0, 'end') for production in productions: self._reduce_menu.add_command(label=str(production), command=self.reduce) self._reduce_menu.post( @@ -806,7 +806,7 @@ class ShiftReduceApp(object): if not isinstance(tok, Tree): raise ValueError() label = TextWidget( - self._canvas, str(tok.label()), color="#006060", font=self._boldfont + self._canvas, str(tok.label()), color='#006060', font=self._boldfont ) widget = TreeSegmentWidget(self._canvas, label, widgets, width=2) (x1, y1, x2, y2) = self._stacklabel.bbox() @@ -870,9 +870,9 @@ class ShiftReduceApp(object): rhslen = len(self._productions[index].rhs()) for stackwidget in self._stackwidgets[-rhslen:]: if isinstance(stackwidget, TreeSegmentWidget): - stackwidget.label()["color"] = "#00a000" + stackwidget.label()['color'] = '#00a000' else: - stackwidget["color"] = "#00a000" + stackwidget['color'] = '#00a000' # Remember what production we're hovering over. 
self._hover = index @@ -884,9 +884,9 @@ class ShiftReduceApp(object): self._hover = -1 for stackwidget in self._stackwidgets: if isinstance(stackwidget, TreeSegmentWidget): - stackwidget.label()["color"] = "black" + stackwidget.label()['color'] = 'black' else: - stackwidget["color"] = "black" + stackwidget['color'] = 'black' def app(): @@ -897,7 +897,7 @@ def app(): from nltk.grammar import Nonterminal, Production, CFG - nonterminals = "S VP NP PP P N Name V Det" + nonterminals = 'S VP NP PP P N Name V Det' (S, VP, NP, PP, P, N, Name, V, Det) = [Nonterminal(s) for s in nonterminals.split()] productions = ( @@ -910,28 +910,28 @@ def app(): Production(VP, [V, NP]), Production(PP, [P, NP]), # Lexical Productions - Production(NP, ["I"]), - Production(Det, ["the"]), - Production(Det, ["a"]), - Production(N, ["man"]), - Production(V, ["saw"]), - Production(P, ["in"]), - Production(P, ["with"]), - Production(N, ["park"]), - Production(N, ["dog"]), - Production(N, ["statue"]), - Production(Det, ["my"]), + Production(NP, ['I']), + Production(Det, ['the']), + Production(Det, ['a']), + Production(N, ['man']), + Production(V, ['saw']), + Production(P, ['in']), + Production(P, ['with']), + Production(N, ['park']), + Production(N, ['dog']), + Production(N, ['statue']), + Production(Det, ['my']), ) grammar = CFG(S, productions) # tokenize the sentence - sent = "my dog saw a man in the park with a statue".split() + sent = 'my dog saw a man in the park with a statue'.split() ShiftReduceApp(grammar, sent).mainloop() -if __name__ == "__main__": +if __name__ == '__main__': app() -__all__ = ["app"] +__all__ = ['app'] diff --git a/nlp_resource_data/nltk/app/wordfreq_app.py b/nlp_resource_data/nltk/app/wordfreq_app.py index 522139c..52c7c66 100644 --- a/nlp_resource_data/nltk/app/wordfreq_app.py +++ b/nlp_resource_data/nltk/app/wordfreq_app.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Wordfreq Application # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Sumukh Ghodke # URL: # For license information, see LICENSE.TXT @@ -25,11 +25,11 @@ def plot_word_freq_dist(text): def app(): - t1 = Text(gutenberg.words("melville-moby_dick.txt")) + t1 = Text(gutenberg.words('melville-moby_dick.txt')) plot_word_freq_dist(t1) -if __name__ == "__main__": +if __name__ == '__main__': app() -__all__ = ["app"] +__all__ = ['app'] diff --git a/nlp_resource_data/nltk/app/wordnet_app.py b/nlp_resource_data/nltk/app/wordnet_app.py index f31000a..9854955 100644 --- a/nlp_resource_data/nltk/app/wordnet_app.py +++ b/nlp_resource_data/nltk/app/wordnet_app.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: WordNet Browser Application # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Jussi Salmela # Paul Bone # URL: @@ -44,6 +44,7 @@ Options:: # modifying to be compliant with NLTK's coding standards. Tests also # need to be develop to ensure this continues to work in the face of # changes to other NLTK packages. +from __future__ import print_function # Allow this program to run inside the NLTK source tree. 
from sys import path @@ -61,12 +62,18 @@ import getopt import base64 import pickle import copy -from http.server import HTTPServer, BaseHTTPRequestHandler -from urllib.parse import unquote_plus +from six.moves.urllib.parse import unquote_plus + +from nltk import compat from nltk.corpus import wordnet as wn from nltk.corpus.reader.wordnet import Synset, Lemma +if compat.PY3: + from http.server import HTTPServer, BaseHTTPRequestHandler +else: + from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler + # now included in local file # from util import html_header, html_trailer, \ # get_static_index_page, get_static_page_by_path, \ @@ -89,40 +96,40 @@ class MyServerHandler(BaseHTTPRequestHandler): def do_GET(self): global firstClient sp = self.path[1:] - if unquote_plus(sp) == "SHUTDOWN THE SERVER": + if unquote_plus(sp) == 'SHUTDOWN THE SERVER': if server_mode: page = "Server must be killed with SIGTERM." type = "text/plain" else: - print("Server shutting down!") + print('Server shutting down!') os._exit(0) - elif sp == "": # First request. - type = "text/html" + elif sp == '': # First request. + type = 'text/html' if not server_mode and firstClient: firstClient = False page = get_static_index_page(True) else: page = get_static_index_page(False) - word = "green" + word = 'green' - elif sp.endswith(".html"): # Trying to fetch a HTML file TODO: - type = "text/html" + elif sp.endswith('.html'): # Trying to fetch a HTML file TODO: + type = 'text/html' usp = unquote_plus(sp) - if usp == "NLTK Wordnet Browser Database Info.html": - word = "* Database Info *" + if usp == 'NLTK Wordnet Browser Database Info.html': + word = '* Database Info *' if os.path.isfile(usp): - with open(usp, "r") as infile: + with open(usp, 'r') as infile: page = infile.read() else: page = ( - (html_header % word) + "

The database info file:"
-                        "" + usp
-                        + ""
-                        + "was not found. Run this:"
-                        + "python dbinfo_html.py"
-                        + "to produce it."
+                        (html_header % word) + 'The database info file:'
+                        '' + usp
+                        + ''
+                        + 'was not found. Run this:'
+                        + 'python dbinfo_html.py'
+                        + '

to produce it.' + html_trailer ) else: @@ -131,7 +138,7 @@ class MyServerHandler(BaseHTTPRequestHandler): page = get_static_page_by_path(usp) elif sp.startswith("search"): # This doesn't seem to work with MWEs. - type = "text/html" + type = 'text/html' parts = (sp.split("?")[1]).split("&") word = [ p.split("=")[1].replace("+", " ") @@ -141,25 +148,25 @@ class MyServerHandler(BaseHTTPRequestHandler): page, word = page_from_word(word) elif sp.startswith("lookup_"): # TODO add a variation of this that takes a non ecoded word or MWE. - type = "text/html" + type = 'text/html' sp = sp[len("lookup_") :] page, word = page_from_href(sp) elif sp == "start_page": # if this is the first request we should display help # information, and possibly set a default word. - type = "text/html" + type = 'text/html' page, word = page_from_word("wordnet") else: - type = "text/plain" + type = 'text/plain' page = "Could not parse request: '%s'" % sp # Send result. self.send_head(type) - self.wfile.write(page.encode("utf8")) + self.wfile.write(page.encode('utf8')) def send_head(self, type=None): self.send_response(200) - self.send_header("Content-type", type) + self.send_header('Content-type', type) self.end_headers() def log_message(self, format, *args): @@ -177,7 +184,7 @@ def get_unique_counter_from_url(sp): Extract the unique counter from the URL if it has one. Otherwise return null. """ - pos = sp.rfind("%23") + pos = sp.rfind('%23') if pos != -1: return int(sp[(pos + 3) :]) else: @@ -227,7 +234,7 @@ def wnb(port=8000, runBrowser=True, logfilename=None): logfile = None # Compute URL and start web browser - url = "http://localhost:" + str(port) + url = 'http://localhost:' + str(port) server_ready = None browser_thread = None @@ -237,9 +244,9 @@ def wnb(port=8000, runBrowser=True, logfilename=None): browser_thread = startBrowser(url, server_ready) # Start the server. - server = HTTPServer(("", port), MyServerHandler) + server = HTTPServer(('', port), MyServerHandler) if logfile: - logfile.write("NLTK Wordnet browser server running serving: %s\n" % url) + logfile.write('NLTK Wordnet browser server running serving: %s\n' % url) if runBrowser: server_ready.set() @@ -287,10 +294,10 @@ This provides a backend to both wxbrowse and browserver.py. # WordNet corpus is installed. def _pos_tuples(): return [ - (wn.NOUN, "N", "noun"), - (wn.VERB, "V", "verb"), - (wn.ADJ, "J", "adj"), - (wn.ADV, "R", "adv"), + (wn.NOUN, 'N', 'noun'), + (wn.VERB, 'V', 'verb'), + (wn.ADJ, 'J', 'adj'), + (wn.ADV, 'R', 'adv'), ] @@ -300,8 +307,8 @@ def _pos_match(pos_tuple): tuple given to it. It attempts to match it against the first non-null component of the given pos tuple. 
""" - if pos_tuple[0] == "s": - pos_tuple = ("a", pos_tuple[1], pos_tuple[2]) + if pos_tuple[0] == 's': + pos_tuple = ('a', pos_tuple[1], pos_tuple[2]) for n, x in enumerate(pos_tuple): if x is not None: break @@ -366,24 +373,24 @@ def get_relations_data(word, synset): """ if synset.pos() == wn.NOUN: return ( - (HYPONYM, "Hyponyms", synset.hyponyms()), - (INSTANCE_HYPONYM, "Instance hyponyms", synset.instance_hyponyms()), - (HYPERNYM, "Direct hypernyms", synset.hypernyms()), + (HYPONYM, 'Hyponyms', synset.hyponyms()), + (INSTANCE_HYPONYM, 'Instance hyponyms', synset.instance_hyponyms()), + (HYPERNYM, 'Direct hypernyms', synset.hypernyms()), ( INDIRECT_HYPERNYMS, - "Indirect hypernyms", + 'Indirect hypernyms', rebuild_tree(synset.tree(lambda x: x.hypernyms()))[1], ), # hypernyms', 'Sister terms', - (INSTANCE_HYPERNYM, "Instance hypernyms", synset.instance_hypernyms()), + (INSTANCE_HYPERNYM, 'Instance hypernyms', synset.instance_hypernyms()), # (CLASS_REGIONAL, ['domain term region'], ), - (PART_HOLONYM, "Part holonyms", synset.part_holonyms()), - (PART_MERONYM, "Part meronyms", synset.part_meronyms()), - (SUBSTANCE_HOLONYM, "Substance holonyms", synset.substance_holonyms()), - (SUBSTANCE_MERONYM, "Substance meronyms", synset.substance_meronyms()), - (MEMBER_HOLONYM, "Member holonyms", synset.member_holonyms()), - (MEMBER_MERONYM, "Member meronyms", synset.member_meronyms()), - (ATTRIBUTE, "Attributes", synset.attributes()), + (PART_HOLONYM, 'Part holonyms', synset.part_holonyms()), + (PART_MERONYM, 'Part meronyms', synset.part_meronyms()), + (SUBSTANCE_HOLONYM, 'Substance holonyms', synset.substance_holonyms()), + (SUBSTANCE_MERONYM, 'Substance meronyms', synset.substance_meronyms()), + (MEMBER_HOLONYM, 'Member holonyms', synset.member_holonyms()), + (MEMBER_MERONYM, 'Member meronyms', synset.member_meronyms()), + (ATTRIBUTE, 'Attributes', synset.attributes()), (ANTONYM, "Antonyms", lemma_property(word, synset, lambda l: l.antonyms())), ( DERIVATIONALLY_RELATED_FORM, @@ -395,18 +402,18 @@ def get_relations_data(word, synset): ) elif synset.pos() == wn.VERB: return ( - (ANTONYM, "Antonym", lemma_property(word, synset, lambda l: l.antonyms())), - (HYPONYM, "Hyponym", synset.hyponyms()), - (HYPERNYM, "Direct hypernyms", synset.hypernyms()), + (ANTONYM, 'Antonym', lemma_property(word, synset, lambda l: l.antonyms())), + (HYPONYM, 'Hyponym', synset.hyponyms()), + (HYPERNYM, 'Direct hypernyms', synset.hypernyms()), ( INDIRECT_HYPERNYMS, - "Indirect hypernyms", + 'Indirect hypernyms', rebuild_tree(synset.tree(lambda x: x.hypernyms()))[1], ), - (ENTAILMENT, "Entailments", synset.entailments()), - (CAUSE, "Causes", synset.causes()), - (ALSO_SEE, "Also see", synset.also_sees()), - (VERB_GROUP, "Verb Groups", synset.verb_groups()), + (ENTAILMENT, 'Entailments', synset.entailments()), + (CAUSE, 'Causes', synset.causes()), + (ALSO_SEE, 'Also see', synset.also_sees()), + (VERB_GROUP, 'Verb Groups', synset.verb_groups()), ( DERIVATIONALLY_RELATED_FORM, "Derivationally related form", @@ -417,29 +424,29 @@ def get_relations_data(word, synset): ) elif synset.pos() == wn.ADJ or synset.pos == wn.ADJ_SAT: return ( - (ANTONYM, "Antonym", lemma_property(word, synset, lambda l: l.antonyms())), - (SIMILAR, "Similar to", synset.similar_tos()), + (ANTONYM, 'Antonym', lemma_property(word, synset, lambda l: l.antonyms())), + (SIMILAR, 'Similar to', synset.similar_tos()), # Participle of verb - not supported by corpus ( PERTAINYM, - "Pertainyms", + 'Pertainyms', lemma_property(word, synset, lambda l: l.pertainyms()), ), 
- (ATTRIBUTE, "Attributes", synset.attributes()), - (ALSO_SEE, "Also see", synset.also_sees()), + (ATTRIBUTE, 'Attributes', synset.attributes()), + (ALSO_SEE, 'Also see', synset.also_sees()), ) elif synset.pos() == wn.ADV: # This is weird. adverbs such as 'quick' and 'fast' don't seem # to have antonyms returned by the corpus.a return ( - (ANTONYM, "Antonym", lemma_property(word, synset, lambda l: l.antonyms())), + (ANTONYM, 'Antonym', lemma_property(word, synset, lambda l: l.antonyms())), ) # Derived from adjective - not supported by corpus else: raise TypeError("Unhandles synset POS type: " + str(synset.pos())) -html_header = """ +html_header = ''' @@ -450,13 +457,13 @@ html_header = """ 'text/html; charset=us-ascii'> NLTK Wordnet Browser display of: %s -""" -html_trailer = """ +''' +html_trailer = ''' -""" +''' -explanation = """ +explanation = '''

Search Help

The display below the line is an example of the output the browser shows you when you enter a search word. The search word was green.
@@ -475,33 +482,33 @@ synsets. Enter/Return key or click the Search button.

-""" +''' # HTML oriented functions def _bold(txt): - return "%s" % txt + return '%s' % txt def _center(txt): - return "
%s
" % txt + return '
%s
' % txt def _hlev(n, txt): - return "%s" % (n, txt, n) + return '%s' % (n, txt, n) def _italic(txt): - return "%s" % txt + return '%s' % txt def _li(txt): - return "
  • %s
  • " % txt + return '
  • %s
  • ' % txt def pg(word, body): - """ + ''' Return a HTML page of NLTK Browser format constructed from the word and body @@ -511,22 +518,22 @@ def pg(word, body): :type body: str :return: a HTML page for the word-body combination :rtype: str - """ + ''' return (html_header % word) + body + html_trailer def _ul(txt): - return "
      " + txt + "
    " + return '
      ' + txt + '
    ' def _abbc(txt): """ abbc = asterisks, breaks, bold, center """ - return _center(_bold("
    " * 10 + "*" * 10 + " " + txt + " " + "*" * 10)) + return _center(_bold('
    ' * 10 + '*' * 10 + ' ' + txt + ' ' + '*' * 10)) -full_hyponym_cont_text = _ul(_li(_italic("(has full hyponym continuation)"))) + "\n" +full_hyponym_cont_text = _ul(_li(_italic('(has full hyponym continuation)'))) + '\n' def _get_synset(synset_key): @@ -538,7 +545,7 @@ def _get_synset(synset_key): def _collect_one_synset(word, synset, synset_relations): - """ + ''' Returns the HTML string for one synset or word :param word: the current word @@ -550,11 +557,11 @@ def _collect_one_synset(word, synset, synset_relations): :type synset_relations: dict(synset_key, set(relation_id)) :return: The HTML string built for this synset :rtype: str - """ + ''' if isinstance(synset, tuple): # It's a word raise NotImplementedError("word not supported by _collect_one_synset") - typ = "S" + typ = 'S' pos_tuple = _pos_match((synset.pos(), None, None)) assert pos_tuple is not None, "pos_tuple is null: synset.pos(): %s" % synset.pos() descr = pos_tuple[2] @@ -563,23 +570,23 @@ def _collect_one_synset(word, synset, synset_relations): synset_label = typ + ";" if synset.name() in synset_relations: synset_label = _bold(synset_label) - s = "
  • %s (%s) " % (make_lookup_link(ref, synset_label), descr) + s = '
  • %s (%s) ' % (make_lookup_link(ref, synset_label), descr) def format_lemma(w): - w = w.replace("_", " ") + w = w.replace('_', ' ') if w.lower() == word: return _bold(w) else: ref = Reference(w) return make_lookup_link(ref, w) - s += ", ".join(format_lemma(l.name()) for l in synset.lemmas()) + s += ', '.join(format_lemma(l.name()) for l in synset.lemmas()) gl = " (%s) %s " % ( synset.definition(), - "; ".join('"%s"' % e for e in synset.examples()), + "; ".join("\"%s\"" % e for e in synset.examples()), ) - return s + gl + _synset_relations(word, synset, synset_relations) + "
  • \n" + return s + gl + _synset_relations(word, synset, synset_relations) + '\n' def _collect_all_synsets(word, pos, synset_relations=dict()): @@ -587,7 +594,7 @@ def _collect_all_synsets(word, pos, synset_relations=dict()): Return a HTML unordered list of synsets for the given word and part of speech. """ - return "
      %s\n
    \n" % "".join( + return '
      %s\n
    \n' % ''.join( ( _collect_one_synset(word, synset, synset_relations) for synset in wn.synsets(word, pos) @@ -596,7 +603,7 @@ def _collect_all_synsets(word, pos, synset_relations=dict()): def _synset_relations(word, synset, synset_relations): - """ + ''' Builds the HTML string for the relations of a synset :param word: The current word @@ -607,7 +614,7 @@ def _synset_relations(word, synset, synset_relations): :type synset_relations: dict(synset_key, set(relation_type)) :return: The HTML for a synset's relations :rtype: str - """ + ''' if not synset.name() in synset_relations: return "" @@ -623,7 +630,7 @@ def _synset_relations(word, synset, synset_relations): # similar tuples. This forms a tree of synsets. return "%s\n
      %s
    \n" % ( relation_html(r[0]), - "".join("
  • %s
  • \n" % relation_html(sr) for sr in r[1]), + ''.join('
  • %s
  • \n' % relation_html(sr) for sr in r[1]), ) else: raise TypeError( @@ -632,28 +639,28 @@ def _synset_relations(word, synset, synset_relations): ) def make_synset_html(db_name, disp_name, rels): - synset_html = "%s\n" % make_lookup_link( + synset_html = '%s\n' % make_lookup_link( copy.deepcopy(ref).toggle_synset_relation(synset, db_name).encode(), disp_name, ) if db_name in ref.synset_relations[synset.name()]: - synset_html += "
      %s
    \n" % "".join( + synset_html += '
      %s
    \n' % ''.join( "
  • %s
  • \n" % relation_html(r) for r in rels ) return synset_html html = ( - "
      " - + "\n".join( + '
        ' + + '\n'.join( ( "
      • %s
      • " % make_synset_html(*rel_data) for rel_data in get_relations_data(word, synset) if rel_data[2] != [] ) ) - + "
      " + + '
    ' ) return html @@ -743,7 +750,7 @@ def page_from_word(word): def page_from_href(href): - """ + ''' Returns a tuple of the HTML page built and the new current word :param href: The hypertext reference to be solved @@ -752,12 +759,12 @@ def page_from_href(href): to be sent to the browser and word is the new current word :rtype: A tuple (str,str) - """ + ''' return page_from_reference(Reference.decode(href)) def page_from_reference(href): - """ + ''' Returns a tuple of the HTML page built and the new current word :param href: The hypertext reference to be solved @@ -766,11 +773,11 @@ def page_from_reference(href): to be sent to the browser and word is the new current word :rtype: A tuple (str,str) - """ + ''' word = href.word pos_forms = defaultdict(list) - words = word.split(",") - words = [w for w in [w.strip().lower().replace(" ", "_") for w in words] if w != ""] + words = word.split(',') + words = [w for w in [w.strip().lower().replace(' ', '_') for w in words] if w != ""] if len(words) == 0: # No words were found. return "", "Please specify a word to search for." @@ -782,10 +789,10 @@ def page_from_reference(href): form = wn.morphy(w, pos) if form and form not in pos_forms[pos]: pos_forms[pos].append(form) - body = "" + body = '' for pos, pos_str, name in _pos_tuples(): if pos in pos_forms: - body += _hlev(3, name) + "\n" + body += _hlev(3, name) + '\n' for w in pos_forms[pos]: # Not all words of exc files are in the database, skip # to the next word if a KeyError is raised. @@ -833,7 +840,7 @@ def get_static_web_help_page(): @@ -903,7 +910,7 @@ def get_static_index_page(with_shutdown): @@ -936,7 +943,7 @@ def get_static_upper_page(with_shutdown): @@ -957,7 +964,7 @@ def get_static_upper_page(with_shutdown): """ if with_shutdown: - shutdown_link = 'Shutdown' + shutdown_link = "Shutdown" else: shutdown_link = "" @@ -996,7 +1003,7 @@ def app(): wnb(port, not server_mode, logfilename) -if __name__ == "__main__": +if __name__ == '__main__': app() -__all__ = ["app"] +__all__ = ['app'] diff --git a/nlp_resource_data/nltk/book.py b/nlp_resource_data/nltk/book.py index 0098bed..e130ecd 100644 --- a/nlp_resource_data/nltk/book.py +++ b/nlp_resource_data/nltk/book.py @@ -1,10 +1,11 @@ # Natural Language Toolkit: Some texts for exploration in chapter 1 of the book # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Steven Bird # # URL: # For license information, see LICENSE.TXT +from __future__ import print_function from nltk.corpus import ( gutenberg, @@ -24,13 +25,13 @@ print("Loading text1, ..., text9 and sent1, ..., sent9") print("Type the name of the text or sentence to view it.") print("Type: 'texts()' or 'sents()' to list the materials.") -text1 = Text(gutenberg.words("melville-moby_dick.txt")) +text1 = Text(gutenberg.words('melville-moby_dick.txt')) print("text1:", text1.name) -text2 = Text(gutenberg.words("austen-sense.txt")) +text2 = Text(gutenberg.words('austen-sense.txt')) print("text2:", text2.name) -text3 = Text(genesis.words("english-kjv.txt"), name="The Book of Genesis") +text3 = Text(genesis.words('english-kjv.txt'), name="The Book of Genesis") print("text3:", text3.name) text4 = Text(inaugural.words(), name="Inaugural Address Corpus") @@ -39,16 +40,16 @@ print("text4:", text4.name) text5 = Text(nps_chat.words(), name="Chat Corpus") print("text5:", text5.name) -text6 = Text(webtext.words("grail.txt"), name="Monty Python and the Holy Grail") +text6 = Text(webtext.words('grail.txt'), name="Monty Python and the Holy Grail") print("text6:", 
text6.name) text7 = Text(treebank.words(), name="Wall Street Journal") print("text7:", text7.name) -text8 = Text(webtext.words("singles.txt"), name="Personals Corpus") +text8 = Text(webtext.words('singles.txt'), name="Personals Corpus") print("text8:", text8.name) -text9 = Text(gutenberg.words("chesterton-thursday.txt")) +text9 = Text(gutenberg.words('chesterton-thursday.txt')) print("text9:", text9.name) @@ -120,23 +121,23 @@ sent5 = [ "JOIN", ] sent6 = [ - "SCENE", - "1", - ":", - "[", - "wind", - "]", - "[", - "clop", - "clop", - "clop", - "]", - "KING", - "ARTHUR", - ":", - "Whoa", - "there", - "!", + 'SCENE', + '1', + ':', + '[', + 'wind', + ']', + '[', + 'clop', + 'clop', + 'clop', + ']', + 'KING', + 'ARTHUR', + ':', + 'Whoa', + 'there', + '!', ] sent7 = [ "Pierre", @@ -159,20 +160,20 @@ sent7 = [ ".", ] sent8 = [ - "25", - "SEXY", - "MALE", - ",", - "seeks", - "attrac", - "older", - "single", - "lady", - ",", - "for", - "discreet", - "encounters", - ".", + '25', + 'SEXY', + 'MALE', + ',', + 'seeks', + 'attrac', + 'older', + 'single', + 'lady', + ',', + 'for', + 'discreet', + 'encounters', + '.', ] sent9 = [ "THE", diff --git a/nlp_resource_data/nltk/ccg/__init__.py b/nlp_resource_data/nltk/ccg/__init__.py index 7d54311..40515aa 100644 --- a/nlp_resource_data/nltk/ccg/__init__.py +++ b/nlp_resource_data/nltk/ccg/__init__.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Combinatory Categorial Grammar # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Graeme Gange # URL: # For license information, see LICENSE.TXT diff --git a/nlp_resource_data/nltk/ccg/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/ccg/__pycache__/__init__.cpython-37.pyc index 10662e1f54f3ffa302447f8d6277cb3a9dc9c4da..b69809cc2cb983b6de148aff26929fd335813b69 100644 GIT binary patch delta 31 lcmdnNzJ{IKiI<|FD)+8pPb3;4*+TE2SWe= delta 43 xcmZ3(zJs0HiISrZZX5<$n7G>+}O^#*u2LRl+3x@yz diff --git a/nlp_resource_data/nltk/ccg/__pycache__/api.cpython-37.pyc b/nlp_resource_data/nltk/ccg/__pycache__/api.cpython-37.pyc index 25cb42d3f4b4df75070dc68375245bffb7ac4184..6ed9de14ed9a8320aaabb679dd10f64dbed6e1d3 100644 GIT binary patch delta 3909 zcmai1U1%It6yBNHpV`gqX7k&mO_NQNZL>*iQd?uKH8vqOZM99b(#VRAlbtb{c6K-3 znbpRh#uD*CMTi%XJ}6>+C=`T}fp;{Eh2SF566g}sR`RTel?y_g^ zoO|xM=X~eq-ud(4i;pBmVrL;Kq#FMIG4Lq#+TzMpuBXtP0l=H?Ajun*d>yttVcso&;Xc;KTwC$Kq3Dwx1dYt$6h^@{Vy~{g7A>o$wYePk6)?nppqB*UAem_3HgXhBccKcWr zQB~Vvs=9}N5q_?doNTDzX;@W_CscKq&qXFZL_FhJ72$%Rs$=~7$QvV=t1N<~8Oa79 zuB@sBEe|3Ye=U}ys^k2Md?t;{sssU)&1&|ns!s4z(Op6tUyg2D2;p7{B(+GANa~R^ zB56X>f}|D6W+a_R2=+CCL1ySc9sF#xUk(LDB~0Il5>KJos=w+*A*n)WUc~1b(l#Fro1zdx$p2-8z;8y0_9bpiD8Q> zvZpkrpX0%%!#zH-*v_W4+zAvu&wtv`%3o{Ro~pip-46S{w|u?n8KLX8>L!HA3Ht&+ zwc%ioj~s3fSz}@!{|L5U+%b$wMRqs*vAeu+wGKsza)qC5Ss0;JlDBFnE+r4V=%#1y*$AUdm~DbTMy}u7cp0X?-=5eBZ*ZxN4YZ?*4kRfg zl$jNaj~eHX{N1+a2M_@dY%hniA%=&GE0iBlR4+gCqnl^;+<|NE1d@?xsb{timT+Jd zj2Tt^o&T~qU9pz2jM*L}dy!z0QucTm(TI<96pZ3%3`wPvA&U~2}Uc0RtPKXnk6RBkiu5KOglzU7{ql64<&%iToi zH}k)?ytR*7DxJ)@nh{`BLG0q!y51B<_K+-Sz0e&uc)Vy9R!X8$Seig;fFDT@=#|b53|jfwfIbi) z7hDiO!P9YVuJ7zBi;Tlo`x4ZJ?{Q0F8(d#;lQ z_v7OPkW9$CAME2VXFAI7IG7d_{8HxAM=2xF1V)Mw7xn-W(ni%xDyfE#!o-T+1{k}d zt-(*viuQC_63_`T#lP)+EQRZLsCV$rghgwK*Tont{5F}!uVl5L)3VHjl- z17>FrIVNiWNZ-lkii7q!bnuAvUH6@`#8d14g$g!aqGptzZBE delta 3734 zcma)9ZERCz6z;vZAGclGeX#G1t=$)Gx6vVE%sB@eA5%WY0!Flem$vt^b!&U)dplr& znF;DIG3qff7!_keV!|Jj;SV(N7cr4U5)yx1W8xoUOiWy&CWP?A_nZr3l=kYHp5Bk= zJ?FgVea`c~yME&SvEW1?;B)Z5w$!u|+w@+rarrHNr#e(2>&OWSLBMMI5c6>p*mI98xGZ_NvivjbWuon_%n|x^JlW=xhN%MLU~mmf8x`;7ZKy* 
zm49IZJmQE}{f@bTR~Tq)V9AW3DT{fXsTxbBvYJv@TF?x* zJA_(@OP3>vhzd-8h^8UT%Wut! zjG>%b%%=+(J>Lwg(%YRjU^fS!PO=bOasDzyIFX#10b?;`6f%WHbYocV@x2wpxAs=+ zS^3K468hJVb?Jp!jTMjarjvO-^USHGe)!G3zsn{okD?d_rYJ-3qUW_RO|r7YOx=p2 zy{af9@V95$s)&_5D+A7Jin0^3@`vMY^yWfSgJuIarmQIWWR63k_Fl3DMcD3S`l-;r<%Y=yvrZ=Y*lId*H5R8>OzL&%SO52)a!n?pSIYrNB zm)313fOPg;Wu@MYDF7%fgW z2itIfR_jfG>b>nN(t}?7Vi=ipYq<(x$bb7REbVbzZ;BSxcuYo~jms50#wZ00!%y}cx$1BUS#5!QU zr)g#DhAYBWB8+6|f~01c>E{buU>TB_Vr`(5KL%Ibp-o$OfHLLrp-AcMOp>Xe!}+>{ z5gRNDS}K{INAOixt(y#$2eUyQ^NW?E_2-4or^<(S(*%rb@K3|@5gQza=B&w}QeWqx zn_#(dZp$<1SY)I8hmBa*FC{XzGtkFGD8U#Y8Rq8P2t`cr&_aLDPzcm|3y`NjiUpmW{8+u>w}H zEe5wxf9( z4HA{uV=;oZ0NM<8;EQoIr8a|Ve6br%souuoe!#ohVPKcbZfIx^3j=W5*R*oDeO4Hv zQf$T1>wDV8HBp;k+aC8yVu48uuo^mOr@1IZZa5LP1)+L-;a2#7ohMXpKYZ5F1}}8< zh02$k3RlU2`$Bv9kkTT=kO)^k=EL}jxW_CVewUNBQivKfo^k7BHmYzW#S({|Lr z{72!P$dnxjNwx#-M{Jpht2+Ui=$Y_A%#)gqGM>WQ1KdPi*0JGP5Mu4cM<2vb!eH$C zSBNGUl93~7;X`;!-l)OJB}JHC=NHeXrf4MjS51gUsuYtB!LiQTmTHTSE*w*J`vCL9%kN`#3ssuPt6U9Bqs zaf4`qD-p-WpffIK9i3|>*~QN%0gHd^z%mC-MFXan{^im2tiiC4IN4HZCZTIQbti)P zY+-(LCSTB4KFKk(0lD!t3Z?1RQ*&CnV5vNEnP^ZdD^niL>FQ!u8^bN%sKK+kU#t@S QqF?ast{V3XvR1Y2U#S{z2mk;8 diff --git a/nlp_resource_data/nltk/ccg/__pycache__/chart.cpython-37.pyc b/nlp_resource_data/nltk/ccg/__pycache__/chart.cpython-37.pyc index e610180566874052a2856ad6d386ef9491167383..884151e86ddc1b532b4174ae9954d4196c4fb21a 100644 GIT binary patch delta 4465 zcmcgvYit}>6`s4ZkMZu>-d(Tl^?SYc+8#S=$4=6`oR{syaUKp%>Vl>;)6LGcz4FZN z_Rcu5O^OjN6%|M{To9E&q<@e~1%I$a|K$fNKZ-yIi61Z^Bt#{Uil7J~p`faqbJwqi z?4|;VmFAms&pmVQz27QS z%myp2#jQrI(Q49~tY)p*N@xkIMQgEIwN|T5YqQ$5c3~SdJFHHvQ^+B+;b|N)WOi-8 zlTS<2k`Xp)Uy_Vk8s6NHwQf+<8TFv32ZgMqXeVeQMiewr8rqaMiPj@@F{1%=4coe2 zp^F=hpljULrD+ds+9ZI@n`F_W^?{TS?G}M;pp~_L(6)+p`-U(ZpuL+CO>au@iAwuu zf3rjf;8B|;Z4f5YWHiGd&9nxlHw5j3(E{xjS_|z1Myt{GlB8vfcB2E{!$zmk1@93W zHIgv(gGRTJg7zpKGkTUiuM+lvj&4f8k9D{NhsQ=%Y&OEX0;k9s{#@W>H?J7|Po^ z@@`UxKFjX~CkCq+*Mf1ruw#Cce-NDPtzsT1&{fA?v$EU>ovRWq)&&bO_>V(pl5G{2 z#FEgp$a}+QwH}aIF9L>-r4jlO1`t$)L4+ZM0|*%e97TB9mn}GZhAG1m0H^tba1TlF zzoc5W{uMqzj*3s(p-1(KJagqz(RO`KahVoeuSv5D`+NyEIJXkNS@((bA@qt49v>pH zfIB=IN$~gTM*A^cz;LXxJ+5_~Pu5Q)s~8~{FY@oypZ^#Rk+RRh1Wu(zu2v4D;B}MR zk*Vb2%JW0sUl!~6Yh{v&6qA~l;vc4QzjT98#JBQ=tW#wB9PDQB8~kgH z?_3tuwOci+QZ6!4ZYRNs7RqG{xOsphDl**-?6k0&Tz|e`Q|71v*1_LsI?*KNw>wGK zmo1gdweRtNHZ2@Pe~%9f(gkOSu0H@1+XEnx^>+SxV{=ZZXJK9+C72xp8&{c~zj}UZ z`uvSD&F4YKE+Fj9Vs|R+ICR{H$Hx1L;`4gE9>uG~*zs-F4)giMMRJ0FJCRA(qLa$= z6~q0Mzn}Q-#uNBsr_ht&7wIZ*W~V_y!ccJr;Vi;Agb9R6geio*b%?Ox0)GyV9R`p{ zgm`&phr*MsGs!BGMMi$bUuius{}>M0H1vKP`YwXq+whd>^9uT8rG{PHrtc7crtQi$ zeRb%w%8alm-sS&jd-MNK-yd-a`{}#>B~c>b_#jy{?INL++p>bjb|4ASF^iCu-R#;v!yUM9mLe@;M~iS3`1 zKf;_<>)>q5+m5SDPEJoQ!QF>l=F8nZH^qK~dke!fb^SIqY0hC+P=ooW`Ab=5Q(GL# zYOH)@`o(E!i)@CA8C`XK`GP@Lnb^lfDvLr%^FMbFk|b|VrT$~55N=!ewbb0~DjoT7pc(#`ZjZE{+o2>lbD$09T=}ZZWO;+aj0%nD_d~#Iop+CLlhSr z*AH7O+(y-Kd1x;+7Abok9b^%1p#$%oA~V=guJ@^f=uu1x?m5q+EX)K8Xj{G^JqOY` z{)4{hvya3;(2u?Q*b=-6%I2Ws1Oeu~ASgufv$?I~{R;7ybB|9CoamzXaS}j_O=NS+ zce2cw+&iF7^4ABxPYgb*rb8B5;?c&&`DfKcT#U0f*b?+)`J1XrmbQMW`baAd@{lCU zH*r5Z0!>%8va9R`{`bKta+Dt%I@j}2CvHsqwSnB9qAY(K{*e?5clo!5`pMm`w}(c^ z2Krq0q;27peHvP9MGTW!_ar~#k{8tw6Dz`+W-lPzMp#2A0zd_?z>S;TL4`N~N+{(4 zR7LZfck+g_#MYsy`I%_AzT8sYG(^pCmT7r(-RS!3{C*}w*0z3@`IBb@^PN2d;MPsi zh3tyyT!+2m26aScQ~2UIQ}tcNjlEG$j%wal*cUJ&LYd z*?d9Q)kEbYH>ktkL1%?gUEg3HFkvzR#+@zT7qXpSWj>I!V8L{jhecU4^s7bo6vH{O zz*jx2<>SS4CRVHVwECsfJKJx3he;`pgY%Lqag zA%@U^5JzYP&_ZH3tg)=an)pvfdmF_eJ&G21U0`_Vu^BFp-Prg%ih^-Wc7{nPs4Yhv zqaYY7-&6LIZ8vRQW!x$n5TDbS4E8L5m&84aC+rCW-C<9J90-L86c15+kxb+;$i471 N!_(p?{GGAXzX2hEZ+ZX# delta 4146 
zcmcgvU2Ggl5uV=P@$TB*KYQ2q+Mcz&w#Q!EiIYo?#K}3a9p{wT$t9Nq29jm7)AnvM zvupK?6+4$ZgB(RV1mZ#q35k>7BqR>dJ-{M?c!J~U1P@4vzj*oq*4SMw6o`3#@w*ZgKcdw40kB~(G9)ojz+%yzBajA~Icrp3&-7B>@G z!c1yOvqS4JJGD+>D;upZq8r&r?fj-+lolnuMGxJV^biefd$lf*!+Hedh>*KMZq?gB zZliwa?*X}8kAl1v6^aZfVtO1DaUu889va;yfU#||>e2e36&G?sU!HmdjO{UED!OjmAuUMK7URktPJ#B)_rv-Q7{i(azKH~sI` z?*+0Xm4Q$^nx87yOgeR|xLK~P71`~nN&Z&w6q(_F44zjyz(OS~4L=O6UFe0D<2B0G z9AB1CDanRLtP8r%@o&qE1C5OwzhxJh{X9>$oNwaG@zSb(k>6{1{%96g0vRN!9)^C! zG!2#+FlcqvE??q*Z<)#MZlQs(mzAw6Tnn96(6W({UbC<9AB0YfHZclUtU{?~t{K#( zALHHOv)LxbtzcZK@>n>@ABLYDYGN6z(hb|%G;i<^!e^R9hjqjHi+nlqoYL8FC#(m$ zbpCc^R_lWn>qo$dF$G}&VGv;mA%`%GFoH0OfU^iM`;r4!&oDz+0^lTHY#k&C{xIED zzu%fC$HkBo^r$|OR*rYAW;uSlcAHimpFxW&`+NyExHgmjsqJ!Z61}2>gCiq0VDnGK z(%f!8Jc3yQh7*nLaj$oHHksz{wT~%H3>7fE%O|4?AHpfp_KH}R&GOpE4Xq;kS67D5*9y-eKSvUp%QL>-pA+eYEM8{8actkoODEXU}YSFH-eGYcB z`0Kp4^W6)g!gec1mEA&9U_3hppoL4tDxMwSh>8U{!N(F-loO~_Ey`>)$kP09>O`kl z-|iwEe{pTi*nEqBCbcqz{tgZe(q-AKQ1~Gar}NssbIFe2Jo56jrG@#$g{v;3v!G*V z5cVeAos}JhjyK@5@bZ^^K9A2M`{Xz~3Iu(LJj3&uiChZ~ZoDKh&AWWF>zi9op@=<; zo&<%+hV|-IfT9Si^$A^?q~2>5deuqiI2b8{eseDp~%87 zdAuio?Ev1`S?K*Rl+A(NJMg)bc?D%&xrNOEWf%GHdM@tJhKxRqw%_wtGvE6E(}s_` zeYAa}_sY{~`wQ>toA{q-n}<1ndjM_o^{@2FoJ`efj- ziX;3N{Y%O@XdPT9>^yYE>a)rNGAZ%~p0pAZr#f*5VXmreaCKEI+mYwy7U#jPt?hktEg;6Dxl;kJYSX5fY8gK|`yk+?N>4Pmb$L`#�(1mmkCqG# ztmu$m_OS)L$@%MpgX9AL-r$ijKMqH4@DvlJcL^1H{&vs4&xg_2RUEY}aZcodpJe$oE)688)*Ck3fUh!!mD@TG8lcp{9; zb&u43{>E_T5I%C8033?&d{iUu96qYAQucAQ)DS*_HoiMGrn71O{;)bGoK=PM_yU4mh4Kc>c~a5x^B{{y>CWAeG%-g^CM#_ zDb}x#`AIiU@`xzz^Eto{p&zeV++er(z43W6&3`<8CcH0xyfvR98$6dEA?x*}{1n+j z4|hE|OSoV!fyAo9>BL=+^0Gr-QNv8EKY(4g5NZevp$q`!xDF3vwuTCEidV386F`-< zfN`gy+ht~fs0El19Dk`?F?3M?{2vot$$bmNq3HS{ADx^atp3vE`<^XKUUnJ4X`81j z#dX8J0tdlWYKwe_|2Wo+ip|!+K=n9o4YDUo0r_)$IwYop(ZbzGBt(*gy^1|@pgkrKJjs`EN83vzW^=K`^0JEOq96JL_rPaxu zQ;Y@C7V}z52wUuPp9JEi(SqILmIz}Dt}!C<<>@Z|`RQzjxFAKb;1$jA(qZ%bC(~EA zUdE;%3Nw;nrU*vdKp}Zzqf8|C9^t!*?8q}_O$S;}W3^a#?ZKt*i2y?pPn6^%klRG| aM<=33qr=fuv@1YD-T-kiHHU~RsSIX diff --git a/nlp_resource_data/nltk/ccg/__pycache__/combinator.cpython-37.pyc b/nlp_resource_data/nltk/ccg/__pycache__/combinator.cpython-37.pyc index 0a30b635ecfbe1a60d0eac5c0dbb1fab1745476e..3a832e7d7251af6ea195ed5ee881ccdacc43519c 100644 GIT binary patch literal 9839 zcmb_i&2QYs73ar(P^*=!FUztl+q5h>TE~|1)do!*!7hXH1bQfXYX5}x(%*YSaw%!`VL2r zn)zy;?{kepvsf*1JKwOHJ=Gp=7aFBzZ?#v~+@hP?(LdHiQCKfEVY%fUb4RcCrTsmk zg#J?8-|zNdY_BMzuk7~o*mK<1C;HLXANLJ#-#IaWz5%z7#|?@>G4xWa4vAqgg6pt* z&K=y*+@T$#I^qs+dl>EW{O)-%ig!ojcSpqqF@|?8h>Kzz*D-NPOyGJ^TozYw9miaw z@m!ZM7qw}wiMTy3t_u6tX7w`9eN9Ya?#X!WE8@DC!W>t{w77w*jk&Ji{i}HY8s4Y2 zEw1jE)k*P&n8AqaJWEB~#4I=CS*FCB;ufAwi`!xr*Bh8+3U5whmNzg9wQrz3Bj%R0 z%3E85rG@w8#}b8fdq?x-Z;X!wX&MG=ho|74Oe4eCmcy)bFeW zp>*nD(+$^rf#H3Q5H;L6^@bA!=(x1LiASy4JGEq@y5C%PLT{zv;_=YKcB>wGe#>br zIHCL4mz#B>ocv5Q?x6%Ts9de83$1Dhy=uBSVF>f32J^_HofCO(7eoO96{GQ=w}dC% zdgzLIh$J^VvN?Z*wU%uB?#JBQ%&fiYOIy0@(hb~J=(Qf(j!o*2n>JXE6*J2(e(FlM zv1to;)oZzdy&B7a7N)4Lx%H>?y0>bFYi|0~c3Q%Azi6UDRL>>(UPbSJ+=W(!+v+ zCQsxmu`~01NE8DX%2gDZy=FTINop7j=I~ld)^@{H+>rSwW!K3D=toy9owk4y zYppN-L93WiE^r&Gk(De(WF?xGJs8Q=e?OXQ2Xe0A*PX^(!&{kCrupvd`*Q(|`}R7d zbRN6GT&od2ovYU$&vj&&UEhp)p_ZEJa&6+lK&2U5H;%To*~}{^%E3%lOi6$%d#Nf@ zmGUq9a2pk44yJIEa4>ls2W-3UBAmji)ml!|t<|DZt=9Afl#O<|R(sZV8u622ttR|> zttLsqvY)DRRCT!E7O^5NRKqAi0hQI8Gs-#ctK&BP(W>1;c~uV{dvJvRZvBHTEQ%7m^>{fan)5}q+I`+Y|$!30-p=}>cJGD$zo5*`_D zY^6X^z&0Ql;n-`13W-y^MKB@@tvdnC8c-Wsc8K_}X~T^go67bgtxj%FKf03LFxb)x zZ|mDyP1`n5>)R%3BZgmqZ*L5XlsW)gQs$y!EoiT)72eMsNBe3+|l@J~!pA@d9-QEg0QS$m-UqV&StHa|zKdp7Vz 
z667j|Vvrmw)}~B(i%gip0qrqtFA@zRrs*U4*2OLiD(|aPT%{jfO%&D|`J=$HKOx=e ztHvnBTVru6e|I{VMk1&=gKylOp9F+1_-pWbpC!ME1lJg!)aArstnvWBs3?|%XOjw4y zC0Ud0K5KhlI}N0LcAOj?S_qYbDlcRuoXHW@f{^)2UKQnaa)LT8Q+0)^j?!3Pq}Q)d zvJQ6XuwbY6F!c|;aI~>Br#=CKNCLgO+d8UQkQKh)krEQ4bk7_Rg!DNk}?sYrx?6oXAl= zuIZMZGqx(n>auI*NA5`}ph`Ih{%1L-Fa_={d4o~`JTW0tZrj|`DR?F6L0(Q_ z4mnNL4OEc{ZAPV_jor5wwnH}wXQ^D+%;8}-XM2J71KJfS>H3w7($uV%!L6f;E;|VS^^QVOrK+fS~l`b1YGFx8qhiEAOlmpua zBg{w^VfHjRkA6lQZYN%9?B@5h1bt!;cvvYYXI0A=hgN9B?YOUe0~AjMMZc$oq8Oo> z1A0y$(?|8KYh4%ONEo6MW-@?a9n~Q*GjIga8W}MgDHz3K#!%HG8({&@Ao^~RfC}#r zXeDi^X@@iK1iY+{KIB{A;eZ}wk@_gS9Re{~p+^UDElL>vgc8sOSJRbRhJa#*UdG>6 zCh}$MbQWfb^GA1Imfj7)B7OcO{Q8cllN)JAQhR;en#1(p5TO*edaSaf-tHJeS$_w& z{86>i2k}I{iDAd#NbiatuEnYKpNYJZQ&N8aT7++(K)6hVlg#fg8OrazDrF;qZIdCC zMV2R#?~Ne?XDq>AGt}QZNZmvSqXr$#gi!gmzGDRaj29d-c0oV53H2v6z`hZc5oqY30|@)Id)#V`B&a{Xv5+2I{0;Le%rHtwU&c`6`{;}G z4FqLMqX-Vt-&y0qXa|Otl01Y0!mk*;vt9NK4jG&6kW5GR`qhYIpknnam2*!Le`H02D1RE(m+=N}OB6kn7;gw&^#B}Kob4EptvG$qWT zAzRAnEYTg@?F;;bc?&4KsobE0%sAhdar=e#%=iLM5$FGkGuney@U4_Tb{t4cqCv0K z0?PS8nB>9}9liF$Pvu9%1xormEAkF^TxEH4-Tln*0(a|ehTYDSOv0_c?_i{jgY5tt z@B{criEx?>-^C5wop5kOp&4V~SiHW*j{d?3&D~s>r?pY>pUiGc39kEk|bcMY< zUM0YyWyt2k6G~D^R{}n=ZkzJg(A;K;VY@jz<8(ijn6hOWU7#%ZMJ_CEb0HYrwFoZN zwf6vU_NX90)}-u;)kB(5W>2ZGN{Y$1ktmQgtB;wfcB;9JBaW*-BpuPIvt~>fIny#o zNBxAbm%3`{)M@eZN$~n(qO8)z?zCPXTgf;li40>aj?JuLYd}(O3Rr}zse9YF0k-O7 zv{;NdNc&gNj)S>!@H{)Gf+NNsHe1G0GqcfW_3z(~L^XDtW@|*A$`+cZHH}%Z`zWSh~u-Bx+InIN5sesybVcC zRPCh?{1lw=e<+fIN42ba>W^ogb&u^gD)=i;@c$$HL0eMZ!XFUiB(<(nMMol%(vRv; zyywUqQdWnP?LelJcR(sy(8fR|(Uje$Gh{Y>of)#7rDWRq4Gzm3@Fc~E>Tt|OHh-$7 z=9|rD{2?fij@SPPK*fQ!e_V?C#l)@4eYCNz{k!P?*`- z+1Ys?^WN{hnbqQ{Q)Lam+11Y)Pv$i3pL7%d@@U*e38b!RLaS>+7e-5O8+D^?)=i!6 z%~r0Rujh5`V@>2l{)Hy;t|1D)H0uTQ6orKzE9xntrzFbgDZ9nzX5GU5h^XMc!uKWI zpAw_EA9V|OUKXcC^@Ua+5o2N;*NSVo<>#6^^4zGOa!cH^XIxa4XX=fw;-zbLMW>liVO8Qv6gm|>1*m=QH`1O2n&EpZdq zOPFB>Z_Q!`8#7S*655x=tre{{-ybV4Eq!b+`R!G&pZp{n?#XYb~6!bo?rabm2PXx7VetYaq(g#gPNkb?lw2*b#KiM*WKi) z?R13g$j7}lrsruK>Dta}%e6z_UUefXuDHn~zg~AjejohwTFg%~(va;1^JSi4*$?qH zjdTLLYppN;R;!tVT;R6W23EWj11r|FEMO#8|9!Bq z?zi2AC(hQo-*x21!hFkbI<1A4x4O{Xf<<=ToqvBJ@Iv=i7ZN*<-C&{93O5#-&BqI= zJoDYH!3b2;P+jgc@nfLUjQ;h*?QTBv%CRyr!!=VDiFu2v5><&Tl8|svj5x4y6LWCo zC=S?;-DNn4)o65_w%ce7%8f?b7f?Lfl}6)f&uK+Zij9Wwn~jDf&C3z0DpaLhaEn-x zguhjkfV!-5&Zy+L4?bRkw>#`&O*~6}H0fQGmv!x_j}CF{{@>YjzK~8A#>?(Z{?|*8 z;4pW707r8+;K(g^4W0xymEPm^5N^PT)y_}>fH5R5#2J}vbwtYD5#8~AinrM`&(^Qe2ox~=`G42|QvdIG5B=rq9 z!*Ah9Ho}OCFh%Z#UPo*Z1q8snN)ZraI5DUf#R%}{)L`ts0>z~yO7#EAMzO?UWlu;+ zQH0D0VT3KDE4?lXvmltTBCD@6R0MYFI3w@l?n>gDa>ur?aemr2VA)%-lAAgdq5|`b8Z5YKCHv z94peMOn4iZFok{EV+{X3(NGLd>izRW7*yWZq<~95x|%2~89AW9vOgi+>1)PW3dF{8 zKY!<1a4m>AI*(B`U4^i{D1hZ}z#toqPcTHG&`>6#QOlTxEUjh@MS>YT%0#D2beq!S z7DLY&JopYuKzA14pq#hzR!&ks={)(7oOe-zSB8Y~#VBLSV8T6iy@znOIGwWHr#GgAztb<)VDA>t8 z41V>Ed03qoOFV3L2wN}bV$9pCxR{(BqB?TMM#>>v?7}b(U&+UApUlT3`2m@A>|et; z_+}l<2DQ(KB0HFwI2iTsaWIM>cTo@V%P zJ2mdQhYXR!rY{jmyFL<31k%{6vGYoK;#v8nCZP)b=t^Y?B_QM3)_1gBEvN{6h1&Wq zMId7rTiuNk4P-*EoqhC8xmqRl@ zayR0P|aNxD(4``3<ZFSp};sskg~2pg~tEY-XexBsMTJGA{_F zFXLRs#F@aLOopWrSOo4QbJ(Zk!ALab2c6c|&oEmgs=R^r5j5GfGc@HK#MhHMPUM|sKbqoMp$R<{jCn8_@{>}qlm{fsu; zj=j{_$?s}0`p_Hjfcv$Aa#po`QD}um+|+#~ZM9AWMZcznq8Oo>RXwLq>1Xx+<)I64 zC=AhY0~tWjMRh>T3>-nURz}QL0!ERT5mc3AD=gp{MBgbAP~jZ{t+)*}J?G3j1}`hs zhnxow`}81b9prUX2S7}|NskDLRmHg$Ip}Xt0@|Nyx>CzHP|VOP`0HmPU&c=7VU{?e zboXTG!w_tsFCK?qe<14QM!>H_sJ%XJ(W%3)h){xCC95K-cQ}Sn#+u@me^#yZems#i z3_A)(MppfBJxZOYcyS|o#*v+QBb3NXnp%N8 z=O@en`^KPxKtl%|K-jO`qgHD&M*YRjrR3n^FPK+hhEYaiPC1~J% 
z+@szwqR^0+?}0qSzd}I@5)6tbiU$9{=zxw28TN7&LMTdz2ywzgY)1k>Y={A2_riK!9V9S%U&9QkK*!2L5dLqJD9Nq>y8E-k&FLj@ zd*wLX{tuCiaQiTFuMgbsgUa8CQUcs#RzXtla2%nu@Gu;q!0OLoZJ>P$xJiaVaIDOT zEr?KE>7VTDD8v|#lGBwGFaC#DqLlz%=<>U`AI>T#O6(H^Zo#zjBWlZO@`Dg3n+%5v z@>H5cnKansU@fuD?E z357S6n{<#F<@+*fKhvHXU%)A%{9md13f`#UBPxIF*q4?J#=K4kDCY-ZoC}W=CzQNs z`Y8NVeu&mLg1i%{t32H5x<7Qh!0o@ou$w-~B;4xz4o2EI*bcA(--nOn!bvhr=frS# z!of*}W{iPT(fS(C^=C$C?&QKet>mtrWQ89Hpd5{=nalSd)`}|YV`L!7)qd$JTJ;k4 z^ zbS2;;>$WL>4b5$q7`B_kGfwvti78vA(FMwapXI{hHWz}?9gE;nT}So-XGsMCvLj8jYNSsR$n#~?bLIdha6YGO**RL4b7M~a;C)_{82*Ki$k?^;urcNMPwLTacpKySXD{A31FEdIm!}BV50)I>SVN7j3`L^vuLN; z7&iEMc1{IHj6ZC)jHPB~qtEK!yB!(Cfch{6)Ca^NITW3+NOGtnxzk>TJ4X7Gb2kwu zh#B^l_ptXpu(w>h!e0t;TDIDhq&@y-Sg+70{ds)SpLe>RGD-3vNjoq879eeEU8ah{ zn54so`gpW`$ecIUC*sUL)5)6|1^krbt9+Oe)ntau{w`gwezdo4tA7NzL;q_K&=Tj2s!=h=&y0^% uFI3M}FX4Mh9{W+Vdd14ow-c*EcZNQit6sH=R;i+o=B+~Y9Xu;k^#23c0T)^T diff --git a/nlp_resource_data/nltk/ccg/__pycache__/lexicon.cpython-37.pyc b/nlp_resource_data/nltk/ccg/__pycache__/lexicon.cpython-37.pyc index bd6b1bddf212842cb5335cc5b3452c0c6c23253e..55e1a299d25e6c29157c22c928685841f0ac08c8 100644 GIT binary patch delta 2235 zcmZ`)O>Epm6!zHOU9a~y|C^sSo8M*&P5MuXlKv&3iQ1%5N~PjL&34BmZf);s>@962 zSfS(um!eTQA)pcxoS@QP5O*Xb4wXPcU?Fknsp5o$1nPBiYiMwj4MPkdBhD=aa9K}$eR~^-j z88N}doVc4X60T-wZqi7)x}gg`?xftbkrr~o$+%e~3vz={&B?iWBTpnt`K*N|9}uHe zNIFPuLP~+uE~K=j+~{~98=Yd5*&KC=QFe1w5ThLEbf1%Gp0;dAw8iRKSBzdO4NNO- z1Evj`bUFlXgMBmlkL@^!>Dl_CjBzeRL1>19TtVOZzv7F-XVg038Hr z$jZ{8bwY>f$cD5|jA3htjzT{|l}l2obT_Nd%$zG;ux{Ixs^{xZFs>yM=Ib61)G{4A z9$2jG`1}&N*wU1#+Qi(iwQ7cT%@sZfAX%*J(EK!jrmM0Z(-ipPEY%!OGzd}oSK2|6ZeC0G z6L90*%o}8a|CTB2-v0zBywA04KRLn2vY%&;!nhP;FG0sn@L#iohbK_n-c&>*?OC^j znJ6N6K7j>(96*ZcL?i4JKaivSM_{}JRfq2g)V)%!)plCp4|5%vFJaD?0VI{3;Xmg5 zIBKz8em>tL3O>?k=e2x$%h$MAY@@2Am9W|7{rqJ@j_^NPPQQtKsQdNneqaZ6xLC7j zNJV%VprnQI%39sK>9aB9N(gfRK$ioo?87iH)XJ?gGZJW~!u)64{h+m%e8azJeSs`A ze`CkjAGXQzAbHn8plh{hWUJ9aCtj_`;lBi!21txmsNu>todvnxo!l^s>ri=gLb*j zuEOl!{M*7bxyt`3ymAc3P_frg3C|xvRPZ%S;O1*S26>LkR%Yd<-~CW&jEegj>ZbWt zJ*YFwG#S=)C^IWeRjQ5?WyEJ#9Y)%7f}7)&%IbKzW=Bupx28)#ejC{3r)AHanW?*}FuY}jaNmB|>-E>hacbw&j?)jh6emTbh&D;{(WEVT}FTbymyAH3a^ zrV@%!A}&a%(z|gYQY8+(AQ?Er9{`CH_`;#5ax3Cg35l8Kl%!VK)tfgnZ@%8ly!mDJ z@yym@JZ?zvjomI+7LG{LV2|p6aP}&C=>2 zD^w3#;ks_=g0Fdzx?vdt4|&mg%!(1oAz#O7c$ZiSz_#L42Q(>|5x`7=84kJAw<}vI zK}GkdenG|dsI;Kspfhk@q6wPZk!aF6w5?bfpiSBbbf2KJG)4P&Bx{hS=>Y6Q^bpO! zK1{Q#bP$jcIz)$IAEig=2p!!a))+lXb2JZF&N1ltHlYPNu_J90EAQm!Vc5s1d|fIY zyPq(Ymd@wjcJ8@$v*8<0Fn4^LTx~xk>vE^F~I&u$b_RszNX!sMWK%32aE}uvas2S`%b;u z2wdB*;L(dNI^ha(E0twF9J+rl0v0UAuD65*h6Y6t44oAB3~J3_&3Tz2(5qA~@!vz` z3;i!>vMiD#2=?6zOk5MamSFklPd8NA&~yd<5R0@w4sQ{n@Td9|iSt6_czWM}&_zcU zzZH4#>I7iVLsUO$GJ3!ligpaU8aTBka~=N{A2qh~1=M(X<`2WsNBl?QZ<6NUM+>?z zuCX}(Bbw_C0WatARP6NXC}3<1K@LG4fyhJ=F|^su#Qq)jlQ0ga@4)i$HR@qi)@INEzgDQy3NKz zko9E%lFH^FoIzePtq@vbgZxW#XsRcSq%d%O8)793)V}vT`N^vZnrb>^)O6|sI`?45^e*! 
zw^8+c7Dlx$69#q3Yt~$w>A>uSF}1Geu#2etCW5ylX$uZkuUuTZdTyD$i@f&`ypLcN zKqu6^eFs8f-8>>2=R>zqMaPLZgS{!5L}Z5D1ky5_EvEsYR|2=OiR(aSM2^JVSk!0g xO}gPZbLhdxw*j+?#N;fjSlCbv#ZVIBHxzTioYvF2p@;Q|K4#>Nlcpvg{})?$xDWsU diff --git a/nlp_resource_data/nltk/ccg/__pycache__/logic.cpython-37.pyc b/nlp_resource_data/nltk/ccg/__pycache__/logic.cpython-37.pyc index df6e87bc96c3bd7a9b99df1122671c2a8f0145af..273740fa50d25a31608b5d2fefc6a30f3206e947 100644 GIT binary patch delta 32 mcmaFGeUF>liI # URL: # For license information, see LICENSE.TXT - +from __future__ import unicode_literals from functools import total_ordering from abc import ABCMeta, abstractmethod +from six import add_metaclass from nltk.internals import raise_unorderable_types +from nltk.compat import python_2_unicode_compatible, unicode_repr + +@add_metaclass(ABCMeta) @total_ordering -class AbstractCCGCategory(metaclass=ABCMeta): - """ +class AbstractCCGCategory(object): + ''' Interface for categories in combinatory grammars. - """ + ''' @abstractmethod def is_primitive(self): @@ -80,12 +84,13 @@ class AbstractCCGCategory(metaclass=ABCMeta): return self._hash +@python_2_unicode_compatible class CCGVar(AbstractCCGCategory): - """ + ''' Class representing a variable CCG category. Used for conjunctions (and possibly type-raising, if implemented as a unary rule). - """ + ''' _maxID = 0 @@ -146,12 +151,13 @@ class CCGVar(AbstractCCGCategory): @total_ordering +@python_2_unicode_compatible class Direction(object): - """ + ''' Class representing the direction of a function application. Also contains maintains information as to which combinators may be used with the category. - """ + ''' def __init__(self, dir, restrictions): self._dir = dir @@ -160,10 +166,10 @@ class Direction(object): # Testing the application direction def is_forward(self): - return self._dir == "/" + return self._dir == '/' def is_backward(self): - return self._dir == "\\" + return self._dir == '\\' def dir(self): return self._dir @@ -178,16 +184,16 @@ class Direction(object): return self._restrs def is_variable(self): - return self._restrs == "_" + return self._restrs == '_' # Unification and substitution of variable directions. # Used only if type-raising is implemented as a unary rule, as it # must inherit restrictions from the argument category. def can_unify(self, other): if other.is_variable(): - return [("_", self.restrs())] + return [('_', self.restrs())] elif self.is_variable(): - return [("_", other.restrs())] + return [('_', other.restrs())] else: if self.restrs() == other.restrs(): return [] @@ -198,16 +204,16 @@ class Direction(object): return self for (var, restrs) in subs: - if var == "_": + if var == '_': return Direction(self._dir, restrs) return self # Testing permitted combinators def can_compose(self): - return "," not in self._restrs + return ',' not in self._restrs def can_cross(self): - return "." not in self._restrs + return '.' not in self._restrs def __eq__(self, other): return ( @@ -241,18 +247,19 @@ class Direction(object): # The negation operator reverses the direction of the application def __neg__(self): - if self._dir == "/": - return Direction("\\", self._restrs) + if self._dir == '/': + return Direction('\\', self._restrs) else: - return Direction("/", self._restrs) + return Direction('/', self._restrs) +@python_2_unicode_compatible class PrimitiveCategory(AbstractCCGCategory): - """ + ''' Class representing primitive categories. Takes a string representation of the category, and a list of strings specifying the morphological subcategories. 
- """ + ''' def __init__(self, categ, restrictions=[]): self._categ = categ @@ -296,16 +303,17 @@ class PrimitiveCategory(AbstractCCGCategory): def __str__(self): if self._restrs == []: return "%s" % self._categ - restrictions = "[%s]" % ",".join(repr(r) for r in self._restrs) + restrictions = "[%s]" % ",".join(unicode_repr(r) for r in self._restrs) return "%s%s" % (self._categ, restrictions) +@python_2_unicode_compatible class FunctionalCategory(AbstractCCGCategory): - """ + ''' Class that represents a function application category. Consists of argument and result categories, together with an application direction. - """ + ''' def __init__(self, res, arg, dir): self._res = res diff --git a/nlp_resource_data/nltk/ccg/chart.py b/nlp_resource_data/nltk/ccg/chart.py index ab4807c..bd410c7 100644 --- a/nlp_resource_data/nltk/ccg/chart.py +++ b/nlp_resource_data/nltk/ccg/chart.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Combinatory Categorial Grammar # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Graeme Gange # URL: # For license information, see LICENSE.TXT @@ -29,9 +29,12 @@ which should print a nice representation of the derivation. This entire process is shown far more clearly in the demonstration: python chart.py """ +from __future__ import print_function, division, unicode_literals import itertools +from six import string_types + from nltk.parse import ParserI from nltk.parse.chart import AbstractChartRule, EdgeI, Chart from nltk.tree import Tree @@ -48,7 +51,7 @@ from nltk.ccg.combinator import ( BackwardBx, BackwardSx, ) - +from nltk.compat import python_2_unicode_compatible from nltk.ccg.combinator import * from nltk.ccg.logic import * from nltk.sem.logic import * @@ -102,9 +105,9 @@ class CCGEdge(EdgeI): class CCGLeafEdge(EdgeI): - """ + ''' Class representing leaf edges in a CCG derivation. - """ + ''' def __init__(self, pos, token, leaf): self._pos = pos @@ -153,11 +156,12 @@ class CCGLeafEdge(EdgeI): return self._leaf +@python_2_unicode_compatible class BinaryCombinatorRule(AbstractChartRule): - """ + ''' Class implementing application of a binary combinator to a chart. Takes the directed combinator to apply. - """ + ''' NUMEDGES = 2 @@ -189,12 +193,11 @@ class BinaryCombinatorRule(AbstractChartRule): # Type-raising must be handled slightly differently to the other rules, as the # resulting rules only span a single edge, rather than both edges. - - +@python_2_unicode_compatible class ForwardTypeRaiseRule(AbstractChartRule): - """ + ''' Class for applying forward type raising - """ + ''' NUMEDGES = 2 @@ -214,10 +217,11 @@ class ForwardTypeRaiseRule(AbstractChartRule): return "%s" % self._combinator +@python_2_unicode_compatible class BackwardTypeRaiseRule(AbstractChartRule): - """ + ''' Class for applying backward type raising. - """ + ''' NUMEDGES = 2 @@ -260,10 +264,10 @@ DefaultRuleSet = ( class CCGChartParser(ParserI): - """ + ''' Chart parser for CCGs. Based largely on the ChartParser class from NLTK. 
- """ + ''' def __init__(self, lexicon, rules, trace=0): self._lexicon = lexicon @@ -365,7 +369,7 @@ def compute_semantics(children, edge): elif isinstance(combinator, UndirectedSubstitution): return compute_substitution_semantics(function, argument) else: - raise AssertionError("Unsupported combinator '" + combinator + "'") + raise AssertionError('Unsupported combinator \'' + combinator + '\'') else: return compute_type_raised_semantics(children[0].label()[0].semantics()) @@ -376,8 +380,8 @@ def compute_semantics(children, edge): def printCCGDerivation(tree): # Get the leaves and initial categories leafcats = tree.pos() - leafstr = "" - catstr = "" + leafstr = '' + catstr = '' # Construct a string with both the leaf word and corresponding # category aligned. @@ -386,10 +390,10 @@ def printCCGDerivation(tree): nextlen = 2 + max(len(leaf), len(str_cat)) lcatlen = (nextlen - len(str_cat)) // 2 rcatlen = lcatlen + (nextlen - len(str_cat)) % 2 - catstr += " " * lcatlen + str_cat + " " * rcatlen + catstr += ' ' * lcatlen + str_cat + ' ' * rcatlen lleaflen = (nextlen - len(leaf)) // 2 rleaflen = lleaflen + (nextlen - len(leaf)) % 2 - leafstr += " " * lleaflen + leaf + " " * rleaflen + leafstr += ' ' * lleaflen + leaf + ' ' * rleaflen print(leafstr.rstrip()) print(catstr.rstrip()) @@ -419,18 +423,18 @@ def printCCGTree(lwidth, tree): (token, op) = tree.label() - if op == "Leaf": + if op == 'Leaf': return rwidth # Pad to the left with spaces, followed by a sequence of '-' # and the derivation rule. - print(lwidth * " " + (rwidth - lwidth) * "-" + "%s" % op) + print(lwidth * ' ' + (rwidth - lwidth) * '-' + "%s" % op) # Print the resulting category on a new line. str_res = "%s" % (token.categ()) if token.semantics() is not None: str_res += " {" + str(token.semantics()) + "}" respadlen = (rwidth - lwidth - len(str_res)) // 2 + lwidth - print(respadlen * " " + str_res) + print(respadlen * ' ' + str_res) return rwidth @@ -438,7 +442,7 @@ def printCCGTree(lwidth, tree): # Construct the lexicon lex = fromstring( - """ + ''' :- S, NP, N, VP # Primitive categories, S is the target primitive Det :: NP/N # Family of words @@ -467,7 +471,7 @@ lex = fromstring( mushrooms => N parsnips => N bacon => N - """ + ''' ) @@ -477,5 +481,5 @@ def demo(): printCCGDerivation(parse) -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/ccg/combinator.py b/nlp_resource_data/nltk/ccg/combinator.py index 60bb149..56f15ed 100644 --- a/nlp_resource_data/nltk/ccg/combinator.py +++ b/nlp_resource_data/nltk/ccg/combinator.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Combinatory Categorial Grammar # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Graeme Gange # URL: # For license information, see LICENSE.TXT @@ -8,12 +8,16 @@ CCG Combinators """ +from __future__ import unicode_literals from abc import ABCMeta, abstractmethod +from six import add_metaclass +from nltk.compat import python_2_unicode_compatible from nltk.ccg.api import FunctionalCategory -class UndirectedBinaryCombinator(metaclass=ABCMeta): +@add_metaclass(ABCMeta) +class UndirectedBinaryCombinator(object): """ Abstract class for representing a binary combinator. Merely defines functions for checking if the function and argument @@ -34,7 +38,8 @@ class UndirectedBinaryCombinator(metaclass=ABCMeta): pass -class DirectedBinaryCombinator(metaclass=ABCMeta): +@add_metaclass(ABCMeta) +class DirectedBinaryCombinator(object): """ Wrapper for the undirected binary combinator. 
It takes left and right categories, and decides which is to be @@ -51,6 +56,7 @@ class DirectedBinaryCombinator(metaclass=ABCMeta): pass +@python_2_unicode_compatible class ForwardCombinator(DirectedBinaryCombinator): """ Class representing combinators where the primary functor is on the left. @@ -59,7 +65,7 @@ class ForwardCombinator(DirectedBinaryCombinator): restricting the cases in which it may apply. """ - def __init__(self, combinator, predicate, suffix=""): + def __init__(self, combinator, predicate, suffix=''): self._combinator = combinator self._predicate = predicate self._suffix = suffix @@ -77,12 +83,13 @@ class ForwardCombinator(DirectedBinaryCombinator): return ">%s%s" % (self._combinator, self._suffix) +@python_2_unicode_compatible class BackwardCombinator(DirectedBinaryCombinator): """ The backward equivalent of the ForwardCombinator class. """ - def __init__(self, combinator, predicate, suffix=""): + def __init__(self, combinator, predicate, suffix=''): self._combinator = combinator self._predicate = predicate self._suffix = suffix @@ -100,6 +107,7 @@ class BackwardCombinator(DirectedBinaryCombinator): return "<%s%s" % (self._combinator, self._suffix) +@python_2_unicode_compatible class UndirectedFunctionApplication(UndirectedBinaryCombinator): """ Class representing function application. @@ -125,7 +133,7 @@ class UndirectedFunctionApplication(UndirectedBinaryCombinator): yield function.res().substitute(subs) def __str__(self): - return "" + return '' # Predicates for function application. @@ -145,6 +153,7 @@ ForwardApplication = ForwardCombinator(UndirectedFunctionApplication(), forwardO BackwardApplication = BackwardCombinator(UndirectedFunctionApplication(), backwardOnly) +@python_2_unicode_compatible class UndirectedComposition(UndirectedBinaryCombinator): """ Functional composition (harmonic) combinator. @@ -175,7 +184,7 @@ class UndirectedComposition(UndirectedBinaryCombinator): ) def __str__(self): - return "B" + return 'B' # Predicates for restricting application of straight composition. @@ -209,10 +218,11 @@ BackwardComposition = BackwardCombinator(UndirectedComposition(), backwardOnly) # Backward crossed composition BackwardBx = BackwardCombinator( - UndirectedComposition(), backwardBxConstraint, suffix="x" + UndirectedComposition(), backwardBxConstraint, suffix='x' ) +@python_2_unicode_compatible class UndirectedSubstitution(UndirectedBinaryCombinator): """ Substitution (permutation) combinator. @@ -245,7 +255,7 @@ class UndirectedSubstitution(UndirectedBinaryCombinator): ) def __str__(self): - return "S" + return 'S' # Predicate for forward substitution @@ -266,7 +276,7 @@ def backwardSxConstraint(left, right): # Instances of substitution combinators ForwardSubstitution = ForwardCombinator(UndirectedSubstitution(), forwardSConstraint) -BackwardSx = BackwardCombinator(UndirectedSubstitution(), backwardSxConstraint, "x") +BackwardSx = BackwardCombinator(UndirectedSubstitution(), backwardSxConstraint, 'x') # Retrieves the left-most functional category. @@ -277,6 +287,7 @@ def innermostFunction(categ): return categ +@python_2_unicode_compatible class UndirectedTypeRaise(UndirectedBinaryCombinator): """ Undirected combinator for type raising. 
@@ -318,7 +329,7 @@ class UndirectedTypeRaise(UndirectedBinaryCombinator): ) def __str__(self): - return "T" + return 'T' # Predicates for type-raising diff --git a/nlp_resource_data/nltk/ccg/lexicon.py b/nlp_resource_data/nltk/ccg/lexicon.py index 628eb8a..d8e2bf3 100644 --- a/nlp_resource_data/nltk/ccg/lexicon.py +++ b/nlp_resource_data/nltk/ccg/lexicon.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Combinatory Categorial Grammar # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Graeme Gange # URL: # For license information, see LICENSE.TXT @@ -8,10 +8,13 @@ CCG Lexicons """ +from __future__ import unicode_literals + import re from collections import defaultdict from nltk.ccg.api import PrimitiveCategory, Direction, CCGVar, FunctionalCategory +from nltk.compat import python_2_unicode_compatible from nltk.internals import deprecated from nltk.sem.logic import Expression @@ -21,26 +24,26 @@ from nltk.sem.logic import Expression # ------------ # Parses a primitive category and subscripts -PRIM_RE = re.compile(r"""([A-Za-z]+)(\[[A-Za-z,]+\])?""") +PRIM_RE = re.compile(r'''([A-Za-z]+)(\[[A-Za-z,]+\])?''') # Separates the next primitive category from the remainder of the # string -NEXTPRIM_RE = re.compile(r"""([A-Za-z]+(?:\[[A-Za-z,]+\])?)(.*)""") +NEXTPRIM_RE = re.compile(r'''([A-Za-z]+(?:\[[A-Za-z,]+\])?)(.*)''') # Separates the next application operator from the remainder -APP_RE = re.compile(r"""([\\/])([.,]?)([.,]?)(.*)""") +APP_RE = re.compile(r'''([\\/])([.,]?)([.,]?)(.*)''') # Parses the definition of the right-hand side (rhs) of either a word or a family -LEX_RE = re.compile(r"""([\S_]+)\s*(::|[-=]+>)\s*(.+)""", re.UNICODE) +LEX_RE = re.compile(r'''([\S_]+)\s*(::|[-=]+>)\s*(.+)''', re.UNICODE) # Parses the right hand side that contains category and maybe semantic predicate -RHS_RE = re.compile(r"""([^{}]*[^ {}])\s*(\{[^}]+\})?""", re.UNICODE) +RHS_RE = re.compile(r'''([^{}]*[^ {}])\s*(\{[^}]+\})?''', re.UNICODE) # Parses the semantic predicate -SEMANTICS_RE = re.compile(r"""\{([^}]+)\}""", re.UNICODE) +SEMANTICS_RE = re.compile(r'''\{([^}]+)\}''', re.UNICODE) # Strips comments from a line -COMMENTS_RE = re.compile("""([^#]*)(?:#.*)?""") +COMMENTS_RE = re.compile('''([^#]*)(?:#.*)?''') class Token(object): @@ -78,6 +81,7 @@ class Token(object): return cmp((self._categ, self._semantics), other.categ(), other.semantics()) +@python_2_unicode_compatible class CCGLexicon(object): """ Class representing a lexicon for CCG grammars. 
@@ -139,16 +143,16 @@ def matchBrackets(string): rest = string[1:] inside = "(" - while rest != "" and not rest.startswith(")"): - if rest.startswith("("): + while rest != "" and not rest.startswith(')'): + if rest.startswith('('): (part, rest) = matchBrackets(rest) inside = inside + part else: inside = inside + rest[0] rest = rest[1:] - if rest.startswith(")"): - return (inside + ")", rest[1:]) - raise AssertionError("Unmatched bracket in string '" + string + "'") + if rest.startswith(')'): + return (inside + ')', rest[1:]) + raise AssertionError('Unmatched bracket in string \'' + string + '\'') def nextCategory(string): @@ -156,7 +160,7 @@ def nextCategory(string): Separate the string for the next portion of the category from the rest of the string """ - if string.startswith("("): + if string.startswith('('): return matchBrackets(string) return NEXTPRIM_RE.match(string).groups() @@ -173,7 +177,7 @@ def parseSubscripts(subscr): Parse the subscripts for a primitive category """ if subscr: - return subscr[1:-1].split(",") + return subscr[1:-1].split(',') return [] @@ -203,7 +207,7 @@ def parsePrimitiveCategory(chunks, primitives, families, var): subscrs = parseSubscripts(chunks[1]) return (PrimitiveCategory(catstr, subscrs), var) raise AssertionError( - "String '" + catstr + "' is neither a family nor primitive category." + 'String \'' + catstr + '\' is neither a family nor primitive category.' ) @@ -214,10 +218,11 @@ def augParseCategory(line, primitives, families, var=None): """ (cat_string, rest) = nextCategory(line) - if cat_string.startswith("("): + if cat_string.startswith('('): (res, var) = augParseCategory(cat_string[1:-1], primitives, families, var) else: + # print rePrim.match(str).groups() (res, var) = parsePrimitiveCategory( PRIM_RE.match(cat_string).groups(), primitives, families, var ) @@ -228,7 +233,7 @@ def augParseCategory(line, primitives, families, var=None): rest = app[3] (cat_string, rest) = nextCategory(rest) - if cat_string.startswith("("): + if cat_string.startswith('('): (arg, var) = augParseCategory(cat_string[1:-1], primitives, families, var) else: (arg, var) = parsePrimitiveCategory( @@ -253,12 +258,12 @@ def fromstring(lex_str, include_semantics=False): if line == "": continue - if line.startswith(":-"): + if line.startswith(':-'): # A line of primitive categories. 
# The first one is the target category # ie, :- S, N, NP, VP primitives = primitives + [ - prim.strip() for prim in line[2:].strip().split(",") + prim.strip() for prim in line[2:].strip().split(',') ] else: # Either a family definition, or a word definition @@ -266,7 +271,7 @@ def fromstring(lex_str, include_semantics=False): (catstr, semantics_str) = RHS_RE.match(rhs).groups() (cat, var) = augParseCategory(catstr, primitives, families) - if sep == "::": + if sep == '::': # Family definition # ie, Det :: NP/N families[ident] = (cat, var) @@ -288,7 +293,7 @@ def fromstring(lex_str, include_semantics=False): return CCGLexicon(primitives[0], primitives, families, entries) -@deprecated("Use fromstring() instead.") +@deprecated('Use fromstring() instead.') def parseLexicon(lex_str): return fromstring(lex_str) diff --git a/nlp_resource_data/nltk/ccg/logic.py b/nlp_resource_data/nltk/ccg/logic.py index 37b87f3..b89bea9 100644 --- a/nlp_resource_data/nltk/ccg/logic.py +++ b/nlp_resource_data/nltk/ccg/logic.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Combinatory Categorial Grammar # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Tanin Na Nakorn (@tanin) # URL: # For license information, see LICENSE.TXT diff --git a/nlp_resource_data/nltk/chat/__init__.py b/nlp_resource_data/nltk/chat/__init__.py index d34def9..cd0ad40 100644 --- a/nlp_resource_data/nltk/chat/__init__.py +++ b/nlp_resource_data/nltk/chat/__init__.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Chatbots # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Authors: Steven Bird # URL: # For license information, see LICENSE.TXT @@ -15,6 +15,7 @@ typed by users, and respond with automatically generated sentences. These chatbots may not work using the windows command line or the windows IDLE GUI. 
""" +from __future__ import print_function from nltk.chat.util import Chat from nltk.chat.eliza import eliza_chat @@ -24,28 +25,28 @@ from nltk.chat.suntsu import suntsu_chat from nltk.chat.zen import zen_chat bots = [ - (eliza_chat, "Eliza (psycho-babble)"), - (iesha_chat, "Iesha (teen anime junky)"), - (rude_chat, "Rude (abusive bot)"), - (suntsu_chat, "Suntsu (Chinese sayings)"), - (zen_chat, "Zen (gems of wisdom)"), + (eliza_chat, 'Eliza (psycho-babble)'), + (iesha_chat, 'Iesha (teen anime junky)'), + (rude_chat, 'Rude (abusive bot)'), + (suntsu_chat, 'Suntsu (Chinese sayings)'), + (zen_chat, 'Zen (gems of wisdom)'), ] def chatbots(): import sys - print("Which chatbot would you like to talk to?") + print('Which chatbot would you like to talk to?') botcount = len(bots) for i in range(botcount): - print(" %d: %s" % (i + 1, bots[i][1])) + print(' %d: %s' % (i + 1, bots[i][1])) while True: - print("\nEnter a number in the range 1-%d: " % botcount, end=" ") + print('\nEnter a number in the range 1-%d: ' % botcount, end=' ') choice = sys.stdin.readline().strip() if choice.isdigit() and (int(choice) - 1) in range(botcount): break else: - print(" Error: bad chatbot number") + print(' Error: bad chatbot number') chatbot = bots[int(choice) - 1][0] chatbot() diff --git a/nlp_resource_data/nltk/chat/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/chat/__pycache__/__init__.cpython-37.pyc index 2af6a37e14397cde1021b64f235cec2ebf10e76a..8967c15264478025fb6dd1631e854f46f4644703 100644 GIT binary patch delta 332 zcmey$HH(|qiIo2c&3lfsz7k;9wI7sbcOFtJ8LB9$+NH;pNs zHHtrlFNME_Axa=cAecc@aN@GB{CovPnRzAgX{C9|C7Jnon`ba4F){K?{>`kyEdez6 zmyLdDagqLHGZsfimB|$>%Jot}Q9n(=TkP@iDf!9q@wd3*fx1gdi&Ep`iUeuQ9ZPG(hN5eu4VW@>RpVi7Bv0Y#-LsYPrcK>>&XlQ~(r zeb_+)ya*GkQuB&9fMP}bAOhsVB9P;e2uUFK7Kcr4eoARhs+}N^R}50d0`zJet2h9z CvQ_^8 delta 309 zcmbQm{gsQ?iI+4PSU~yzrp4`QvtRx9k;it)ei#zl5erBZNkdU-N@@}7WJOl7L^hBVFG53AYF-gLP^^d#M1X89 n0yz$ekN|RTaoFVMr*=wF$CJ6=Zwb710?UG}4=f~a-zXL7p@Cs8WVY8Sou3`!K*_1{NJHJ)o=ni c*R^O%d2!%k&7p&RIEnBEY`_Ucgc3~PH|>8+p8x;= delta 336 zcmbQL_d%D}iIEeuhtDXb}MK$(116w|HeglPtw03kSl zVz*dQQgictKsuSgI*UM_xWy44pPQJO7a#AZ$q~hums66hmzk2!e>Xav_4ao6Q({rx==tOp+?ybkY7m%}VGG zxYdPzOFl(6-l-_gg>%oGd**Ux-hJu%=f3YKcnWjD-WPs%`iK`$r~(Akh{P&YiR4rev&Ot7$QaKHr*e5gYMn$W_M-GH@ao7SLB4X$&8*Jdg;VI4Yig<7xyo7ipc zz}83=Ik#sD?(NIAWQ_R6(SSv+ZVCyW=pCPK93KJOdOUw?iqP1Z`}@Qmt%K-+Ws}H zTtBdeTBQSz(IxhrgOh`AaCs95+7~BQ< zL6L`%(4^=JK8*P4Ygtvh4?>K2-o%Mmh4|f{1hL%nm;7HQ>m*L0i1?|8ud*Wqop|N7 G*Z&5;iEuyw delta 451 zcmYjMze~eF6uw-NCT)}cRNFui2cdMZMRXJq#lb}c#Uaa)_8lZPY2?x>b`fa@HwhWl zRp==GFAn)roO~%1dLMl6-FJNNeYddYmU(KL1%hY0f5qb1w4Qyu0I>;3Kz%8oHkAm- zpnwWF&=5N0p#ViNz=RT%p@PV&3e}IgEkn&#IOPhLXVg}q4jXe~=U@}Iuxea~#*hk^ zYcqm#o7bdcO&5h5*Nb)rt~2h(-(QI*UF>QYBb6VWD3!da?6qO4cQ2U< zFKE&-nrCG+h~@m1b>sw`v9(Rh=*xBbQ4qq!=f_#j=nzSvhMc?@t>n$nlTX9=2lJR^ AX#fBK diff --git a/nlp_resource_data/nltk/chat/__pycache__/rude.cpython-37.pyc b/nlp_resource_data/nltk/chat/__pycache__/rude.cpython-37.pyc index ee564b25ec6563dc16b5fd004dc315775dd5553a..fa38e07333699e03256731044fe0e3793ab7095b 100644 GIT binary patch delta 367 zcmYjLJx;?g81-KgCn8Z*iGd?zpj{cN4onP)h0Tj))kav2o!~eF145`=pz29jI7~PJ zS75?U0bxtee&6@r_r3S|$Gp5O%L0TmWPAR!FVFOKAQDK}00S9>@sAp0FvEs05;$gw z!YWZ@hgd3!OkM%YWGb`pQk6@(Mea(`9SGX|7SzROoL|GS>YTQo-wk%-b!*=qUb0yh zS}wxZ#d7G~@=($BX%~1urGoT$k;@N)od+KO9YqW>@UzX)_? 
XS~)ao9h-d>-~`Pu5e23>B@})G(xprm delta 330 zcmYk0O-jTt9L4k1_TRyZ47dIBX$N#AGVVp@ByK#6 zp1>=(GU+gA0x!wS`{m_p`Z3MUvTTC6iq;QK2=GYKUFj9n5jWyL| zML1Jbu@}T66{*;-u})Od5I58D3i&Ktqdff%Z@#}iCx@}$xFA30Wo3;n@8x4<7t%Fl zu|9_RzV&zg_y-J|NgS+uIC+9CI`SJyy!XkJHufQK#0$bM4K4^_WZemInFHVL+My}# zDmfI(p>ACWIvQfpLe{Q#Q831aE!;_?h4|~r|24wQShZ~RJP`ekmp#hlghC1o@GmQ3 BL45!K diff --git a/nlp_resource_data/nltk/chat/__pycache__/suntsu.cpython-37.pyc b/nlp_resource_data/nltk/chat/__pycache__/suntsu.cpython-37.pyc index 1487e3f3084f79f153d1ea2efeb572da8c51a2db..2278d0e23a6ac2725ef2125eead59ba2be531978 100644 GIT binary patch delta 381 zcmYjLF;2rU6tx|vagwHlp&*XXftG~@A%-q22q7Ucc=?KI15x9oa+0}285mfqo`D^S zJ8+0xfIBep-HHlZdiHyI@BjBbc{@v{NfHq}ZLo&-b#n4>heQ!c6jiLChDk=<-zz-B zB{#nGk#pJ8T=%rUVlq&^>b(#tRG(0wViA}j;6yU@_Jj0irr;4-UP>|%lOZ}&FsJ#JFc{;UR{nM z5Ni8Q6#yp7OMu%^+ywxtngMhXV2P%+8o)ZY9F#>fAJ3k%X52P;VLcQrcLV%ImfPA2 qC*baDvY%UK#^v|@|8)_Urc!O8uMRL)rwE+(S?ckp$P=O9g8l-eO;P~> delta 344 zcmeA%c&^Xu#LLUY00eSL0rB+{c_qDEfSeSDD25cq6s8=;T&5_dT;?cdMursTD3(;F zRF+icW~M0C6qXd$7KSLc6t)z0AkCi2k;0M9ROFD#-pl}0%Nfj|$u+U&*ya$%I8hl6 zppiu&;+LC#Mt*LpepX^-Mt(tJQMSI`xNbd25Udr6 z-~@`@Vo6EO&2IYLnK?xu9YxF_^{fSnnMK7#>>vTKRWMWX5_41I`Qouy8N}0AD6YnE(I) diff --git a/nlp_resource_data/nltk/chat/__pycache__/util.cpython-37.pyc b/nlp_resource_data/nltk/chat/__pycache__/util.cpython-37.pyc index 541a5eb83b21dd996b3a9beaaf972ab0213f5ac3..35f68fd8c9af10e9fb6eb2ed9c7c00696749c9bb 100644 GIT binary patch delta 978 zcmZuvOHUL*5T2fA@9fUfNw^E_9>FLh3WqZ4o9 zu!#?*F%oGV;uE5DJ<@%>xDDUH+lb7LRv^8$M&7uyU);Og>g5P*$^2y`S6z5 zS(VHTTNv(J`$-A{Ow_?S+aTYKLB$@F__Z*X3Yg0ltQB%E+p~@)E?h8#{+tFu7>6KGV6yF@(wukv4NH zN_IK?A%Z3eMb=-0qBG?HBZUlQD*0+QR{Ag$W=SWvTd$I^#o<00>vo^>EfwH&8&YzJ;qFC_30!n9H79qJ zF^SoW?7aN#DHtl3u5UhC2S^~dirs-#g>LlG*HHJ}RB*sy;}Ts4q^%_0;UIXLx!!HE zmMwWV4<{7~H3v|27iMpcmXeD%9$0I5zD5=z_O{=s*Cqa4YrQ8j0rDX~{ z2tY?ue={DU=9fgcH`_^exG3s@i50ZwcG7I~mMrrB_e-2`Ct+#CmlX?f14Turgz6PD LBh@KWC)E56%!%HY delta 885 zcmaJefZcR%k2kUGz8j16;TX_yf9j=gM<4VxfWw`R=*r`_8%No|B(bdsFru+qM*X&a@u6 zS9b03rw?cY3b0Ud`YUr7sGxyPXn+YN;7|q&Do_O*24Ikwp^rn3x}v}^)ZQyl^R*q# z(Sbcy>LW)o`|9qOU8ZNZ*=$Xttz&G(DJH0rjmF_?_FbFe(w6Y+Zeu;}`i+jawI268 z+-O|LT;nFYl-5?5t>&|3L=ED3u`Gm5qbeK?q%Ko6eVM5#2LEAS# z-#0}`XdS-IcGLrQKuo8ceKKb!tlUgu6h2^0Ce=be!ZKNCWk=@pVuj4Qiet1mmA=qL z|3z4o+Xq<;ju0%;mzV?!8#=sIYBN^LKJ&BVGWb}U^XsA4ZGqRkpZ(yCYoo;GT9USM zBaAnF{Bs()1QtLznQI+zYd<4@y1Jx(_Y=cv24^M< srWNzPN;aM%$j!Lb@!O(c|4AriG{SBSY3MiQ?~%+9aK?3&{qN=aZ&X{n;Q#;t diff --git a/nlp_resource_data/nltk/chat/__pycache__/zen.cpython-37.pyc b/nlp_resource_data/nltk/chat/__pycache__/zen.cpython-37.pyc index 2003d48795ae9133fd6409f3f62f4cc68900a183..a585b4c7d617c8be87dd9d6874be68282ed89ed9 100644 GIT binary patch delta 398 zcmYjM&r8EF7){c&P1|+MAILn2Cl5Qg!7h3j9t3Zqg5V*SFq(*EO*+!n>kjiQjQu6z zA0zZn5f6TuP7LII`5xbU-^<7NYn)!DX^h}B;YvKO(!+;4Xe8oDVo3_D2`AXSy~5L! zQ|)sfdddSuRiOMO;i2?p@Pas#p=3}a70YNw?6r!P2xjAZG)Ug&$8-m`-MP(8Egl<_ z)p=>&Uum(|bNj7>&$Cfe+0pu-KRG(b2vxg4BFu32_cExA*v`7_pDsFE*+%;V32-fy zN|#qoPd0j+I+h8cr@0a0W{|X@5VFjKXk!6k>c%P|tefUtQ`ECz_B5@BO`R9k1JY7w z5Ls2tOH--JM$q5dO@OVpnQG_p_m_Wp*hH7IDb&RY$f|t+iaVqi(FIzdAof_wD8t{a CnN&Lf delta 361 zcmYjMO-sW-5Z$jPo75E1O9ekpf<2f(FJ6j>_T)hd-uAM@?m}v^*|JFwLLo@~1Jd8& zLHsd7e~Nf;H(GFD-Y~TFi&{Gx7C+a&>Wd7>92s@4er4pd|#fBk2jL<9|l&_IQY6rGOc9t zm_B8Co|-&~*Ju6F(FFvc+OrD)QrP=TYAZ#iyMFGUH{Gp7tMd!)@C^}C>9OOZmTz|4 zlya`pS;_g`AlUev3q9qW^~$2o<7t-G@uHp;#xV@Gzyc$yxh^YN8R~|u-Bzhg)kt#w h&$)f5wWlMc#iEdxr # Edward Loper # URL: @@ -12,6 +12,7 @@ # a translation table used to convert things you say into things the # computer says back, e.g. 
"I am" --> "you are" +from __future__ import print_function from nltk.chat.util import Chat, reflections # a table of response pairs, where each pair consists of a @@ -20,7 +21,7 @@ from nltk.chat.util import Chat, reflections pairs = ( ( - r"I need (.*)", + r'I need (.*)', ( "Why do you need %1?", "Would it really help you to get %1?", @@ -28,7 +29,7 @@ pairs = ( ), ), ( - r"Why don\'t you (.*)", + r'Why don\'t you (.*)', ( "Do you really think I don't %1?", "Perhaps eventually I will %1.", @@ -36,7 +37,7 @@ pairs = ( ), ), ( - r"Why can\'t I (.*)", + r'Why can\'t I (.*)', ( "Do you think you should be able to %1?", "If you could %1, what would you do?", @@ -45,7 +46,7 @@ pairs = ( ), ), ( - r"I can\'t (.*)", + r'I can\'t (.*)', ( "How do you know you can't %1?", "Perhaps you could %1 if you tried.", @@ -53,7 +54,7 @@ pairs = ( ), ), ( - r"I am (.*)", + r'I am (.*)', ( "Did you come to me because you are %1?", "How long have you been %1?", @@ -61,7 +62,7 @@ pairs = ( ), ), ( - r"I\'m (.*)", + r'I\'m (.*)', ( "How does being %1 make you feel?", "Do you enjoy being %1?", @@ -70,7 +71,7 @@ pairs = ( ), ), ( - r"Are you (.*)", + r'Are you (.*)', ( "Why does it matter whether I am %1?", "Would you prefer it if I were not %1?", @@ -79,7 +80,7 @@ pairs = ( ), ), ( - r"What (.*)", + r'What (.*)', ( "Why do you ask?", "How would an answer to that help you?", @@ -87,7 +88,7 @@ pairs = ( ), ), ( - r"How (.*)", + r'How (.*)', ( "How do you suppose?", "Perhaps you can answer your own question.", @@ -95,7 +96,7 @@ pairs = ( ), ), ( - r"Because (.*)", + r'Because (.*)', ( "Is that the real reason?", "What other reasons come to mind?", @@ -104,14 +105,14 @@ pairs = ( ), ), ( - r"(.*) sorry (.*)", + r'(.*) sorry (.*)', ( "There are many times when no apology is needed.", "What feelings do you have when you apologize?", ), ), ( - r"Hello(.*)", + r'Hello(.*)', ( "Hello... I'm glad you could drop by today.", "Hi there... 
how are you today?", @@ -119,20 +120,20 @@ pairs = ( ), ), ( - r"I think (.*)", + r'I think (.*)', ("Do you doubt %1?", "Do you really think so?", "But you're not sure %1?"), ), ( - r"(.*) friend (.*)", + r'(.*) friend (.*)', ( "Tell me more about your friends.", "When you think of a friend, what comes to mind?", "Why don't you tell me about a childhood friend?", ), ), - (r"Yes", ("You seem quite sure.", "OK, but can you elaborate a bit?")), + (r'Yes', ("You seem quite sure.", "OK, but can you elaborate a bit?")), ( - r"(.*) computer(.*)", + r'(.*) computer(.*)', ( "Are you really talking about me?", "Does it seem strange to talk to a computer?", @@ -141,7 +142,7 @@ pairs = ( ), ), ( - r"Is it (.*)", + r'Is it (.*)', ( "Do you think it is %1?", "Perhaps it's %1 -- what do you think?", @@ -150,14 +151,14 @@ pairs = ( ), ), ( - r"It is (.*)", + r'It is (.*)', ( "You seem very certain.", "If I told you that it probably isn't %1, what would you feel?", ), ), ( - r"Can you (.*)", + r'Can you (.*)', ( "What makes you think I can't %1?", "If I could %1, then what?", @@ -165,7 +166,7 @@ pairs = ( ), ), ( - r"Can I (.*)", + r'Can I (.*)', ( "Perhaps you don't want to %1.", "Do you want to be able to %1?", @@ -173,7 +174,7 @@ pairs = ( ), ), ( - r"You are (.*)", + r'You are (.*)', ( "Why do you think I am %1?", "Does it please you to think that I'm %1?", @@ -182,7 +183,7 @@ pairs = ( ), ), ( - r"You\'re (.*)", + r'You\'re (.*)', ( "Why do you say I am %1?", "Why do you think I am %1?", @@ -190,11 +191,11 @@ pairs = ( ), ), ( - r"I don\'t (.*)", + r'I don\'t (.*)', ("Don't you really %1?", "Why don't you %1?", "Do you want to %1?"), ), ( - r"I feel (.*)", + r'I feel (.*)', ( "Good, tell me more about these feelings.", "Do you often feel %1?", @@ -203,7 +204,7 @@ pairs = ( ), ), ( - r"I have (.*)", + r'I have (.*)', ( "Why do you tell me that you've %1?", "Have you really %1?", @@ -211,7 +212,7 @@ pairs = ( ), ), ( - r"I would (.*)", + r'I would (.*)', ( "Could you explain why you would %1?", "Why would you %1?", @@ -219,7 +220,7 @@ pairs = ( ), ), ( - r"Is there (.*)", + r'Is there (.*)', ( "Do you think there is %1?", "It's likely that there is %1.", @@ -227,7 +228,7 @@ pairs = ( ), ), ( - r"My (.*)", + r'My (.*)', ( "I see, your %1.", "Why do you say that your %1?", @@ -235,16 +236,16 @@ pairs = ( ), ), ( - r"You (.*)", + r'You (.*)', ( "We should be discussing you, not me.", "Why do you say that about me?", "Why do you care whether I %1?", ), ), - (r"Why (.*)", ("Why don't you tell me the reason why %1?", "Why do you think %1?")), + (r'Why (.*)', ("Why don't you tell me the reason why %1?", "Why do you think %1?")), ( - r"I want (.*)", + r'I want (.*)', ( "What would it mean to you if you got %1?", "Why do you want %1?", @@ -253,7 +254,7 @@ pairs = ( ), ), ( - r"(.*) mother(.*)", + r'(.*) mother(.*)', ( "Tell me more about your mother.", "What was your relationship with your mother like?", @@ -263,7 +264,7 @@ pairs = ( ), ), ( - r"(.*) father(.*)", + r'(.*) father(.*)', ( "Tell me more about your father.", "How did your father make you feel?", @@ -273,7 +274,7 @@ pairs = ( ), ), ( - r"(.*) child(.*)", + r'(.*) child(.*)', ( "Did you have close friends as a child?", "What is your favorite childhood memory?", @@ -283,7 +284,7 @@ pairs = ( ), ), ( - r"(.*)\?", + r'(.*)\?', ( "Why do you ask that?", "Please consider whether you can answer your own question.", @@ -292,7 +293,7 @@ pairs = ( ), ), ( - r"quit", + r'quit', ( "Thank you for talking with me.", "Good-bye.", @@ -300,7 +301,7 @@ pairs = ( ), ), 
( - r"(.*)", + r'(.*)', ( "Please tell me more.", "Let's change focus a bit... Tell me about your family.", @@ -323,7 +324,7 @@ def eliza_chat(): print("Therapist\n---------") print("Talk to the program by typing in plain English, using normal upper-") print('and lower-case letters and punctuation. Enter "quit" when done.') - print("=" * 72) + print('=' * 72) print("Hello. How are you feeling today?") eliza_chatbot.converse() diff --git a/nlp_resource_data/nltk/chat/iesha.py b/nlp_resource_data/nltk/chat/iesha.py index 55318af..4a7a615 100644 --- a/nlp_resource_data/nltk/chat/iesha.py +++ b/nlp_resource_data/nltk/chat/iesha.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Teen Chatbot # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Selina Dennis # URL: # For license information, see LICENSE.TXT @@ -10,6 +10,7 @@ This chatbot is a tongue-in-cheek take on the average teen anime junky that frequents YahooMessenger or MSNM. All spelling mistakes and flawed grammar are intentional. """ +from __future__ import print_function from nltk.chat.util import Chat @@ -40,27 +41,27 @@ reflections = { pairs = ( ( - r"I\'m (.*)", + r'I\'m (.*)', ( "ur%1?? that's so cool! kekekekeke ^_^ tell me more!", "ur%1? neat!! kekeke >_<", ), ), ( - r"(.*) don\'t you (.*)", + r'(.*) don\'t you (.*)', ( "u think I can%2??! really?? kekeke \<_\<", "what do u mean%2??!", "i could if i wanted, don't you think!! kekeke", ), ), - (r"ye[as] [iI] (.*)", ("u%1? cool!! how?", "how come u%1??", "u%1? so do i!!")), + (r'ye[as] [iI] (.*)', ("u%1? cool!! how?", "how come u%1??", "u%1? so do i!!")), ( - r"do (you|u) (.*)\??", + r'do (you|u) (.*)\??', ("do i%2? only on tuesdays! kekeke *_*", "i dunno! do u%2??"), ), ( - r"(.*)\?", + r'(.*)\?', ( "man u ask lots of questions!", "booooring! how old r u??", @@ -68,11 +69,11 @@ pairs = ( ), ), ( - r"(cos|because) (.*)", + r'(cos|because) (.*)', ("hee! i don't believe u! >_<", "nuh-uh! >_<", "ooooh i agree!"), ), ( - r"why can\'t [iI] (.*)", + r'why can\'t [iI] (.*)', ( "i dunno! y u askin me for!", "try harder, silly! hee! ^_^", @@ -80,7 +81,7 @@ pairs = ( ), ), ( - r"I can\'t (.*)", + r'I can\'t (.*)', ( "u can't what??! >_<", "that's ok! i can't%1 either! kekekekeke ^_^", @@ -88,7 +89,7 @@ pairs = ( ), ), ( - r"(.*) (like|love|watch) anime", + r'(.*) (like|love|watch) anime', ( "omg i love anime!! do u like sailor moon??! ^&^", "anime yay! anime rocks sooooo much!", @@ -98,11 +99,11 @@ pairs = ( ), ), ( - r"I (like|love|watch|play) (.*)", + r'I (like|love|watch|play) (.*)', ("yay! %2 rocks!", "yay! %2 is neat!", "cool! do u like other stuff?? ^_^"), ), ( - r"anime sucks|(.*) (hate|detest) anime", + r'anime sucks|(.*) (hate|detest) anime', ( "ur a liar! i'm not gonna talk to u nemore if u h8 anime *;*", "no way! anime is the best ever!", @@ -110,17 +111,17 @@ pairs = ( ), ), ( - r"(are|r) (you|u) (.*)", + r'(are|r) (you|u) (.*)', ("am i%1??! how come u ask that!", "maybe! y shud i tell u?? kekeke >_>"), ), ( - r"what (.*)", + r'what (.*)', ("hee u think im gonna tell u? .v.", "booooooooring! ask me somethin else!"), ), - (r"how (.*)", ("not tellin!! kekekekekeke ^_^",)), - (r"(hi|hello|hey) (.*)", ("hi!!! how r u!!",)), + (r'how (.*)', ("not tellin!! kekekekekeke ^_^",)), + (r'(hi|hello|hey) (.*)', ("hi!!! how r u!!",)), ( - r"quit", + r'quit', ( "mom says i have to go eat dinner now :,( bye!!", "awww u have to go?? see u next time!!", @@ -128,7 +129,7 @@ pairs = ( ), ), ( - r"(.*)", + r'(.*)', ( "ur funny! kekeke", "boooooring! 
talk about something else! tell me wat u like!", @@ -146,7 +147,7 @@ def iesha_chat(): print("Iesha the TeenBoT\n---------") print("Talk to the program by typing in plain English, using normal upper-") print('and lower-case letters and punctuation. Enter "quit" when done.') - print("=" * 72) + print('=' * 72) print("hi!! i'm iesha! who r u??!") iesha_chatbot.converse() diff --git a/nlp_resource_data/nltk/chat/rude.py b/nlp_resource_data/nltk/chat/rude.py index c7b1b1b..c9c9de8 100644 --- a/nlp_resource_data/nltk/chat/rude.py +++ b/nlp_resource_data/nltk/chat/rude.py @@ -1,15 +1,16 @@ # Natural Language Toolkit: Rude Chatbot # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Peter Spiller # URL: # For license information, see LICENSE.TXT +from __future__ import print_function from nltk.chat.util import Chat, reflections pairs = ( ( - r"We (.*)", + r'We (.*)', ( "What do you mean, 'we'?", "Don't include me in that!", @@ -17,11 +18,11 @@ pairs = ( ), ), ( - r"You should (.*)", + r'You should (.*)', ("Don't tell me what to do, buddy.", "Really? I should, should I?"), ), ( - r"You\'re(.*)", + r'You\'re(.*)', ( "More like YOU'RE %1!", "Hah! Look who's talking.", @@ -29,7 +30,7 @@ pairs = ( ), ), ( - r"You are(.*)", + r'You are(.*)', ( "More like YOU'RE %1!", "Hah! Look who's talking.", @@ -37,7 +38,7 @@ pairs = ( ), ), ( - r"I can\'t(.*)", + r'I can\'t(.*)', ( "You do sound like the type who can't %1.", "Hear that splashing sound? That's my heart bleeding for you.", @@ -45,14 +46,14 @@ pairs = ( ), ), ( - r"I think (.*)", + r'I think (.*)', ( "I wouldn't think too hard if I were you.", "You actually think? I'd never have guessed...", ), ), ( - r"I (.*)", + r'I (.*)', ( "I'm getting a bit tired of hearing about you.", "How about we talk about me instead?", @@ -60,23 +61,23 @@ pairs = ( ), ), ( - r"How (.*)", + r'How (.*)', ( "How do you think?", "Take a wild guess.", "I'm not even going to dignify that with an answer.", ), ), - (r"What (.*)", ("Do I look like an encyclopedia?", "Figure it out yourself.")), + (r'What (.*)', ("Do I look like an encyclopedia?", "Figure it out yourself.")), ( - r"Why (.*)", + r'Why (.*)', ( "Why not?", "That's so obvious I thought even you'd have already figured it out.", ), ), ( - r"(.*)shut up(.*)", + r'(.*)shut up(.*)', ( "Make me.", "Getting angry at a feeble NLP assignment? Somebody's losing it.", @@ -84,7 +85,7 @@ pairs = ( ), ), ( - r"Shut up(.*)", + r'Shut up(.*)', ( "Make me.", "Getting angry at a feeble NLP assignment? Somebody's losing it.", @@ -92,11 +93,11 @@ pairs = ( ), ), ( - r"Hello(.*)", + r'Hello(.*)', ("Oh good, somebody else to talk to. Joy.", "'Hello'? How original..."), ), ( - r"(.*)", + r'(.*)', ( "I'm getting bored here. Become more interesting.", "Either become more thrilling or get lost, buddy.", @@ -111,7 +112,7 @@ rude_chatbot = Chat(pairs, reflections) def rude_chat(): print("Talk to the program by typing in plain English, using normal upper-") print('and lower-case letters and punctuation. 
Enter "quit" when done.') - print("=" * 72) + print('=' * 72) print("I suppose I should say hello.") rude_chatbot.converse() diff --git a/nlp_resource_data/nltk/chat/suntsu.py b/nlp_resource_data/nltk/chat/suntsu.py index 4c68a77..9f6dc34 100644 --- a/nlp_resource_data/nltk/chat/suntsu.py +++ b/nlp_resource_data/nltk/chat/suntsu.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Sun Tsu-Bot # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Sam Huston 2007 # URL: # For license information, see LICENSE.TXT @@ -13,27 +13,28 @@ Translated by LIONEL GILES, M.A. 1910 Hosted by the Gutenberg Project http://www.gutenberg.org/ """ +from __future__ import print_function from nltk.chat.util import Chat, reflections pairs = ( - (r"quit", ("Good-bye.", "Plan well", "May victory be your future")), + (r'quit', ("Good-bye.", "Plan well", "May victory be your future")), ( - r"[^\?]*\?", + r'[^\?]*\?', ( "Please consider whether you can answer your own question.", "Ask me no questions!", ), ), ( - r"[0-9]+(.*)", + r'[0-9]+(.*)', ( "It is the rule in war, if our forces are ten to the enemy's one, to surround him; if five to one, to attack him; if twice as numerous, to divide our army into two.", "There are five essentials for victory", ), ), ( - r"[A-Ca-c](.*)", + r'[A-Ca-c](.*)', ( "The art of war is of vital importance to the State.", "All warfare is based on deception.", @@ -44,7 +45,7 @@ pairs = ( ), ), ( - r"[D-Fd-f](.*)", + r'[D-Fd-f](.*)', ( "The skillful soldier does not raise a second levy, neither are his supply-wagons loaded more than twice.", "Bring war material with you from home, but forage on the enemy.", @@ -53,7 +54,7 @@ pairs = ( ), ), ( - r"[G-Ig-i](.*)", + r'[G-Ig-i](.*)', ( "Heaven signifies night and day, cold and heat, times and seasons.", "It is the rule in war, if our forces are ten to the enemy's one, to surround him; if five to one, to attack him; if twice as numerous, to divide our army into two.", @@ -62,7 +63,7 @@ pairs = ( ), ), ( - r"[J-Lj-l](.*)", + r'[J-Lj-l](.*)', ( "There are three ways in which a ruler can bring misfortune upon his army.", "By commanding the army to advance or to retreat, being ignorant of the fact that it cannot obey. This is called hobbling the army.", @@ -77,7 +78,7 @@ pairs = ( ), ), ( - r"[M-Om-o](.*)", + r'[M-Om-o](.*)', ( "If you know the enemy and know yourself, you need not fear the result of a hundred battles.", "If you know yourself but not the enemy, for every victory gained you will also suffer a defeat.", @@ -86,7 +87,7 @@ pairs = ( ), ), ( - r"[P-Rp-r](.*)", + r'[P-Rp-r](.*)', ( "Security against defeat implies defensive tactics; ability to defeat the enemy means taking the offensive.", "Standing on the defensive indicates insufficient strength; attacking, a superabundance of strength.", @@ -96,7 +97,7 @@ pairs = ( ), ), ( - r"[S-Us-u](.*)", + r'[S-Us-u](.*)', ( "What the ancients called a clever fighter is one who not only wins, but excels in winning with ease.", "Hence his victories bring him neither reputation for wisdom nor credit for courage.", @@ -107,7 +108,7 @@ pairs = ( ), ), ( - r"[V-Zv-z](.*)", + r'[V-Zv-z](.*)', ( "It is a matter of life and death, a road either to safety or to ruin.", "Hold out baits to entice the enemy. 
Feign disorder, and crush him.", @@ -117,7 +118,7 @@ pairs = ( "Just as water retains no constant shape, so in warfare there are no constant conditions.", ), ), - (r"(.*)", ("Your statement insults me.", "")), + (r'(.*)', ("Your statement insults me.", "")), ) suntsu_chatbot = Chat(pairs, reflections) @@ -126,7 +127,7 @@ suntsu_chatbot = Chat(pairs, reflections) def suntsu_chat(): print("Talk to the program by typing in plain English, using normal upper-") print('and lower-case letters and punctuation. Enter "quit" when done.') - print("=" * 72) + print('=' * 72) print("You seek enlightenment?") suntsu_chatbot.converse() diff --git a/nlp_resource_data/nltk/chat/util.py b/nlp_resource_data/nltk/chat/util.py index 8f4ec5d..f2dd361 100644 --- a/nlp_resource_data/nltk/chat/util.py +++ b/nlp_resource_data/nltk/chat/util.py @@ -1,16 +1,19 @@ # Natural Language Toolkit: Chatbot Utilities # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Authors: Steven Bird # URL: # For license information, see LICENSE.TXT # Based on an Eliza implementation by Joe Strout , # Jeff Epler and Jez Higgins . +from __future__ import print_function import re import random +from six.moves import input + reflections = { "i am": "you are", @@ -54,7 +57,7 @@ class Chat(object): self._regex = self._compile_reflections() def _compile_reflections(self): - sorted_refl = sorted(self._reflections, key=len, reverse=True) + sorted_refl = sorted(self._reflections.keys(), key=len, reverse=True) return re.compile( r"\b({0})\b".format("|".join(map(re.escape, sorted_refl))), re.IGNORECASE ) @@ -74,7 +77,7 @@ class Chat(object): ) def _wildcards(self, response, match): - pos = response.find("%") + pos = response.find('%') while pos >= 0: num = int(response[pos + 1 : pos + 2]) response = ( @@ -82,7 +85,7 @@ class Chat(object): + self._substitute(match.group(num)) + response[pos + 2 :] ) - pos = response.find("%") + pos = response.find('%') return response def respond(self, str): @@ -104,10 +107,10 @@ class Chat(object): resp = self._wildcards(resp, match) # process wildcards # fix munged punctuation at the end - if resp[-2:] == "?.": - resp = resp[:-2] + "." - if resp[-2:] == "??": - resp = resp[:-2] + "?" + if resp[-2:] == '?.': + resp = resp[:-2] + '.' + if resp[-2:] == '??': + resp = resp[:-2] + '?' return resp # Hold a conversation with a chatbot diff --git a/nlp_resource_data/nltk/chat/zen.py b/nlp_resource_data/nltk/chat/zen.py index 2ae944b..d46a9f9 100644 --- a/nlp_resource_data/nltk/chat/zen.py +++ b/nlp_resource_data/nltk/chat/zen.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Zen Chatbot # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Amy Holland # URL: # For license information, see LICENSE.TXT @@ -35,6 +35,7 @@ responses are very limited. Zen Chatbot will usually answer very vaguely, or respond to a question by asking a different question, in much the same way as Eliza. """ +from __future__ import print_function from nltk.chat.util import Chat, reflections @@ -46,7 +47,7 @@ responses = ( # "good day" etc, but also "good grief!" and other sentences starting # with the word 'good' that may not be a greeting ( - r"(hello(.*))|(good [a-zA-Z]+)", + r'(hello(.*))|(good [a-zA-Z]+)', ( "The path to enlightenment is often difficult to see.", "Greetings. I sense your mind is troubled. 
Tell me of your troubles.", @@ -64,7 +65,7 @@ responses = ( # interpretation only makes sense for some inputs # ( - r"i need (.*)", + r'i need (.*)', ( "%1 can be achieved by hard work and dedication of the mind.", "%1 is not a need, but a desire of the mind. Clear your mind of such concerns.", @@ -72,7 +73,7 @@ responses = ( ), ), ( - r"i want (.*)", + r'i want (.*)', ( "Desires of the heart will distract you from the path to enlightenment.", "Will%1 help you attain enlightenment?", @@ -88,27 +89,27 @@ responses = ( # chatbot: "Are you sure I tell you?" # - this style works for positives (e.g. "why do you like cake?") # but does not work for negatives (e.g. "why don't you like cake?") - (r"why (.*) i (.*)\?", ("You%1%2?", "Perhaps you only think you%1%2")), - (r"why (.*) you(.*)\?", ("Why%1 you%2?", "%2 I%1", "Are you sure I%2?")), - (r"why (.*)\?", ("I cannot tell you why%1.", "Why do you think %1?")), + (r'why (.*) i (.*)\?', ("You%1%2?", "Perhaps you only think you%1%2")), + (r'why (.*) you(.*)\?', ("Why%1 you%2?", "%2 I%1", "Are you sure I%2?")), + (r'why (.*)\?', ("I cannot tell you why%1.", "Why do you think %1?")), # e.g. "are you listening?", "are you a duck" ( - r"are you (.*)\?", + r'are you (.*)\?', ("Maybe%1, maybe not%1.", "Whether I am%1 or not is God's business."), ), # e.g. "am I a duck?", "am I going to die?" ( - r"am i (.*)\?", + r'am i (.*)\?', ("Perhaps%1, perhaps not%1.", "Whether you are%1 or not is not for me to say."), ), # what questions, e.g. "what time is it?" # problems: # person: "What do you want?" # chatbot: "Seek truth, not what do me want." - (r"what (.*)\?", ("Seek truth, not what%1.", "What%1 should not concern you.")), + (r'what (.*)\?', ("Seek truth, not what%1.", "What%1 should not concern you.")), # how questions, e.g. "how do you do?" ( - r"how (.*)\?", + r'how (.*)\?', ( "How do you suppose?", "Will an answer to that really help in your search for enlightenment?", @@ -117,7 +118,7 @@ responses = ( ), # can questions, e.g. "can you run?", "can you come over here please?" ( - r"can you (.*)\?", + r'can you (.*)\?', ( "I probably can, but I may not.", "Maybe I can%1, and maybe I cannot.", @@ -126,7 +127,7 @@ responses = ( ), # can questions, e.g. "can I have some cake?", "can I know truth?" ( - r"can i (.*)\?", + r'can i (.*)\?', ( "You can%1 if you believe you can%1, and have a pure spirit.", "Seek truth and you will know if you can%1.", @@ -134,7 +135,7 @@ responses = ( ), # e.g. "It is raining" - implies the speaker is certain of a fact ( - r"it is (.*)", + r'it is (.*)', ( "How can you be certain that%1, when you do not even know yourself?", "Whether it is%1 or not does not change the way the world is.", @@ -142,14 +143,14 @@ responses = ( ), # e.g. "is there a doctor in the house?" ( - r"is there (.*)\?", + r'is there (.*)\?', ("There is%1 if you believe there is.", "It is possible that there is%1."), ), # e.g. "is it possible?", "is this true?" - (r"is(.*)\?", ("%1 is not relevant.", "Does this matter?")), + (r'is(.*)\?', ("%1 is not relevant.", "Does this matter?")), # non-specific question ( - r"(.*)\?", + r'(.*)\?', ( "Do you think %1?", "You seek the truth. 
Does the truth seek you?", @@ -159,7 +160,7 @@ responses = ( ), # expression of hate of form "I hate you" or "Kelly hates cheese" ( - r"(.*) (hate[s]?)|(dislike[s]?)|(don\'t like)(.*)", + r'(.*) (hate[s]?)|(dislike[s]?)|(don\'t like)(.*)', ( "Perhaps it is not about hating %2, but about hate from within.", "Weeds only grow when we dislike them", @@ -168,7 +169,7 @@ responses = ( ), # statement containing the word 'truth' ( - r"(.*) truth(.*)", + r'(.*) truth(.*)', ( "Seek truth, and truth will seek you.", "Remember, it is not the spoon which bends - only yourself.", @@ -178,13 +179,13 @@ responses = ( # desire to do an action # e.g. "I want to go shopping" ( - r"i want to (.*)", + r'i want to (.*)', ("You may %1 if your heart truly desires to.", "You may have to %1."), ), # desire for an object # e.g. "I want a pony" ( - r"i want (.*)", + r'i want (.*)', ( "Does your heart truly desire %1?", "Is this a desire of the heart, or of the mind?", @@ -192,7 +193,7 @@ responses = ( ), # e.g. "I can't wait" or "I can't do this" ( - r"i can\'t (.*)", + r'i can\'t (.*)', ( "What we can and can't do is a limitation of the mind.", "There are limitations of the body, and limitations of the mind.", @@ -203,7 +204,7 @@ responses = ( # problem: exceptions... # e.g. "I think, therefore I am" ( - r"i think (.*)", + r'i think (.*)', ( "Uncertainty in an uncertain world.", "Indeed, how can we be certain of anything in such uncertain times.", @@ -212,7 +213,7 @@ responses = ( ), # "I feel...emotions/sick/light-headed..." ( - r"i feel (.*)", + r'i feel (.*)', ( "Your body and your emotions are both symptoms of your mind." "What do you believe is the root of such feelings?", @@ -222,7 +223,7 @@ responses = ( # exclaimation mark indicating emotion # e.g. "Wow!" or "No!" ( - r"(.*)!", + r'(.*)!', ( "I sense that you are feeling emotional today.", "You need to calm your emotions.", @@ -231,7 +232,7 @@ responses = ( # because [statement] # e.g. "because I said so" ( - r"because (.*)", + r'because (.*)', ( "Does knowning the reasons behind things help you to understand" " the things themselves?", @@ -240,7 +241,7 @@ responses = ( ), # yes or no - raise an issue of certainty/correctness ( - r"(yes)|(no)", + r'(yes)|(no)', ( "Is there certainty in an uncertain world?", "It is better to be right than to be certain.", @@ -248,7 +249,7 @@ responses = ( ), # sentence containing word 'love' ( - r"(.*)love(.*)", + r'(.*)love(.*)', ( "Think of the trees: they let the birds perch and fly with no intention to call them when they come, and no longing for their return when they fly away. Let your heart be like the trees.", "Free love!", @@ -256,7 +257,7 @@ responses = ( ), # sentence containing word 'understand' - r ( - r"(.*)understand(.*)", + r'(.*)understand(.*)', ( "If you understand, things are just as they are;" " if you do not understand, things are just as they are.", @@ -266,7 +267,7 @@ responses = ( # 'I', 'me', 'my' - person is talking about themself. # this breaks down when words contain these - eg 'Thyme', 'Irish' ( - r"(.*)(me )|( me)|(my)|(mine)|(i)(.*)", + r'(.*)(me )|( me)|(my)|(mine)|(i)(.*)', ( "'I', 'me', 'my'... these are selfish expressions.", "Have you ever considered that you might be a selfish person?", @@ -277,12 +278,12 @@ responses = ( # 'you' starting a sentence # e.g. "you stink!" ( - r"you (.*)", + r'you (.*)', ("My path is not of conern to you.", "I am but one, and you but one more."), ), # say goodbye with some extra Zen wisdom. ( - r"exit", + r'exit', ( "Farewell. 
The obstacle is the path.", "Farewell. Life is a journey, not a destination.", @@ -294,7 +295,7 @@ responses = ( # when stumped, respond with generic zen wisdom # ( - r"(.*)", + r'(.*)', ( "When you're enlightened, every word is wisdom.", "Random talk is useless.", @@ -309,13 +310,13 @@ zen_chatbot = Chat(responses, reflections) def zen_chat(): - print("*" * 75) + print('*' * 75) print("Zen Chatbot!".center(75)) - print("*" * 75) + print('*' * 75) print('"Look beyond mere words and letters - look into your mind"'.center(75)) print("* Talk your way to truth with Zen Chatbot.") print("* Type 'quit' when you have had enough.") - print("*" * 75) + print('*' * 75) print("Welcome, my child.") zen_chatbot.converse() diff --git a/nlp_resource_data/nltk/chunk/__init__.py b/nlp_resource_data/nltk/chunk/__init__.py index 3ec1e19..f4b107c 100644 --- a/nlp_resource_data/nltk/chunk/__init__.py +++ b/nlp_resource_data/nltk/chunk/__init__.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Chunkers # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Steven Bird # Edward Loper # URL: @@ -169,8 +169,8 @@ from nltk.chunk.util import ( from nltk.chunk.regexp import RegexpChunkParser, RegexpParser # Standard treebank POS tagger -_BINARY_NE_CHUNKER = "chunkers/maxent_ne_chunker/english_ace_binary.pickle" -_MULTICLASS_NE_CHUNKER = "chunkers/maxent_ne_chunker/english_ace_multiclass.pickle" +_BINARY_NE_CHUNKER = 'chunkers/maxent_ne_chunker/english_ace_binary.pickle' +_MULTICLASS_NE_CHUNKER = 'chunkers/maxent_ne_chunker/english_ace_multiclass.pickle' def ne_chunk(tagged_tokens, binary=False): diff --git a/nlp_resource_data/nltk/chunk/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/chunk/__pycache__/__init__.cpython-37.pyc index 9e922d032aa2090169152f57d5ae56765bbad4bc..a88b3e6e07053b1d5b9645c37532816fad22bf54 100644 GIT binary patch delta 32 mcmexn@ydeRiIQ6Us_zGzxk773ljjB(FwQ! delta 44 ycmaE5@y&wUiI=2fwOCx delta 57 zcmZ3_yPKEWiISrZZX5<$n7G>+}ZMI={XJWiHxrJ4M LF>3QtRxU;Wn~@Jl diff --git a/nlp_resource_data/nltk/chunk/__pycache__/named_entity.cpython-37.pyc b/nlp_resource_data/nltk/chunk/__pycache__/named_entity.cpython-37.pyc index fa269067e00603071ff9d2013fead61232f13000..5d5dbf2ec63012aba399a0986532e74727076f07 100644 GIT binary patch delta 2680 zcmZ`5O^+N$wX3^lrss2Kc6Rn_W@o?bg|V}12Pd&j)@wV#aaM_ZA;h?VwAxd(V4I#_fa*EUVgA$exY1082B6J-gh6rU!FU-#0*0i+z?EdJ_}54GQusyEpBPt_HAw> z&Gs`qqvtHo;+*kwfy47!ChHf1A}Em>FU6SzK& zBeoG3%x3#7^|akk?^?;398!SvIwsUU!L@}Es1vP<;EYBo{=bnV z^LxrG{&Yi&oWUi}qTf9!Qab@D$K#k8co55Ps+UTNO{%oC+SH2L8_Lb&!X^c`8wG={ z2Wqx_@wq3-BnRPH1Wyvnv)u>=SQDh$VWVX0>^_5p!=rc0udvAmqPOBV;mzlSrz+jG#tcmm{}9r%BShdOJ)2^ zb+xg;?i_usQDv)?-mxg9Wk2$S>vggJyu|yRdT)A;^^QK6eq^#KCFj!FG2=Phm9D{$ z)$RH3t?SdqYz z6YFGv;0>gGqz+rV><#sg)+2UReYmjLpbFOYom%_-0gO(q{@m zQqL}Lef1iyJC%`p6kq5O(9ToqrFj_+1AOlA$W@bpx*-x-RxE5g(|BwlVmLdy7H)8 z;JXNT{(j(ZV71{RU=wjjL?`r<-3@x#GDM?pDc}Vw!~VX6Jkdrn)P1_7d1YjjN?^kq^r1*X6UT_dr0)Zz<3PWDqx8^i z2WteVL`Hd|QXHj4$AvCsK6A_vMO)zi11joc!PYfWN8!}j#9$A7xka@Q)4OBiMPD*! 
diff --git a/nlp_resource_data/nltk/chunk/__pycache__/regexp.cpython-37.pyc b/nlp_resource_data/nltk/chunk/__pycache__/regexp.cpython-37.pyc index 48374fb05500792faa6b7cd50114a2d2cb149622..b4077df0714d8a57fbbfc3519fbbfa0494caac36 100644 GIT binary patch
diff --git a/nlp_resource_data/nltk/chunk/__pycache__/util.cpython-37.pyc b/nlp_resource_data/nltk/chunk/__pycache__/util.cpython-37.pyc index f9a821276c4270c97fd64f3051de64c02d148c8c..2d7dfcc957b6a3427b31b170610ef7d8ec44b451 100644 GIT binary patch
zr%F9N`}I<*yxB7=*qJF0^F1qHES+Fw`A%t!t@^(&ZAbHKXhYW)SK)cG%@zT(K%=0^ z=cmUup2h?0hmlahxp1D?G)$lqP*|w)0{#h_)e|?om+M&1uafj@#7@|jBb*xT1V?}} zYm#t7{&{+M_zIB?ac9dl$F}z(za~d!CYkGBn)wwQrEcHQ&L4vc64);l&XZ(=njXgc zj9i$@WvG*Sj4A*t^2*#m=ckx z*V7%l)Z`#u2Mb>jO^BM6AbsB9;(GWMemGcgndE~Ag(fvd`;((L)h$sLMpch1B4Aax zLbpGiZqRC2SW?_!CO15B-AhV$VV2$X-&t7Dm?7Ugd&D+K?OO!jCio7)s|YKFSdcN! zFE3v-mR6rIE-r`jd6(pr4}LXiwDJ*h(g{S!s>%MP?zJn#DI)}ldRVR7#Bm93A?&$-ofvAM)2cg$8lr|MVq8>fYx3Sw z|Lz-jpVoTSwD1Ff|Jf+l7s$$&5LQ-jCODmlRfz7>PZqj?I;y7}+uJTGH`HdmEj*i* zVimWjH@X?lJnIX-?7C4d3WhvQj`B# IUeJ>4e>IS<0RR91 diff --git a/nlp_resource_data/nltk/chunk/api.py b/nlp_resource_data/nltk/chunk/api.py index 1e4af77..1454825 100644 --- a/nlp_resource_data/nltk/chunk/api.py +++ b/nlp_resource_data/nltk/chunk/api.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Chunk parsing API # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # Steven Bird (minor additions) # URL: diff --git a/nlp_resource_data/nltk/chunk/named_entity.py b/nlp_resource_data/nltk/chunk/named_entity.py index 863ee99..07d3067 100644 --- a/nlp_resource_data/nltk/chunk/named_entity.py +++ b/nlp_resource_data/nltk/chunk/named_entity.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Chunk parsing API # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # URL: # For license information, see LICENSE.TXT @@ -8,6 +8,8 @@ """ Named entity chunker """ +from __future__ import print_function +from __future__ import unicode_literals import os, re, pickle from xml.etree import ElementTree as ET @@ -39,7 +41,7 @@ class NEChunkParserTagger(ClassifierBasedTagger): def _classifier_builder(self, train): return MaxentClassifier.train( - train, algorithm="megam", gaussian_prior_sigma=1, trace=2 + train, algorithm='megam', gaussian_prior_sigma=1, trace=2 ) def _english_wordlist(self): @@ -48,7 +50,7 @@ class NEChunkParserTagger(ClassifierBasedTagger): except AttributeError: from nltk.corpus import words - self._en_wordlist = set(words.words("en-basic")) + self._en_wordlist = set(words.words('en-basic')) wl = self._en_wordlist return wl @@ -90,22 +92,22 @@ class NEChunkParserTagger(ClassifierBasedTagger): # 89.6 features = { - "bias": True, - "shape": shape(word), - "wordlen": len(word), - "prefix3": word[:3].lower(), - "suffix3": word[-3:].lower(), - "pos": pos, - "word": word, - "en-wordlist": (word in self._english_wordlist()), - "prevtag": prevtag, - "prevpos": prevpos, - "nextpos": nextpos, - "prevword": prevword, - "nextword": nextword, - "word+nextpos": "{0}+{1}".format(word.lower(), nextpos), - "pos+prevtag": "{0}+{1}".format(pos, prevtag), - "shape+prevtag": "{0}+{1}".format(prevshape, prevtag), + 'bias': True, + 'shape': shape(word), + 'wordlen': len(word), + 'prefix3': word[:3].lower(), + 'suffix3': word[-3:].lower(), + 'pos': pos, + 'word': word, + 'en-wordlist': (word in self._english_wordlist()), + 'prevtag': prevtag, + 'prevpos': prevpos, + 'nextpos': nextpos, + 'prevword': prevword, + 'nextword': nextword, + 'word+nextpos': '{0}+{1}'.format(word.lower(), nextpos), + 'pos+prevtag': '{0}+{1}'.format(pos, prevtag), + 'shape+prevtag': '{0}+{1}'.format(prevshape, prevtag), } return features @@ -137,14 +139,14 @@ class NEChunkParser(ChunkParserI): """ Convert a list of tagged tokens to a chunk-parse tree. 
""" - sent = Tree("S", []) + sent = Tree('S', []) for (tok, tag) in tagged_tokens: - if tag == "O": + if tag == 'O': sent.append(tok) - elif tag.startswith("B-"): + elif tag.startswith('B-'): sent.append(Tree(tag[2:], [tok])) - elif tag.startswith("I-"): + elif tag.startswith('I-'): if sent and isinstance(sent[-1], Tree) and sent[-1].label() == tag[2:]: sent[-1].append(tok) else: @@ -162,42 +164,42 @@ class NEChunkParser(ChunkParserI): if len(child) == 0: print("Warning -- empty chunk in sentence") continue - toks.append((child[0], "B-{0}".format(child.label()))) + toks.append((child[0], 'B-{0}'.format(child.label()))) for tok in child[1:]: - toks.append((tok, "I-{0}".format(child.label()))) + toks.append((tok, 'I-{0}'.format(child.label()))) else: - toks.append((child, "O")) + toks.append((child, 'O')) return toks def shape(word): - if re.match("[0-9]+(\.[0-9]*)?|[0-9]*\.[0-9]+$", word, re.UNICODE): - return "number" - elif re.match("\W+$", word, re.UNICODE): - return "punct" - elif re.match("\w+$", word, re.UNICODE): + if re.match('[0-9]+(\.[0-9]*)?|[0-9]*\.[0-9]+$', word, re.UNICODE): + return 'number' + elif re.match('\W+$', word, re.UNICODE): + return 'punct' + elif re.match('\w+$', word, re.UNICODE): if word.istitle(): - return "upcase" + return 'upcase' elif word.islower(): - return "downcase" + return 'downcase' else: - return "mixedcase" + return 'mixedcase' else: - return "other" + return 'other' def simplify_pos(s): - if s.startswith("V"): + if s.startswith('V'): return "V" else: - return s.split("-")[0] + return s.split('-')[0] def postag_tree(tree): # Part-of-speech tagging. words = tree.leaves() tag_iter = (pos for (word, pos) in pos_tag(words)) - newtree = Tree("S", []) + newtree = Tree('S', []) for child in tree: if isinstance(child, Tree): newtree.append(Tree(child.label(), [])) @@ -208,47 +210,47 @@ def postag_tree(tree): return newtree -def load_ace_data(roots, fmt="binary", skip_bnews=True): +def load_ace_data(roots, fmt='binary', skip_bnews=True): for root in roots: for root, dirs, files in os.walk(root): - if root.endswith("bnews") and skip_bnews: + if root.endswith('bnews') and skip_bnews: continue for f in files: - if f.endswith(".sgm"): + if f.endswith('.sgm'): for sent in load_ace_file(os.path.join(root, f), fmt): yield sent def load_ace_file(textfile, fmt): - print(" - {0}".format(os.path.split(textfile)[1])) - annfile = textfile + ".tmx.rdc.xml" + print(' - {0}'.format(os.path.split(textfile)[1])) + annfile = textfile + '.tmx.rdc.xml' # Read the xml file, and get a list of entities entities = [] - with open(annfile, "r") as infile: + with open(annfile, 'r') as infile: xml = ET.parse(infile).getroot() - for entity in xml.findall("document/entity"): - typ = entity.find("entity_type").text - for mention in entity.findall("entity_mention"): - if mention.get("TYPE") != "NAME": + for entity in xml.findall('document/entity'): + typ = entity.find('entity_type').text + for mention in entity.findall('entity_mention'): + if mention.get('TYPE') != 'NAME': continue # only NEs - s = int(mention.find("head/charseq/start").text) - e = int(mention.find("head/charseq/end").text) + 1 + s = int(mention.find('head/charseq/start').text) + e = int(mention.find('head/charseq/end').text) + 1 entities.append((s, e, typ)) # Read the text file, and mark the entities. 
- with open(textfile, "r") as infile: + with open(textfile, 'r') as infile: text = infile.read() # Strip XML tags, since they don't count towards the indices - text = re.sub("<(?!/?TEXT)[^>]+>", "", text) + text = re.sub('<(?!/?TEXT)[^>]+>', '', text) # Blank out anything before/after def subfunc(m): - return " " * (m.end() - m.start() - 6) + return ' ' * (m.end() - m.start() - 6) - text = re.sub("[\s\S]*", subfunc, text) - text = re.sub("[\s\S]*", "", text) + text = re.sub('[\s\S]*', subfunc, text) + text = re.sub('[\s\S]*', '', text) # Simplify quotes text = re.sub("``", ' "', text) @@ -257,24 +259,24 @@ def load_ace_file(textfile, fmt): entity_types = set(typ for (s, e, typ) in entities) # Binary distinction (NE or not NE) - if fmt == "binary": + if fmt == 'binary': i = 0 - toks = Tree("S", []) + toks = Tree('S', []) for (s, e, typ) in sorted(entities): if s < i: s = i # Overlapping! Deal with this better? if e <= s: continue toks.extend(word_tokenize(text[i:s])) - toks.append(Tree("NE", text[s:e].split())) + toks.append(Tree('NE', text[s:e].split())) i = e toks.extend(word_tokenize(text[i:])) yield toks # Multiclass distinction (NE type) - elif fmt == "multiclass": + elif fmt == 'multiclass': i = 0 - toks = Tree("S", []) + toks = Tree('S', []) for (s, e, typ) in sorted(entities): if s < i: s = i # Overlapping! Deal with this better? @@ -287,7 +289,7 @@ def load_ace_file(textfile, fmt): yield toks else: - raise ValueError("bad fmt value") + raise ValueError('bad fmt value') # This probably belongs in a more general-purpose location (as does @@ -297,36 +299,36 @@ def cmp_chunks(correct, guessed): guessed = NEChunkParser._parse_to_tagged(guessed) ellipsis = False for (w, ct), (w, gt) in zip(correct, guessed): - if ct == gt == "O": + if ct == gt == 'O': if not ellipsis: print(" {:15} {:15} {2}".format(ct, gt, w)) - print(" {:15} {:15} {2}".format("...", "...", "...")) + print(' {:15} {:15} {2}'.format('...', '...', '...')) ellipsis = True else: ellipsis = False print(" {:15} {:15} {2}".format(ct, gt, w)) -def build_model(fmt="binary"): - print("Loading training data...") +def build_model(fmt='binary'): + print('Loading training data...') train_paths = [ - find("corpora/ace_data/ace.dev"), - find("corpora/ace_data/ace.heldout"), - find("corpora/ace_data/bbn.dev"), - find("corpora/ace_data/muc.dev"), + find('corpora/ace_data/ace.dev'), + find('corpora/ace_data/ace.heldout'), + find('corpora/ace_data/bbn.dev'), + find('corpora/ace_data/muc.dev'), ] train_trees = load_ace_data(train_paths, fmt) train_data = [postag_tree(t) for t in train_trees] - print("Training...") + print('Training...') cp = NEChunkParser(train_data) del train_data - print("Loading eval data...") - eval_paths = [find("corpora/ace_data/ace.eval")] + print('Loading eval data...') + eval_paths = [find('corpora/ace_data/ace.eval')] eval_trees = load_ace_data(eval_paths, fmt) eval_data = [postag_tree(t) for t in eval_trees] - print("Evaluating...") + print('Evaluating...') chunkscore = ChunkScore() for i, correct in enumerate(eval_data): guess = cp.parse(correct.leaves()) @@ -335,18 +337,18 @@ def build_model(fmt="binary"): cmp_chunks(correct, guess) print(chunkscore) - outfilename = "/tmp/ne_chunker_{0}.pickle".format(fmt) - print("Saving chunker to {0}...".format(outfilename)) + outfilename = '/tmp/ne_chunker_{0}.pickle'.format(fmt) + print('Saving chunker to {0}...'.format(outfilename)) - with open(outfilename, "wb") as outfile: + with open(outfilename, 'wb') as outfile: pickle.dump(cp, outfile, -1) return cp -if __name__ == 
"__main__": +if __name__ == '__main__': # Make sure that the pickled object has the right class name: from nltk.chunk.named_entity import build_model - build_model("binary") - build_model("multiclass") + build_model('binary') + build_model('multiclass') diff --git a/nlp_resource_data/nltk/chunk/regexp.py b/nlp_resource_data/nltk/chunk/regexp.py index f0e910c..fe4ab5b 100644 --- a/nlp_resource_data/nltk/chunk/regexp.py +++ b/nlp_resource_data/nltk/chunk/regexp.py @@ -1,21 +1,27 @@ # Natural Language Toolkit: Regular Expression Chunkers # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # Steven Bird (minor additions) # URL: # For license information, see LICENSE.TXT +from __future__ import print_function, unicode_literals +from __future__ import division import re +from six import string_types + from nltk.tree import Tree from nltk.chunk.api import ChunkParserI +from nltk.compat import python_2_unicode_compatible, unicode_repr ##////////////////////////////////////////////////////// ## ChunkString ##////////////////////////////////////////////////////// +@python_2_unicode_compatible class ChunkString(object): """ A string-based encoding of a particular chunking of a text. @@ -54,18 +60,18 @@ class ChunkString(object): will only match positions that are in chinks. """ - CHUNK_TAG_CHAR = r"[^\{\}<>]" - CHUNK_TAG = r"(<%s+?>)" % CHUNK_TAG_CHAR + CHUNK_TAG_CHAR = r'[^\{\}<>]' + CHUNK_TAG = r'(<%s+?>)' % CHUNK_TAG_CHAR - IN_CHUNK_PATTERN = r"(?=[^\{]*\})" - IN_CHINK_PATTERN = r"(?=[^\}]*(\{|$))" + IN_CHUNK_PATTERN = r'(?=[^\{]*\})' + IN_CHINK_PATTERN = r'(?=[^\}]*(\{|$))' # These are used by _verify - _CHUNK = r"(\{%s+?\})+?" % CHUNK_TAG - _CHINK = r"(%s+?)+?" % CHUNK_TAG - _VALID = re.compile(r"^(\{?%s\}?)*?$" % CHUNK_TAG) - _BRACKETS = re.compile("[^\{\}]+") - _BALANCED_BRACKETS = re.compile(r"(\{\})*$") + _CHUNK = r'(\{%s+?\})+?' % CHUNK_TAG + _CHINK = r'(%s+?)+?' % CHUNK_TAG + _VALID = re.compile(r'^(\{?%s\}?)*?$' % CHUNK_TAG) + _BRACKETS = re.compile('[^\{\}]+') + _BALANCED_BRACKETS = re.compile(r'(\{\})*$') def __init__(self, chunk_struct, debug_level=1): """ @@ -91,7 +97,7 @@ class ChunkString(object): self._root_label = chunk_struct.label() self._pieces = chunk_struct[:] tags = [self._tag(tok) for tok in self._pieces] - self._str = "<" + "><".join(tags) + ">" + self._str = '<' + '><'.join(tags) + '>' self._debug = debug_level def _tag(self, tok): @@ -100,7 +106,7 @@ class ChunkString(object): elif isinstance(tok, Tree): return tok.label() else: - raise ValueError("chunk structures must contain tagged " "tokens or trees") + raise ValueError('chunk structures must contain tagged ' 'tokens or trees') def _verify(self, s, verify_tags): """ @@ -121,31 +127,31 @@ class ChunkString(object): # Check overall form if not ChunkString._VALID.match(s): raise ValueError( - "Transformation generated invalid " "chunkstring:\n %s" % s + 'Transformation generated invalid ' 'chunkstring:\n %s' % s ) # Check that parens are balanced. If the string is long, we # have to do this in pieces, to avoid a maximum recursion # depth limit for regular expressions. 
- brackets = ChunkString._BRACKETS.sub("", s) + brackets = ChunkString._BRACKETS.sub('', s) for i in range(1 + len(brackets) // 5000): substr = brackets[i * 5000 : i * 5000 + 5000] if not ChunkString._BALANCED_BRACKETS.match(substr): raise ValueError( - "Transformation generated invalid " "chunkstring:\n %s" % s + 'Transformation generated invalid ' 'chunkstring:\n %s' % s ) if verify_tags <= 0: return - tags1 = (re.split(r"[\{\}<>]+", s))[1:-1] + tags1 = (re.split(r'[\{\}<>]+', s))[1:-1] tags2 = [self._tag(piece) for piece in self._pieces] if tags1 != tags2: raise ValueError( - "Transformation generated invalid " "chunkstring: tag changed" + 'Transformation generated invalid ' 'chunkstring: tag changed' ) - def to_chunkstruct(self, chunk_label="CHUNK"): + def to_chunkstruct(self, chunk_label='CHUNK'): """ Return the chunk structure encoded by this ``ChunkString``. @@ -160,10 +166,10 @@ class ChunkString(object): pieces = [] index = 0 piece_in_chunk = 0 - for piece in re.split("[{}]", self._str): + for piece in re.split('[{}]', self._str): # Find the list of tokens contained in this piece. - length = piece.count("<") + length = piece.count('<') subsequence = self._pieces[index : index + length] # Add this list of tokens to our pieces. @@ -209,7 +215,7 @@ class ChunkString(object): # The substitution might have generated "empty chunks" # (substrings of the form "{}"). Remove them, so they don't # interfere with other transformations. - s = re.sub("\{\}", "", s) + s = re.sub('\{\}', '', s) # Make sure that the transformation was legal. if self._debug > 1: @@ -227,7 +233,7 @@ class ChunkString(object): :rtype: str """ - return "" % repr(self._str) + return '' % unicode_repr(self._str) def __str__(self): """ @@ -239,10 +245,10 @@ class ChunkString(object): :rtype: str """ # Add spaces to make everything line up. - str = re.sub(r">(?!\})", r"> ", self._str) - str = re.sub(r"([^\{])<", r"\1 <", str) - if str[0] == "<": - str = " " + str + str = re.sub(r'>(?!\})', r'> ', self._str) + str = re.sub(r'([^\{])<', r'\1 <', str) + if str[0] == '<': + str = ' ' + str return str @@ -251,6 +257,7 @@ class ChunkString(object): ##////////////////////////////////////////////////////// +@python_2_unicode_compatible class RegexpChunkRule(object): """ A rule specifying how to modify the chunking in a ``ChunkString``, @@ -295,7 +302,7 @@ class RegexpChunkRule(object): :param descr: A short description of the purpose and/or effect of this rule. 
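A compact, hedged sketch of how the pieces above fit together: a tagged sentence is flattened into the angle-bracket string encoding, a rule built from a tag-pattern string rewrites that string, and the result is converted back into a tree. The sentence and the tag pattern are invented; fromstring is the helper described a little further below.

from nltk.tree import Tree
from nltk.chunk.regexp import ChunkString, RegexpChunkRule

sent = Tree('S', [('the', 'DT'), ('little', 'JJ'), ('cat', 'NN'), ('sat', 'VBD')])
cs = ChunkString(sent)
print(cs)  # roughly: <DT> <JJ> <NN> <VBD>   (no braces yet, so everything is chink)

rule = RegexpChunkRule.fromstring('{<DT>?<JJ>*<NN>}  # chunk determiner, adjectives, noun')
rule.apply(cs)  # rewrites the string to {<DT><JJ><NN>}<VBD>
print(cs.to_chunkstruct('NP'))  # (S (NP the/DT little/JJ cat/NN) sat/VBD)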
""" - if isinstance(regexp, str): + if isinstance(regexp, string_types): regexp = re.compile(regexp) self._repl = repl self._descr = descr @@ -338,11 +345,11 @@ class RegexpChunkRule(object): :rtype: str """ return ( - "" - + repr(self._repl) - + ">" + '' + + unicode_repr(self._repl) + + '>' ) @staticmethod @@ -365,33 +372,34 @@ class RegexpChunkRule(object): ?+'> """ # Split off the comment (but don't split on '\#') - m = re.match(r"(?P(\\.|[^#])*)(?P#.*)?", s) - rule = m.group("rule").strip() - comment = (m.group("comment") or "")[1:].strip() + m = re.match(r'(?P(\\.|[^#])*)(?P#.*)?', s) + rule = m.group('rule').strip() + comment = (m.group('comment') or '')[1:].strip() # Pattern bodies: chunk, chink, split, merge try: if not rule: - raise ValueError("Empty chunk pattern") - if rule[0] == "{" and rule[-1] == "}": + raise ValueError('Empty chunk pattern') + if rule[0] == '{' and rule[-1] == '}': return ChunkRule(rule[1:-1], comment) - elif rule[0] == "}" and rule[-1] == "{": + elif rule[0] == '}' and rule[-1] == '{': return ChinkRule(rule[1:-1], comment) - elif "}{" in rule: - left, right = rule.split("}{") + elif '}{' in rule: + left, right = rule.split('}{') return SplitRule(left, right, comment) - elif "{}" in rule: - left, right = rule.split("{}") + elif '{}' in rule: + left, right = rule.split('{}') return MergeRule(left, right, comment) - elif re.match("[^{}]*{[^{}]*}[^{}]*", rule): - left, chunk, right = re.split("[{}]", rule) + elif re.match('[^{}]*{[^{}]*}[^{}]*', rule): + left, chunk, right = re.split('[{}]', rule) return ChunkRuleWithContext(left, chunk, right, comment) else: - raise ValueError("Illegal chunk pattern: %s" % rule) + raise ValueError('Illegal chunk pattern: %s' % rule) except (ValueError, re.error): - raise ValueError("Illegal chunk pattern: %s" % rule) + raise ValueError('Illegal chunk pattern: %s' % rule) +@python_2_unicode_compatible class ChunkRule(RegexpChunkRule): """ A rule specifying how to add chunks to a ``ChunkString``, using a @@ -417,10 +425,10 @@ class ChunkRule(RegexpChunkRule): """ self._pattern = tag_pattern regexp = re.compile( - "(?P%s)%s" + '(?P%s)%s' % (tag_pattern2re_pattern(tag_pattern), ChunkString.IN_CHINK_PATTERN) ) - RegexpChunkRule.__init__(self, regexp, "{\g}", descr) + RegexpChunkRule.__init__(self, regexp, '{\g}', descr) def __repr__(self): """ @@ -434,9 +442,10 @@ class ChunkRule(RegexpChunkRule): :rtype: str """ - return "" + return '' +@python_2_unicode_compatible class ChinkRule(RegexpChunkRule): """ A rule specifying how to remove chinks to a ``ChunkString``, @@ -462,10 +471,10 @@ class ChinkRule(RegexpChunkRule): """ self._pattern = tag_pattern regexp = re.compile( - "(?P%s)%s" + '(?P%s)%s' % (tag_pattern2re_pattern(tag_pattern), ChunkString.IN_CHUNK_PATTERN) ) - RegexpChunkRule.__init__(self, regexp, "}\g{", descr) + RegexpChunkRule.__init__(self, regexp, '}\g{', descr) def __repr__(self): """ @@ -479,9 +488,10 @@ class ChinkRule(RegexpChunkRule): :rtype: str """ - return "" + return '' +@python_2_unicode_compatible class UnChunkRule(RegexpChunkRule): """ A rule specifying how to remove chunks to a ``ChunkString``, @@ -504,8 +514,8 @@ class UnChunkRule(RegexpChunkRule): of this rule. 
""" self._pattern = tag_pattern - regexp = re.compile("\{(?P%s)\}" % tag_pattern2re_pattern(tag_pattern)) - RegexpChunkRule.__init__(self, regexp, "\g", descr) + regexp = re.compile('\{(?P%s)\}' % tag_pattern2re_pattern(tag_pattern)) + RegexpChunkRule.__init__(self, regexp, '\g', descr) def __repr__(self): """ @@ -519,9 +529,10 @@ class UnChunkRule(RegexpChunkRule): :rtype: str """ - return "" + return '' +@python_2_unicode_compatible class MergeRule(RegexpChunkRule): """ A rule specifying how to merge chunks in a ``ChunkString``, using @@ -563,13 +574,13 @@ class MergeRule(RegexpChunkRule): self._left_tag_pattern = left_tag_pattern self._right_tag_pattern = right_tag_pattern regexp = re.compile( - "(?P%s)}{(?=%s)" + '(?P%s)}{(?=%s)' % ( tag_pattern2re_pattern(left_tag_pattern), tag_pattern2re_pattern(right_tag_pattern), ) ) - RegexpChunkRule.__init__(self, regexp, "\g", descr) + RegexpChunkRule.__init__(self, regexp, '\g', descr) def __repr__(self): """ @@ -584,14 +595,15 @@ class MergeRule(RegexpChunkRule): :rtype: str """ return ( - "" + '' ) +@python_2_unicode_compatible class SplitRule(RegexpChunkRule): """ A rule specifying how to split chunks in a ``ChunkString``, using @@ -632,13 +644,13 @@ class SplitRule(RegexpChunkRule): self._left_tag_pattern = left_tag_pattern self._right_tag_pattern = right_tag_pattern regexp = re.compile( - "(?P%s)(?=%s)" + '(?P%s)(?=%s)' % ( tag_pattern2re_pattern(left_tag_pattern), tag_pattern2re_pattern(right_tag_pattern), ) ) - RegexpChunkRule.__init__(self, regexp, r"\g}{", descr) + RegexpChunkRule.__init__(self, regexp, r'\g}{', descr) def __repr__(self): """ @@ -653,14 +665,15 @@ class SplitRule(RegexpChunkRule): :rtype: str """ return ( - "" + '' ) +@python_2_unicode_compatible class ExpandLeftRule(RegexpChunkRule): """ A rule specifying how to expand chunks in a ``ChunkString`` to the left, @@ -702,13 +715,13 @@ class ExpandLeftRule(RegexpChunkRule): self._left_tag_pattern = left_tag_pattern self._right_tag_pattern = right_tag_pattern regexp = re.compile( - "(?P%s)\{(?P%s)" + '(?P%s)\{(?P%s)' % ( tag_pattern2re_pattern(left_tag_pattern), tag_pattern2re_pattern(right_tag_pattern), ) ) - RegexpChunkRule.__init__(self, regexp, "{\g\g", descr) + RegexpChunkRule.__init__(self, regexp, '{\g\g', descr) def __repr__(self): """ @@ -723,14 +736,15 @@ class ExpandLeftRule(RegexpChunkRule): :rtype: str """ return ( - "" + '' ) +@python_2_unicode_compatible class ExpandRightRule(RegexpChunkRule): """ A rule specifying how to expand chunks in a ``ChunkString`` to the @@ -772,13 +786,13 @@ class ExpandRightRule(RegexpChunkRule): self._left_tag_pattern = left_tag_pattern self._right_tag_pattern = right_tag_pattern regexp = re.compile( - "(?P%s)\}(?P%s)" + '(?P%s)\}(?P%s)' % ( tag_pattern2re_pattern(left_tag_pattern), tag_pattern2re_pattern(right_tag_pattern), ) ) - RegexpChunkRule.__init__(self, regexp, "\g\g}", descr) + RegexpChunkRule.__init__(self, regexp, '\g\g}', descr) def __repr__(self): """ @@ -793,14 +807,15 @@ class ExpandRightRule(RegexpChunkRule): :rtype: str """ return ( - "" + '' ) +@python_2_unicode_compatible class ChunkRuleWithContext(RegexpChunkRule): """ A rule specifying how to add chunks to a ``ChunkString``, using @@ -853,7 +868,7 @@ class ChunkRuleWithContext(RegexpChunkRule): self._chunk_tag_pattern = chunk_tag_pattern self._right_context_tag_pattern = right_context_tag_pattern regexp = re.compile( - "(?P%s)(?P%s)(?P%s)%s" + '(?P%s)(?P%s)(?P%s)%s' % ( tag_pattern2re_pattern(left_context_tag_pattern), 
tag_pattern2re_pattern(chunk_tag_pattern), @@ -861,7 +876,7 @@ class ChunkRuleWithContext(RegexpChunkRule): ChunkString.IN_CHINK_PATTERN, ) ) - replacement = r"\g{\g}\g" + replacement = r'\g{\g}\g' RegexpChunkRule.__init__(self, regexp, replacement, descr) def __repr__(self): @@ -876,7 +891,7 @@ class ChunkRuleWithContext(RegexpChunkRule): :rtype: str """ - return "" % ( + return '' % ( self._left_context_tag_pattern, self._chunk_tag_pattern, self._right_context_tag_pattern, @@ -890,7 +905,7 @@ class ChunkRuleWithContext(RegexpChunkRule): # this should probably be made more strict than it is -- e.g., it # currently accepts 'foo'. CHUNK_TAG_PATTERN = re.compile( - r"^((%s|<%s>)*)$" % ("([^\{\}<>]|\{\d+,?\}|\{\d*,\d+\})+", "[^\{\}<>]+") + r'^((%s|<%s>)*)$' % ('([^\{\}<>]|\{\d+,?\}|\{\d*,\d+\})+', '[^\{\}<>]+') ) @@ -931,13 +946,13 @@ def tag_pattern2re_pattern(tag_pattern): ``tag_pattern``. """ # Clean up the regular expression - tag_pattern = re.sub(r"\s", "", tag_pattern) - tag_pattern = re.sub(r"<", "(<(", tag_pattern) - tag_pattern = re.sub(r">", ")>)", tag_pattern) + tag_pattern = re.sub(r'\s', '', tag_pattern) + tag_pattern = re.sub(r'<', '(<(', tag_pattern) + tag_pattern = re.sub(r'>', ')>)', tag_pattern) # Check the regular expression if not CHUNK_TAG_PATTERN.match(tag_pattern): - raise ValueError("Bad tag pattern: %r" % tag_pattern) + raise ValueError('Bad tag pattern: %r' % tag_pattern) # Replace "." with CHUNK_TAG_CHAR. # We have to do this after, since it adds {}[]<>s, which would @@ -948,11 +963,11 @@ def tag_pattern2re_pattern(tag_pattern): def reverse_str(str): lst = list(str) lst.reverse() - return "".join(lst) + return ''.join(lst) tc_rev = reverse_str(ChunkString.CHUNK_TAG_CHAR) reversed = reverse_str(tag_pattern) - reversed = re.sub(r"\.(?!\\(\\\\)*($|[^\\]))", tc_rev, reversed) + reversed = re.sub(r'\.(?!\\(\\\\)*($|[^\\]))', tc_rev, reversed) tag_pattern = reverse_str(reversed) return tag_pattern @@ -963,6 +978,7 @@ def tag_pattern2re_pattern(tag_pattern): ##////////////////////////////////////////////////////// +@python_2_unicode_compatible class RegexpChunkParser(ChunkParserI): """ A regular expression based chunk parser. ``RegexpChunkParser`` uses a @@ -985,7 +1001,7 @@ class RegexpChunkParser(ChunkParserI): """ - def __init__(self, rules, chunk_label="NP", root_label="S", trace=0): + def __init__(self, rules, chunk_label='NP', root_label='S', trace=0): """ Construct a new ``RegexpChunkParser``. @@ -1024,14 +1040,14 @@ class RegexpChunkParser(ChunkParserI): :param verbose: Whether output should be verbose. :rtype: None """ - print("# Input:") + print('# Input:') print(chunkstr) for rule in self._rules: rule.apply(chunkstr) if verbose: - print("#", rule.descr() + " (" + repr(rule) + "):") + print('#', rule.descr() + ' (' + unicode_repr(rule) + '):') else: - print("#", rule.descr() + ":") + print('#', rule.descr() + ':') print(chunkstr) def _notrace_apply(self, chunkstr): @@ -1067,7 +1083,7 @@ class RegexpChunkParser(ChunkParserI): used to define this ``RegexpChunkParser``. 
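For orientation, a minimal usage sketch of this parser with two invented rules, showing that the rules are applied to the underlying ChunkString in order (chunk everything first, then chink the material that should stay outside chunks):

from nltk.tree import Tree
from nltk.chunk.regexp import ChunkRule, ChinkRule, RegexpChunkParser

rules = [
    ChunkRule('<.*>+', 'chunk everything'),
    ChinkRule('<VBD|IN>', 'chink verbs and prepositions'),
]
parser = RegexpChunkParser(rules, chunk_label='NP', root_label='S')
sent = Tree('S', [('the', 'DT'), ('cat', 'NN'), ('sat', 'VBD'),
                  ('on', 'IN'), ('the', 'DT'), ('mat', 'NN')])
print(parser.parse(sent))
# (S (NP the/DT cat/NN) sat/VBD on/IN (NP the/DT mat/NN))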
""" if len(chunk_struct) == 0: - print("Warning: parsing empty text") + print('Warning: parsing empty text') return Tree(self._root_label, []) try: @@ -1120,7 +1136,7 @@ class RegexpChunkParser(ChunkParserI): else: format = " %s\n %s\n" for rule in self._rules: - s += format % (rule.descr(), repr(rule)) + s += format % (rule.descr(), unicode_repr(rule)) return s[:-1] @@ -1129,6 +1145,7 @@ class RegexpChunkParser(ChunkParserI): ##////////////////////////////////////////////////////// +@python_2_unicode_compatible class RegexpParser(ChunkParserI): """ A grammar based chunk parser. ``chunk.RegexpParser`` uses a set of @@ -1171,7 +1188,7 @@ class RegexpParser(ChunkParserI): """ - def __init__(self, grammar, root_label="S", loop=1, trace=0): + def __init__(self, grammar, root_label='S', loop=1, trace=0): """ Create a new chunk parser, from the given start state and set of chunk patterns. @@ -1193,12 +1210,12 @@ class RegexpParser(ChunkParserI): self._grammar = grammar self._loop = loop - if isinstance(grammar, str): + if isinstance(grammar, string_types): self._read_grammar(grammar, root_label, trace) else: # Make sur the grammar looks like it has the right type: type_err = ( - "Expected string or list of RegexpChunkParsers " "for the grammar." + 'Expected string or list of RegexpChunkParsers ' 'for the grammar.' ) try: grammar = list(grammar) @@ -1216,21 +1233,21 @@ class RegexpParser(ChunkParserI): """ rules = [] lhs = None - for line in grammar.split("\n"): + for line in grammar.split('\n'): line = line.strip() # New stage begins if there's an unescaped ':' - m = re.match("(?P(\\.|[^:])*)(:(?P.*))", line) + m = re.match('(?P(\\.|[^:])*)(:(?P.*))', line) if m: # Record the stage that we just completed. self._add_stage(rules, lhs, root_label, trace) # Start a new stage. - lhs = m.group("nonterminal").strip() + lhs = m.group('nonterminal').strip() rules = [] - line = m.group("rule").strip() + line = m.group('rule').strip() # Skip blank & comment-only lines - if line == "" or line.startswith("#"): + if line == '' or line.startswith('#'): continue # Add the rule @@ -1245,7 +1262,7 @@ class RegexpParser(ChunkParserI): """ if rules != []: if not lhs: - raise ValueError("Expected stage marker (eg NP:)") + raise ValueError('Expected stage marker (eg NP:)') parser = RegexpChunkParser( rules, chunk_label=lhs, root_label=root_label, trace=trace ) @@ -1323,43 +1340,43 @@ def demo_eval(chunkparser, text): # Evaluate our chunk parser. chunkscore = chunk.ChunkScore() - for sentence in text.split("\n"): + for sentence in text.split('\n'): print(sentence) sentence = sentence.strip() if not sentence: continue gold = chunk.tagstr2tree(sentence) tokens = gold.leaves() - test = chunkparser.parse(Tree("S", tokens), trace=1) + test = chunkparser.parse(Tree('S', tokens), trace=1) chunkscore.score(gold, test) print() - print("/" + ("=" * 75) + "\\") - print("Scoring", chunkparser) - print(("-" * 77)) - print("Precision: %5.1f%%" % (chunkscore.precision() * 100), " " * 4, end=" ") - print("Recall: %5.1f%%" % (chunkscore.recall() * 100), " " * 6, end=" ") - print("F-Measure: %5.1f%%" % (chunkscore.f_measure() * 100)) + print('/' + ('=' * 75) + '\\') + print('Scoring', chunkparser) + print(('-' * 77)) + print('Precision: %5.1f%%' % (chunkscore.precision() * 100), ' ' * 4, end=' ') + print('Recall: %5.1f%%' % (chunkscore.recall() * 100), ' ' * 6, end=' ') + print('F-Measure: %5.1f%%' % (chunkscore.f_measure() * 100)) # Missed chunks. 
if chunkscore.missed(): - print("Missed:") + print('Missed:') missed = chunkscore.missed() for chunk in missed[:10]: - print(" ", " ".join(map(str, chunk))) + print(' ', ' '.join(map(str, chunk))) if len(chunkscore.missed()) > 10: - print(" ...") + print(' ...') # Incorrect chunks. if chunkscore.incorrect(): - print("Incorrect:") + print('Incorrect:') incorrect = chunkscore.incorrect() for chunk in incorrect[:10]: - print(" ", " ".join(map(str, chunk))) + print(' ', ' '.join(map(str, chunk))) if len(chunkscore.incorrect()) > 10: - print(" ...") + print(' ...') - print("\\" + ("=" * 75) + "/") + print('\\' + ('=' * 75) + '/') print() @@ -1378,10 +1395,10 @@ def demo(): [ John/NNP ] thinks/VBZ [ Mary/NN ] saw/VBD [ the/DT cat/NN ] sit/VB on/IN [ the/DT mat/NN ]./. """ - print("*" * 75) - print("Evaluation text:") + print('*' * 75) + print('Evaluation text:') print(text) - print("*" * 75) + print('*' * 75) print() grammar = r""" @@ -1426,7 +1443,7 @@ def demo(): print("Demonstration of empty grammar:") cp = chunk.RegexpParser("") - print(chunk.accuracy(cp, conll2000.chunked_sents("test.txt", chunk_types=("NP",)))) + print(chunk.accuracy(cp, conll2000.chunked_sents('test.txt', chunk_types=('NP',)))) print() print("Demonstration of accuracy evaluation using CoNLL tags:") @@ -1438,7 +1455,7 @@ def demo(): {} # merge det/adj with nouns """ cp = chunk.RegexpParser(grammar) - print(chunk.accuracy(cp, conll2000.chunked_sents("test.txt")[:5])) + print(chunk.accuracy(cp, conll2000.chunked_sents('test.txt')[:5])) print() print("Demonstration of tagged token input") @@ -1467,5 +1484,5 @@ def demo(): ) -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/chunk/util.py b/nlp_resource_data/nltk/chunk/util.py index 35ee79d..e29760d 100644 --- a/nlp_resource_data/nltk/chunk/util.py +++ b/nlp_resource_data/nltk/chunk/util.py @@ -1,16 +1,18 @@ # Natural Language Toolkit: Chunk format conversions # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # Steven Bird (minor additions) # URL: # For license information, see LICENSE.TXT +from __future__ import print_function, unicode_literals, division import re from nltk.tree import Tree from nltk.tag.mapping import map_tag from nltk.tag.util import str2tuple +from nltk.compat import python_2_unicode_compatible ##////////////////////////////////////////////////////// ## EVALUATION @@ -118,10 +120,10 @@ class ChunkScore(object): self._tp = set() self._fp = set() self._fn = set() - self._max_tp = kwargs.get("max_tp_examples", 100) - self._max_fp = kwargs.get("max_fp_examples", 100) - self._max_fn = kwargs.get("max_fn_examples", 100) - self._chunk_label = kwargs.get("chunk_label", ".*") + self._max_tp = kwargs.get('max_tp_examples', 100) + self._max_fp = kwargs.get('max_fp_examples', 100) + self._max_fn = kwargs.get('max_fn_examples', 100) + self._chunk_label = kwargs.get('chunk_label', '.*') self._tp_num = 0 self._fp_num = 0 self._fn_num = 0 @@ -282,7 +284,7 @@ class ChunkScore(object): :rtype: str """ - return "" + return '' def __str__(self): """ @@ -318,7 +320,7 @@ def _chunksets(t, count, chunk_label): def tagstr2tree( - s, chunk_label="NP", root_label="S", sep="/", source_tagset=None, target_tagset=None + s, chunk_label="NP", root_label="S", sep='/', source_tagset=None, target_tagset=None ): """ Divide a string of bracketted tagged text into @@ -337,20 +339,20 @@ def tagstr2tree( :rtype: Tree """ - WORD_OR_BRACKET = re.compile(r"\[|\]|[^\[\]\s]+") + WORD_OR_BRACKET = 
re.compile(r'\[|\]|[^\[\]\s]+') stack = [Tree(root_label, [])] for match in WORD_OR_BRACKET.finditer(s): text = match.group() - if text[0] == "[": + if text[0] == '[': if len(stack) != 1: - raise ValueError("Unexpected [ at char {:d}".format(match.start())) + raise ValueError('Unexpected [ at char {:d}'.format(match.start())) chunk = Tree(chunk_label, []) stack[-1].append(chunk) stack.append(chunk) - elif text[0] == "]": + elif text[0] == ']': if len(stack) != 2: - raise ValueError("Unexpected ] at char {:d}".format(match.start())) + raise ValueError('Unexpected ] at char {:d}'.format(match.start())) stack.pop() else: if sep is None: @@ -362,16 +364,16 @@ def tagstr2tree( stack[-1].append((word, tag)) if len(stack) != 1: - raise ValueError("Expected ] at char {:d}".format(len(s))) + raise ValueError('Expected ] at char {:d}'.format(len(s))) return stack[0] ### CONLL -_LINE_RE = re.compile("(\S+)\s+(\S+)\s+([IOB])-?(\S+)?") +_LINE_RE = re.compile('(\S+)\s+(\S+)\s+([IOB])-?(\S+)?') -def conllstr2tree(s, chunk_types=("NP", "PP", "VP"), root_label="S"): +def conllstr2tree(s, chunk_types=('NP', 'PP', 'VP'), root_label="S"): """ Return a chunk structure for a single sentence encoded in the given CONLL 2000 style string. @@ -391,29 +393,29 @@ def conllstr2tree(s, chunk_types=("NP", "PP", "VP"), root_label="S"): stack = [Tree(root_label, [])] - for lineno, line in enumerate(s.split("\n")): + for lineno, line in enumerate(s.split('\n')): if not line.strip(): continue # Decode the line. match = _LINE_RE.match(line) if match is None: - raise ValueError("Error on line {:d}".format(lineno)) + raise ValueError('Error on line {:d}'.format(lineno)) (word, tag, state, chunk_type) = match.groups() # If it's a chunk type we don't care about, treat it as O. if chunk_types is not None and chunk_type not in chunk_types: - state = "O" + state = 'O' # For "Begin"/"Outside", finish any completed chunks - # also do so for "Inside" which don't match the previous token. - mismatch_I = state == "I" and chunk_type != stack[-1].label() - if state in "BO" or mismatch_I: + mismatch_I = state == 'I' and chunk_type != stack[-1].label() + if state in 'BO' or mismatch_I: if len(stack) == 2: stack.pop() # For "Begin", start a new chunk. - if state == "B" or mismatch_I: + if state == 'B' or mismatch_I: chunk = Tree(chunk_type, []) stack[-1].append(chunk) stack.append(chunk) @@ -452,7 +454,7 @@ def tree2conlltags(t): def conlltags2tree( - sentence, chunk_types=("NP", "PP", "VP"), root_label="S", strict=False + sentence, chunk_types=('NP', 'PP', 'VP'), root_label='S', strict=False ): """ Convert the CoNLL IOB format to a tree. @@ -465,9 +467,9 @@ def conlltags2tree( else: # Treat as O tree.append((word, postag)) - elif chunktag.startswith("B-"): + elif chunktag.startswith('B-'): tree.append(Tree(chunktag[2:], [(word, postag)])) - elif chunktag.startswith("I-"): + elif chunktag.startswith('I-'): if ( len(tree) == 0 or not isinstance(tree[-1], Tree) @@ -480,7 +482,7 @@ def conlltags2tree( tree.append(Tree(chunktag[2:], [(word, postag)])) else: tree[-1].append((word, postag)) - elif chunktag == "O": + elif chunktag == 'O': tree.append((word, postag)) else: raise ValueError("Bad conll tag {0!r}".format(chunktag)) @@ -497,20 +499,20 @@ def tree2conllstr(t): :rtype: str """ lines = [" ".join(token) for token in tree2conlltags(t)] - return "\n".join(lines) + return '\n'.join(lines) ### IEER _IEER_DOC_RE = re.compile( - r"\s*" - r"(\s*(?P.+?)\s*\s*)?" - r"(\s*(?P.+?)\s*\s*)?" - r"(\s*(?P.+?)\s*\s*)?" - r"\s*" - r"(\s*(?P.+?)\s*\s*)?" 
- r"(?P.*?)\s*" - r"\s*\s*", + r'\s*' + r'(\s*(?P.+?)\s*\s*)?' + r'(\s*(?P.+?)\s*\s*)?' + r'(\s*(?P.+?)\s*\s*)?' + r'\s*' + r'(\s*(?P.+?)\s*\s*)?' + r'(?P.*?)\s*' + r'\s*\s*', re.DOTALL, ) @@ -523,17 +525,17 @@ def _ieer_read_text(s, root_label): # return the empty list in place of a Tree if s is None: return [] - for piece_m in re.finditer("<[^>]+>|[^\s<]+", s): + for piece_m in re.finditer('<[^>]+>|[^\s<]+', s): piece = piece_m.group() try: - if piece.startswith(" # URL: # For license information, see LICENSE.TXT @@ -57,7 +57,7 @@ to the classifier: >>> from nltk.corpus import gutenberg >>> for fileid in gutenberg.fileids(): # doctest: +SKIP ... doc = gutenberg.words(fileid) # doctest: +SKIP - ... print(fileid, classifier.classify(document_features(doc))) # doctest: +SKIP + ... print fileid, classifier.classify(document_features(doc)) # doctest: +SKIP The parameters that a feature detector expects will vary, depending on the task and the needs of the feature detector. For example, a diff --git a/nlp_resource_data/nltk/classify/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/classify/__pycache__/__init__.cpython-37.pyc index 7df3038beaaeeab3d4c6e1da4db42abea4a267f0..0e6a47aacd69e5741d72c975386074ad9d4f4b23 100644 GIT binary patch delta 49 zcmX@0vRQ@KiIC!@k-b1vh}*<7*gY!3RR#YOs)ZwaOW E098~Ds{jB1 delta 61 zcmdn2azKUGiISrZZX5<$n7G>+}ZT4V2D*yoO77Qx@ diff --git a/nlp_resource_data/nltk/classify/__pycache__/decisiontree.cpython-37.pyc b/nlp_resource_data/nltk/classify/__pycache__/decisiontree.cpython-37.pyc index 5f56ab96e7192a9f5b16adc0d6f8e8e99767fce1..599c0fee6b6f4e2dac6b60ebbba3a7e834c69e79 100644 GIT binary patch delta 3102 zcma)8O>7&-72etZUH*u&B%1n%Mb4(y$5E6nB}dv`f-& zmw^Ep0I9rM<`ndT9;?szXtrXbw39{kaurfd$e-duv)0MbV3UXy2QqL`!i2 z6PU-hZ{ECl^Sz&$-DiG%W$QvNXG{2N8y`7G7bNK~^s)ETkXgnN-A3X{70G3;9I#NX z$e~hELbakYk|}`}>J?q^YG8zB#SE>A71|YB$h066W-D32>p?EeSMp5q@|Pv|QNuNF zNv`Q-x0P*H8E`Gvz9m%(ZpO{x|Dboo&8^GuzBjln;qv^8QhDH`4}U+aB>5Kjjo7KT z8@1SPHj+ZS;n$k3=LCN2K{bdH)Air-Bb?*c+46}pOD0>J@te(tv*7IM)|z3f8vCn( zx4Xl9H8XnfYiM+V1K!GMFNG!WC zkb#sO-ILGqb>rh}Ir=0+iSmXQL!LOZ7F46iuWtgG?b%A~x>t?c;Gy|Mt-nYUlSXc*;8)nbywiv#1k z|F#V_dcJ}6c_vRzrvo{yI=Zi4Sf0vye62O*lHqq-%2&*wGTu?+BPEgRIc>oNwE~+ZU-3CH(fY{96W*w&)|rPNF>u{1vbo7R%HAKxohkp z{za}h*@zW9kGi7~l8EDLh@slm@VqKNogaU715wiu#SN6`9c@SNFxNm7QHm(aqY~n% z+;|vg%^lX!*5o@Zl?Y^w9e?J;WdVbLAo+ljzW0K{^2R^~<`EFr?P4k}=E^G<#XAu^W+Av2P z0T1&?61x?7ZMR8PDKZ==OfX4P>bkcXmG!iG9wqS^B8x;QBJenogE$Hm!RyQC+t7VwMzx$T48hF+E&w2jnUl-Gj;3}yW|BpUg!--$v zh-4(H^!6No*Bs>+N5?UM&gjK0+7F&bf}4`1M8SrGtx~IlcOy#u7M+X?eTJiF2t!9s zi=`=k$7!|d`n5PcgbqhYp?{JhDdc+1&B1Nj9vmAY9dQx{FA-0hC0Z1>!`3f%m&e{? 
[GIT binary patch deltas for the remaining nlp_resource_data/nltk/classify/__pycache__/*.cpython-37.pyc files omitted: compiled bytecode caches, not human-readable.]
zR{V=q7JE6{?;*8%A*pIrhaDz}`VPT0f||^WNE@%TZ~asoRH;`_c{Q#YQ97v8;-{zh z1=SJqCR0|y4=o!C1qiW;!r9p>%RLDUrNv^6fdt+BnT`P!l!eL>Syc z%(2N_KU^o#q=nBhy=xdSNx*vq>at1EN?j%=Xw{7%^M=r z?sj|H_1th#-TP*vBGcOrMH_FMIGIY2LW|<8y@kVe*@Ze{))O=k&}*ok>gx4HRRAis xxHHr?af5a@2`D4r1RE_{uboVc`6WK@Ipg$?RzT@QSp!RK`fbpf#cxB~{sGpD7196z delta 2379 zcmZ`)OKclO820RXZO4fl=N%_)nl=foTS%I=Kp!FW35C+M6^OzXb-VGblZ|(6_uoy5 zQ&2z+s*u`9W#Gc=0zv`=LNccwkT`JSiUgfYZ1wUpR>yo zA55f)#CR3|sYHHO@U9C|ws#e1^)EYBmxawZq zPnZc7xjB~T)b4S*ogOEHlD%AeF_O*9A50$g z>>zOSt^W_yzpStgAj7w$q{kX+f5t z&Uz|W3%X+YcCm7ydOh(1V{PJg!?)vIC@vGBr*(K+Q@EHPo{okI}f2SkU7lY5nN={bBj)$18@vEABrcN53|#v(tLjO5T4qTFgFb?WN3?pcn$lT;-B7z2cX0>2Z{OugY6aGWKfqyVgA{)kY?$a@h{( z68GBrdq#+|i=eI8n+Ba9KjxHRiJAq?WHjx6gJzjn+qvpUgP*0!svo**-cmC0PA zt~AL7P2BCiHAuoT7M2Q5ab4#ra=rvux0E>B(<|;|P7Pf^Doi1))M(W!WtJ{&LFrG~ zsJPdY**8k^K0zNT-S6~xX*Q%G29G#(CP<~Ddbqb;+f5wQQN6CChtg1EQ>U@qFgE2B zo!84SJ}9nj`hG#`ZZU+zs4R7?xoS75PdTBC1tka_4kj5+)UGF&!6g8K00C#bX#0+g z=I3k=4`HSJ(3qV^xs5Cm38(`iiB>(zHn3Lwk5zB=akhUNskIkLrK{HM@ElPW2;L^B z3w;J@W7Tv0UuZ)r^xBx${HhbBNxdkx4)BYrBy^ihS$RLSY{=&=^c&tp8V0GFff8-T zRrWA^<;8<&|KFbiYMm8#2R`5X4(?!{;0gidcI6p=iwJezT|-(y{4jW6Do0#S;2_Aj zW${wZvOtZ1Y7CWsD!OdKha+@z_nV#)43rTdH-Jp8xkg>6T8?V@kFbV%nJKOey|K_w zf;2JMqFLIn_f0nYcEPb$$GJj9)KDWH-XQqacH5H+uIGjo^{|^86djwE2BEf3h50WJlV}6Oxdd@fn6i`4>Ex}fQP2$|P GfqwxXn**r; diff --git a/nlp_resource_data/nltk/classify/api.py b/nlp_resource_data/nltk/classify/api.py index ba6d88e..91987c1 100644 --- a/nlp_resource_data/nltk/classify/api.py +++ b/nlp_resource_data/nltk/classify/api.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Classifier Interface # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # Steven Bird (minor additions) # URL: diff --git a/nlp_resource_data/nltk/classify/decisiontree.py b/nlp_resource_data/nltk/classify/decisiontree.py index 10c784c..0739cf4 100644 --- a/nlp_resource_data/nltk/classify/decisiontree.py +++ b/nlp_resource_data/nltk/classify/decisiontree.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Decision Tree Classifiers # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # URL: # For license information, see LICENSE.TXT @@ -10,13 +10,16 @@ A classifier model that decides which label to assign to a token on the basis of a tree structure, where branches correspond to conditions on feature values, and leaves correspond to label assignments. """ +from __future__ import print_function, unicode_literals, division from collections import defaultdict from nltk.probability import FreqDist, MLEProbDist, entropy from nltk.classify.api import ClassifierI +from nltk.compat import python_2_unicode_compatible +@python_2_unicode_compatible class DecisionTreeClassifier(ClassifierI): def __init__(self, label, feature_name=None, decisions=None, default=None): """ @@ -69,7 +72,7 @@ class DecisionTreeClassifier(ClassifierI): errors += 1 return errors / len(labeled_featuresets) - def pretty_format(self, width=70, prefix="", depth=4): + def pretty_format(self, width=70, prefix='', depth=4): """ Return a string containing a pretty-printed version of this decision tree. Each line in this string corresponds to a @@ -81,24 +84,20 @@ class DecisionTreeClassifier(ClassifierI): n = width - len(prefix) - 15 return '{0}{1} {2}\n'.format(prefix, '.' 
* n, self._label) s = '' - for i, (fval, result) in enumerate(sorted(self._decisions.items(), - key=lambda item: - (item[0] in [None, False, True], str(item[0]).lower()) - ) - ): + for i, (fval, result) in enumerate(sorted(self._decisions.items())): hdr = '{0}{1}={2}? '.format(prefix, self._fname, fval) n = width - 15 - len(hdr) - s += "{0}{1} {2}\n".format(hdr, "." * (n), result._label) + s += '{0}{1} {2}\n'.format(hdr, '.' * (n), result._label) if result._fname is not None and depth > 1: - s += result.pretty_format(width, prefix + " ", depth - 1) + s += result.pretty_format(width, prefix + ' ', depth - 1) if self._default is not None: n = width - len(prefix) - 21 - s += "{0}else: {1} {2}\n".format(prefix, "." * n, self._default._label) + s += '{0}else: {1} {2}\n'.format(prefix, '.' * n, self._default._label) if self._default._fname is not None and depth > 1: - s += self._default.pretty_format(width, prefix + " ", depth - 1) + s += self._default.pretty_format(width, prefix + ' ', depth - 1) return s - def pseudocode(self, prefix="", depth=4): + def pseudocode(self, prefix='', depth=4): """ Return a string representation of this decision tree that expresses the decisions it makes as a nested set of pseudocode @@ -107,26 +106,23 @@ class DecisionTreeClassifier(ClassifierI): if self._fname is None: return "{0}return {1!r}\n".format(prefix, self._label) s = '' - for (fval, result) in sorted(self._decisions.items(), - key=lambda item: - (item[0] in [None, False, True], str(item[0]).lower()) - ): + for (fval, result) in sorted(self._decisions.items()): s += '{0}if {1} == {2!r}: '.format(prefix, self._fname, fval) if result._fname is not None and depth > 1: - s += "\n" + result.pseudocode(prefix + " ", depth - 1) + s += '\n' + result.pseudocode(prefix + ' ', depth - 1) else: - s += "return {0!r}\n".format(result._label) + s += 'return {0!r}\n'.format(result._label) if self._default is not None: if len(self._decisions) == 1: - s += "{0}if {1} != {2!r}: ".format( + s += '{0}if {1} != {2!r}: '.format( prefix, self._fname, list(self._decisions.keys())[0] ) else: - s += "{0}else: ".format(prefix) + s += '{0}else: '.format(prefix) if self._default._fname is not None and depth > 1: - s += "\n" + self._default.pseudocode(prefix + " ", depth - 1) + s += '\n' + self._default.pseudocode(prefix + ' ', depth - 1) else: - s += "return {0!r}\n".format(self._default._label) + s += 'return {0!r}\n'.format(self._default._label) return s def __str__(self): @@ -269,7 +265,7 @@ class DecisionTreeClassifier(ClassifierI): if verbose: print( ( - "best stump for {:6d} toks uses {:20} err={:6.4f}".format( + 'best stump for {:6d} toks uses {:20} err={:6.4f}'.format( len(labeled_featuresets), best_stump._fname, best_error ) ) @@ -316,14 +312,14 @@ class DecisionTreeClassifier(ClassifierI): best_stump = stump if verbose: if best_stump._decisions: - descr = "{0}={1}".format( + descr = '{0}={1}'.format( best_stump._fname, list(best_stump._decisions.keys())[0] ) else: - descr = "(default)" + descr = '(default)' print( ( - "best stump for {:6d} toks uses {:20} err={:6.4f}".format( + 'best stump for {:6d} toks uses {:20} err={:6.4f}'.format( len(labeled_featuresets), descr, best_error ) ) @@ -346,9 +342,9 @@ def demo(): classifier = names_demo( f, binary_names_demo_features # DecisionTreeClassifier.train, ) - print(classifier.pretty_format(depth=7)) + print(classifier.pp(depth=7)) print(classifier.pseudocode(depth=7)) -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git 
a/nlp_resource_data/nltk/classify/maxent.py b/nlp_resource_data/nltk/classify/maxent.py index 7a03f81..e74b676 100644 --- a/nlp_resource_data/nltk/classify/maxent.py +++ b/nlp_resource_data/nltk/classify/maxent.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Maximum Entropy Classifiers # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # Dmitry Chichkov (TypedMaxentFeatureEncoding) # URL: @@ -51,6 +51,8 @@ For all values of ``feat_val`` and ``some_label``. This mapping is performed by classes that implement the ``MaxentFeatureEncodingI`` interface. """ +from __future__ import print_function, unicode_literals + try: import numpy except ImportError: @@ -60,6 +62,9 @@ import tempfile import os from collections import defaultdict +from six import integer_types + +from nltk import compat from nltk.data import gzip_open_unicode from nltk.util import OrderedDict from nltk.probability import DictionaryProbDist @@ -69,13 +74,14 @@ from nltk.classify.util import CutoffChecker, accuracy, log_likelihood from nltk.classify.megam import call_megam, write_megam_file, parse_megam_weights from nltk.classify.tadm import call_tadm, write_tadm_file, parse_tadm_weights -__docformat__ = "epytext en" +__docformat__ = 'epytext en' ###################################################################### # { Classifier Model ###################################################################### +@compat.python_2_unicode_compatible class MaxentClassifier(ClassifierI): """ A maximum entropy classifier (also known as a "conditional @@ -167,16 +173,16 @@ class MaxentClassifier(ClassifierI): probabilities of each label for that featureset. """ descr_width = 50 - TEMPLATE = " %-" + str(descr_width - 2) + "s%s%8.3f" + TEMPLATE = ' %-' + str(descr_width - 2) + 's%s%8.3f' pdist = self.prob_classify(featureset) labels = sorted(pdist.samples(), key=pdist.prob, reverse=True) labels = labels[:columns] print( - " Feature".ljust(descr_width) - + "".join("%8s" % (("%s" % l)[:7]) for l in labels) + ' Feature'.ljust(descr_width) + + ''.join('%8s' % (("%s" % l)[:7]) for l in labels) ) - print(" " + "-" * (descr_width - 2 + 8 * len(labels))) + print(' ' + '-' * (descr_width - 2 + 8 * len(labels))) sums = defaultdict(int) for i, label in enumerate(labels): feature_vector = self._encoding.encode(featureset, label) @@ -189,26 +195,26 @@ class MaxentClassifier(ClassifierI): else: score = self._weights[f_id] ** f_val descr = self._encoding.describe(f_id) - descr = descr.split(" and label is ")[0] # hack - descr += " (%s)" % f_val # hack + descr = descr.split(' and label is ')[0] # hack + descr += ' (%s)' % f_val # hack if len(descr) > 47: - descr = descr[:44] + "..." - print(TEMPLATE % (descr, i * 8 * " ", score)) + descr = descr[:44] + '...' + print(TEMPLATE % (descr, i * 8 * ' ', score)) sums[label] += score - print(" " + "-" * (descr_width - 1 + 8 * len(labels))) + print(' ' + '-' * (descr_width - 1 + 8 * len(labels))) print( - " TOTAL:".ljust(descr_width) + "".join("%8.3f" % sums[l] for l in labels) + ' TOTAL:'.ljust(descr_width) + ''.join('%8.3f' % sums[l] for l in labels) ) print( - " PROBS:".ljust(descr_width) - + "".join("%8.3f" % pdist.prob(l) for l in labels) + ' PROBS:'.ljust(descr_width) + + ''.join('%8.3f' % pdist.prob(l) for l in labels) ) def most_informative_features(self, n=10): """ Generates the ranked list of informative features from most to least. 
""" - if hasattr(self, "_most_informative_features"): + if hasattr(self, '_most_informative_features'): return self._most_informative_features[:n] else: self._most_informative_features = sorted( @@ -218,7 +224,7 @@ class MaxentClassifier(ClassifierI): ) return self._most_informative_features[:n] - def show_most_informative_features(self, n=10, show="all"): + def show_most_informative_features(self, n=10, show='all'): """ :param show: all, neg, or pos (for negative-only or positive-only) :type show: str @@ -227,22 +233,22 @@ class MaxentClassifier(ClassifierI): """ # Use None the full list of ranked features. fids = self.most_informative_features(None) - if show == "pos": + if show == 'pos': fids = [fid for fid in fids if self._weights[fid] > 0] - elif show == "neg": + elif show == 'neg': fids = [fid for fid in fids if self._weights[fid] < 0] for fid in fids[:n]: - print("%8.3f %s" % (self._weights[fid], self._encoding.describe(fid))) + print('%8.3f %s' % (self._weights[fid], self._encoding.describe(fid))) def __repr__(self): - return "" % ( + return '' % ( len(self._encoding.labels()), self._encoding.length(), ) #: A list of the algorithm names that are accepted for the #: ``train()`` method's ``algorithm`` parameter. - ALGORITHMS = ["GIS", "IIS", "MEGAM", "TADM"] + ALGORITHMS = ['GIS', 'IIS', 'MEGAM', 'TADM'] @classmethod def train( @@ -307,42 +313,42 @@ class MaxentClassifier(ClassifierI): log likelihood by less than ``v``. """ if algorithm is None: - algorithm = "iis" + algorithm = 'iis' for key in cutoffs: if key not in ( - "max_iter", - "min_ll", - "min_lldelta", - "max_acc", - "min_accdelta", - "count_cutoff", - "norm", - "explicit", - "bernoulli", + 'max_iter', + 'min_ll', + 'min_lldelta', + 'max_acc', + 'min_accdelta', + 'count_cutoff', + 'norm', + 'explicit', + 'bernoulli', ): - raise TypeError("Unexpected keyword arg %r" % key) + raise TypeError('Unexpected keyword arg %r' % key) algorithm = algorithm.lower() - if algorithm == "iis": + if algorithm == 'iis': return train_maxent_classifier_with_iis( train_toks, trace, encoding, labels, **cutoffs ) - elif algorithm == "gis": + elif algorithm == 'gis': return train_maxent_classifier_with_gis( train_toks, trace, encoding, labels, **cutoffs ) - elif algorithm == "megam": + elif algorithm == 'megam': return train_maxent_classifier_with_megam( train_toks, trace, encoding, labels, gaussian_prior_sigma, **cutoffs ) - elif algorithm == "tadm": + elif algorithm == 'tadm': kwargs = cutoffs - kwargs["trace"] = trace - kwargs["encoding"] = encoding - kwargs["labels"] = labels - kwargs["gaussian_prior_sigma"] = gaussian_prior_sigma + kwargs['trace'] = trace + kwargs['encoding'] = encoding + kwargs['labels'] = labels + kwargs['gaussian_prior_sigma'] = gaussian_prior_sigma return TadmMaxentClassifier.train(train_toks, **kwargs) else: - raise ValueError("Unknown algorithm %s" % algorithm) + raise ValueError('Unknown algorithm %s' % algorithm) #: Alias for MaxentClassifier. 
@@ -471,7 +477,7 @@ class FunctionBackedMaxentFeatureEncoding(MaxentFeatureEncodingI): return self._labels def describe(self, fid): - return "no description available" + return 'no description available' class BinaryMaxentFeatureEncoding(MaxentFeatureEncodingI): @@ -534,8 +540,8 @@ class BinaryMaxentFeatureEncoding(MaxentFeatureEncodingI): """ if set(mapping.values()) != set(range(len(mapping))): raise ValueError( - "Mapping values must be exactly the " - "set of integers from 0...len(mapping)" + 'Mapping values must be exactly the ' + 'set of integers from 0...len(mapping)' ) self._labels = list(labels) @@ -595,8 +601,8 @@ class BinaryMaxentFeatureEncoding(MaxentFeatureEncodingI): def describe(self, f_id): # Inherit docs. - if not isinstance(f_id, int): - raise TypeError("describe() expected an int") + if not isinstance(f_id, integer_types): + raise TypeError('describe() expected an int') try: self._inv_mapping except AttributeError: @@ -606,17 +612,17 @@ class BinaryMaxentFeatureEncoding(MaxentFeatureEncodingI): if f_id < len(self._mapping): (fname, fval, label) = self._inv_mapping[f_id] - return "%s==%r and label is %r" % (fname, fval, label) + return '%s==%r and label is %r' % (fname, fval, label) elif self._alwayson and f_id in self._alwayson.values(): for (label, f_id2) in self._alwayson.items(): if f_id == f_id2: - return "label is %r" % label + return 'label is %r' % label elif self._unseen and f_id in self._unseen.values(): for (fname, f_id2) in self._unseen.items(): if f_id == f_id2: - return "%s is unseen" % fname + return '%s is unseen' % fname else: - raise ValueError("Bad feature id") + raise ValueError('Bad feature id') def labels(self): # Inherit docs. @@ -660,7 +666,7 @@ class BinaryMaxentFeatureEncoding(MaxentFeatureEncodingI): for (tok, label) in train_toks: if labels and label not in labels: - raise ValueError("Unexpected label %s" % label) + raise ValueError('Unexpected label %s' % label) seen_labels.add(label) # Record each of the features. @@ -724,7 +730,7 @@ class GISEncoding(BinaryMaxentFeatureEncoding): # Add a correction feature. total = sum(v for (f, v) in encoding) if total >= self._C: - raise ValueError("Correction feature is not high enough!") + raise ValueError('Correction feature is not high enough!') encoding.append((base_length, self._C - total)) # Return the result @@ -735,7 +741,7 @@ class GISEncoding(BinaryMaxentFeatureEncoding): def describe(self, f_id): if f_id == BinaryMaxentFeatureEncoding.length(self): - return "Correction feature (%s)" % self._C + return 'Correction feature (%s)' % self._C else: return BinaryMaxentFeatureEncoding.describe(self, f_id) @@ -872,8 +878,8 @@ class TypedMaxentFeatureEncoding(MaxentFeatureEncodingI): """ if set(mapping.values()) != set(range(len(mapping))): raise ValueError( - "Mapping values must be exactly the " - "set of integers from 0...len(mapping)" + 'Mapping values must be exactly the ' + 'set of integers from 0...len(mapping)' ) self._labels = list(labels) @@ -910,7 +916,7 @@ class TypedMaxentFeatureEncoding(MaxentFeatureEncodingI): # Convert input-features to joint-features: for fname, fval in featureset.items(): - if isinstance(fval, (int, float)): + if isinstance(fval, (integer_types, float)): # Known feature name & value: if (fname, type(fval), label) in self._mapping: encoding.append((self._mapping[fname, type(fval), label], fval)) @@ -938,8 +944,8 @@ class TypedMaxentFeatureEncoding(MaxentFeatureEncodingI): def describe(self, f_id): # Inherit docs. 
- if not isinstance(f_id, int): - raise TypeError("describe() expected an int") + if not isinstance(f_id, integer_types): + raise TypeError('describe() expected an int') try: self._inv_mapping except AttributeError: @@ -949,17 +955,17 @@ class TypedMaxentFeatureEncoding(MaxentFeatureEncodingI): if f_id < len(self._mapping): (fname, fval, label) = self._inv_mapping[f_id] - return "%s==%r and label is %r" % (fname, fval, label) + return '%s==%r and label is %r' % (fname, fval, label) elif self._alwayson and f_id in self._alwayson.values(): for (label, f_id2) in self._alwayson.items(): if f_id == f_id2: - return "label is %r" % label + return 'label is %r' % label elif self._unseen and f_id in self._unseen.values(): for (fname, f_id2) in self._unseen.items(): if f_id == f_id2: - return "%s is unseen" % fname + return '%s is unseen' % fname else: - raise ValueError("Bad feature id") + raise ValueError('Bad feature id') def labels(self): # Inherit docs. @@ -1006,7 +1012,7 @@ class TypedMaxentFeatureEncoding(MaxentFeatureEncodingI): for (tok, label) in train_toks: if labels and label not in labels: - raise ValueError("Unexpected label %s" % label) + raise ValueError('Unexpected label %s' % label) seen_labels.add(label) # Record each of the features. @@ -1043,17 +1049,17 @@ def train_maxent_classifier_with_gis( :see: ``train_maxent_classifier()`` for parameter descriptions. """ - cutoffs.setdefault("max_iter", 100) + cutoffs.setdefault('max_iter', 100) cutoffchecker = CutoffChecker(cutoffs) # Construct an encoding from the training data. if encoding is None: encoding = GISEncoding.train(train_toks, labels=labels) - if not hasattr(encoding, "C"): + if not hasattr(encoding, 'C'): raise TypeError( - "The GIS algorithm requires an encoding that " - "defines C (e.g., GISEncoding)." + 'The GIS algorithm requires an encoding that ' + 'defines C (e.g., GISEncoding).' ) # Cinv is the inverse of the sum of each joint feature vector. @@ -1069,7 +1075,7 @@ def train_maxent_classifier_with_gis( # Build the classifier. Start with weight=0 for each attested # feature, and weight=-infinity for each unattested feature. - weights = numpy.zeros(len(empirical_fcount), "d") + weights = numpy.zeros(len(empirical_fcount), 'd') for fid in unattested: weights[fid] = numpy.NINF classifier = ConditionalExponentialClassifier(encoding, weights) @@ -1079,11 +1085,11 @@ def train_maxent_classifier_with_gis( del empirical_fcount if trace > 0: - print(" ==> Training (%d iterations)" % cutoffs["max_iter"]) + print(' ==> Training (%d iterations)' % cutoffs['max_iter']) if trace > 2: print() - print(" Iteration Log Likelihood Accuracy") - print(" ---------------------------------------") + print(' Iteration Log Likelihood Accuracy') + print(' ---------------------------------------') # Train the classifier. try: @@ -1092,7 +1098,7 @@ def train_maxent_classifier_with_gis( ll = cutoffchecker.ll or log_likelihood(classifier, train_toks) acc = cutoffchecker.acc or accuracy(classifier, train_toks) iternum = cutoffchecker.iter - print(" %9d %14.5f %9.3f" % (iternum, ll, acc)) + print(' %9d %14.5f %9.3f' % (iternum, ll, acc)) # Use the model to estimate the number of times each # feature should occur in the training data. 
@@ -1116,21 +1122,21 @@ def train_maxent_classifier_with_gis( break except KeyboardInterrupt: - print(" Training stopped: keyboard interrupt") + print(' Training stopped: keyboard interrupt') except: raise if trace > 2: ll = log_likelihood(classifier, train_toks) acc = accuracy(classifier, train_toks) - print(" Final %14.5f %9.3f" % (ll, acc)) + print(' Final %14.5f %9.3f' % (ll, acc)) # Return the classifier. return classifier def calculate_empirical_fcount(train_toks, encoding): - fcount = numpy.zeros(encoding.length(), "d") + fcount = numpy.zeros(encoding.length(), 'd') for tok, label in train_toks: for (index, val) in encoding.encode(tok, label): @@ -1140,7 +1146,7 @@ def calculate_empirical_fcount(train_toks, encoding): def calculate_estimated_fcount(classifier, train_toks, encoding): - fcount = numpy.zeros(encoding.length(), "d") + fcount = numpy.zeros(encoding.length(), 'd') for tok, label in train_toks: pdist = classifier.prob_classify(tok) @@ -1169,7 +1175,7 @@ def train_maxent_classifier_with_iis( :see: ``train_maxent_classifier()`` for parameter descriptions. """ - cutoffs.setdefault("max_iter", 100) + cutoffs.setdefault('max_iter', 100) cutoffchecker = CutoffChecker(cutoffs) # Construct an encoding from the training data. @@ -1185,7 +1191,7 @@ def train_maxent_classifier_with_iis( # nfarray performs the reverse operation. nfident is # nfarray multiplied by an identity matrix. nfmap = calculate_nfmap(train_toks, encoding) - nfarray = numpy.array(sorted(nfmap, key=nfmap.__getitem__), "d") + nfarray = numpy.array(sorted(nfmap, key=nfmap.__getitem__), 'd') nftranspose = numpy.reshape(nfarray, (len(nfarray), 1)) # Check for any features that are not attested in train_toks. @@ -1193,17 +1199,17 @@ def train_maxent_classifier_with_iis( # Build the classifier. Start with weight=0 for each attested # feature, and weight=-infinity for each unattested feature. - weights = numpy.zeros(len(empirical_ffreq), "d") + weights = numpy.zeros(len(empirical_ffreq), 'd') for fid in unattested: weights[fid] = numpy.NINF classifier = ConditionalExponentialClassifier(encoding, weights) if trace > 0: - print(" ==> Training (%d iterations)" % cutoffs["max_iter"]) + print(' ==> Training (%d iterations)' % cutoffs['max_iter']) if trace > 2: print() - print(" Iteration Log Likelihood Accuracy") - print(" ---------------------------------------") + print(' Iteration Log Likelihood Accuracy') + print(' ---------------------------------------') # Train the classifier. try: @@ -1212,7 +1218,7 @@ def train_maxent_classifier_with_iis( ll = cutoffchecker.ll or log_likelihood(classifier, train_toks) acc = cutoffchecker.acc or accuracy(classifier, train_toks) iternum = cutoffchecker.iter - print(" %9d %14.5f %9.3f" % (iternum, ll, acc)) + print(' %9d %14.5f %9.3f' % (iternum, ll, acc)) # Calculate the deltas for this iteration, using Newton's method. deltas = calculate_deltas( @@ -1236,14 +1242,14 @@ def train_maxent_classifier_with_iis( break except KeyboardInterrupt: - print(" Training stopped: keyboard interrupt") + print(' Training stopped: keyboard interrupt') except: raise if trace > 2: ll = log_likelihood(classifier, train_toks) acc = accuracy(classifier, train_toks) - print(" Final %14.5f %9.3f" % (ll, acc)) + print(' Final %14.5f %9.3f' % (ll, acc)) # Return the classifier. 
return classifier @@ -1353,12 +1359,12 @@ def calculate_deltas( NEWTON_CONVERGE = 1e-12 MAX_NEWTON = 300 - deltas = numpy.ones(encoding.length(), "d") + deltas = numpy.ones(encoding.length(), 'd') # Precompute the A matrix: # A[nf][id] = sum ( p(fs) * p(label|fs) * f(fs,label) ) # over all label,fs s.t. num_features[label,fs]=nf - A = numpy.zeros((len(nfmap), encoding.length()), "d") + A = numpy.zeros((len(nfmap), encoding.length()), 'd') for tok, label in train_toks: dist = classifier.prob_classify(tok) @@ -1427,40 +1433,40 @@ def train_maxent_classifier_with_megam( explicit = True bernoulli = True - if "explicit" in kwargs: - explicit = kwargs["explicit"] - if "bernoulli" in kwargs: - bernoulli = kwargs["bernoulli"] + if 'explicit' in kwargs: + explicit = kwargs['explicit'] + if 'bernoulli' in kwargs: + bernoulli = kwargs['bernoulli'] # Construct an encoding from the training data. if encoding is None: # Count cutoff can also be controlled by megam with the -minfc # option. Not sure where the best place for it is. - count_cutoff = kwargs.get("count_cutoff", 0) + count_cutoff = kwargs.get('count_cutoff', 0) encoding = BinaryMaxentFeatureEncoding.train( train_toks, count_cutoff, labels=labels, alwayson_features=True ) elif labels is not None: - raise ValueError("Specify encoding or labels, not both") + raise ValueError('Specify encoding or labels, not both') # Write a training file for megam. try: - fd, trainfile_name = tempfile.mkstemp(prefix="nltk-") - with open(trainfile_name, "w") as trainfile: + fd, trainfile_name = tempfile.mkstemp(prefix='nltk-') + with open(trainfile_name, 'w') as trainfile: write_megam_file( train_toks, encoding, trainfile, explicit=explicit, bernoulli=bernoulli ) os.close(fd) except (OSError, IOError, ValueError) as e: - raise ValueError("Error while creating megam training file: %s" % e) + raise ValueError('Error while creating megam training file: %s' % e) # Run megam on the training file. options = [] - options += ["-nobias", "-repeat", "10"] + options += ['-nobias', '-repeat', '10'] if explicit: - options += ["-explicit"] + options += ['-explicit'] if not bernoulli: - options += ["-fvals"] + options += ['-fvals'] if gaussian_prior_sigma: # Lambda is just the precision of the Gaussian prior, i.e. it's the # inverse variance, so the parameter conversion is 1.0/sigma**2. 
@@ -1468,25 +1474,25 @@ def train_maxent_classifier_with_megam( inv_variance = 1.0 / gaussian_prior_sigma ** 2 else: inv_variance = 0 - options += ["-lambda", "%.2f" % inv_variance, "-tune"] + options += ['-lambda', '%.2f' % inv_variance, '-tune'] if trace < 3: - options += ["-quiet"] - if "max_iter" in kwargs: - options += ["-maxi", "%s" % kwargs["max_iter"]] - if "ll_delta" in kwargs: + options += ['-quiet'] + if 'max_iter' in kwargs: + options += ['-maxi', '%s' % kwargs['max_iter']] + if 'll_delta' in kwargs: # [xx] this is actually a perplexity delta, not a log # likelihood delta - options += ["-dpp", "%s" % abs(kwargs["ll_delta"])] - if hasattr(encoding, "cost"): - options += ["-multilabel"] # each possible la - options += ["multiclass", trainfile_name] + options += ['-dpp', '%s' % abs(kwargs['ll_delta'])] + if hasattr(encoding, 'cost'): + options += ['-multilabel'] # each possible la + options += ['multiclass', trainfile_name] stdout = call_megam(options) - # print('./megam_i686.opt ', ' '.join(options)) + # print './megam_i686.opt ', ' '.join(options) # Delete the training file try: os.remove(trainfile_name) except (OSError, IOError) as e: - print("Warning: unable to delete %s: %s" % (trainfile_name, e)) + print('Warning: unable to delete %s: %s' % (trainfile_name, e)) # Parse the generated weight vector. weights = parse_megam_weights(stdout, encoding.length(), explicit) @@ -1506,14 +1512,14 @@ def train_maxent_classifier_with_megam( class TadmMaxentClassifier(MaxentClassifier): @classmethod def train(cls, train_toks, **kwargs): - algorithm = kwargs.get("algorithm", "tao_lmvm") - trace = kwargs.get("trace", 3) - encoding = kwargs.get("encoding", None) - labels = kwargs.get("labels", None) - sigma = kwargs.get("gaussian_prior_sigma", 0) - count_cutoff = kwargs.get("count_cutoff", 0) - max_iter = kwargs.get("max_iter") - ll_delta = kwargs.get("min_lldelta") + algorithm = kwargs.get('algorithm', 'tao_lmvm') + trace = kwargs.get('trace', 3) + encoding = kwargs.get('encoding', None) + labels = kwargs.get('labels', None) + sigma = kwargs.get('gaussian_prior_sigma', 0) + count_cutoff = kwargs.get('count_cutoff', 0) + max_iter = kwargs.get('max_iter') + ll_delta = kwargs.get('min_lldelta') # Construct an encoding from the training data. 
if not encoding: @@ -1522,33 +1528,33 @@ class TadmMaxentClassifier(MaxentClassifier): ) trainfile_fd, trainfile_name = tempfile.mkstemp( - prefix="nltk-tadm-events-", suffix=".gz" + prefix='nltk-tadm-events-', suffix='.gz' ) - weightfile_fd, weightfile_name = tempfile.mkstemp(prefix="nltk-tadm-weights-") + weightfile_fd, weightfile_name = tempfile.mkstemp(prefix='nltk-tadm-weights-') - trainfile = gzip_open_unicode(trainfile_name, "w") + trainfile = gzip_open_unicode(trainfile_name, 'w') write_tadm_file(train_toks, encoding, trainfile) trainfile.close() options = [] - options.extend(["-monitor"]) - options.extend(["-method", algorithm]) + options.extend(['-monitor']) + options.extend(['-method', algorithm]) if sigma: - options.extend(["-l2", "%.6f" % sigma ** 2]) + options.extend(['-l2', '%.6f' % sigma ** 2]) if max_iter: - options.extend(["-max_it", "%d" % max_iter]) + options.extend(['-max_it', '%d' % max_iter]) if ll_delta: - options.extend(["-fatol", "%.6f" % abs(ll_delta)]) - options.extend(["-events_in", trainfile_name]) - options.extend(["-params_out", weightfile_name]) + options.extend(['-fatol', '%.6f' % abs(ll_delta)]) + options.extend(['-events_in', trainfile_name]) + options.extend(['-params_out', weightfile_name]) if trace < 3: - options.extend(["2>&1"]) + options.extend(['2>&1']) else: - options.extend(["-summary"]) + options.extend(['-summary']) call_tadm(options) - with open(weightfile_name, "r") as weightfile: + with open(weightfile_name, 'r') as weightfile: weights = parse_tadm_weights(weightfile) os.remove(trainfile_name) @@ -1570,5 +1576,5 @@ def demo(): classifier = names_demo(MaxentClassifier.train) -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/classify/megam.py b/nlp_resource_data/nltk/classify/megam.py index 6a80b7d..f86d8aa 100644 --- a/nlp_resource_data/nltk/classify/megam.py +++ b/nlp_resource_data/nltk/classify/megam.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Interface to Megam Classifier # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # URL: # For license information, see LICENSE.TXT @@ -22,8 +22,13 @@ for details. .. _megam: http://www.umiacs.umd.edu/~hal/megam/index.html """ +from __future__ import print_function + import subprocess +from six import string_types + +from nltk import compat from nltk.internals import find_binary try: @@ -50,11 +55,11 @@ def config_megam(bin=None): """ global _megam_bin _megam_bin = find_binary( - "megam", + 'megam', bin, - env_vars=["MEGAM"], - binary_names=["megam.opt", "megam", "megam_686", "megam_i686.opt"], - url="http://www.umiacs.umd.edu/~hal/megam/index.html", + env_vars=['MEGAM'], + binary_names=['megam.opt', 'megam', 'megam_686', 'megam_i686.opt'], + url='http://www.umiacs.umd.edu/~hal/megam/index.html', ) @@ -100,12 +105,12 @@ def write_megam_file(train_toks, encoding, stream, bernoulli=True, explicit=True # Write the file, which contains one line per instance. for featureset, label in train_toks: # First, the instance number (or, in the weighted multiclass case, the cost of each label). - if hasattr(encoding, "cost"): + if hasattr(encoding, 'cost'): stream.write( - ":".join(str(encoding.cost(featureset, label, l)) for l in labels) + ':'.join(str(encoding.cost(featureset, label, l)) for l in labels) ) else: - stream.write("%d" % labelnum[label]) + stream.write('%d' % labelnum[label]) # For implicit file formats, just list the features that fire # for this instance's actual label. 
@@ -116,11 +121,11 @@ def write_megam_file(train_toks, encoding, stream, bernoulli=True, explicit=True # any of the possible labels. else: for l in labels: - stream.write(" #") + stream.write(' #') _write_megam_features(encoding.encode(featureset, l), stream, bernoulli) # End of the instance. - stream.write("\n") + stream.write('\n') def parse_megam_weights(s, features_count, explicit=True): @@ -130,10 +135,10 @@ def parse_megam_weights(s, features_count, explicit=True): vector. This function does not currently handle bias features. """ if numpy is None: - raise ValueError("This function requires that numpy be installed") - assert explicit, "non-explicit not supported yet" - lines = s.strip().split("\n") - weights = numpy.zeros(features_count, "d") + raise ValueError('This function requires that numpy be installed') + assert explicit, 'non-explicit not supported yet' + lines = s.strip().split('\n') + weights = numpy.zeros(features_count, 'd') for line in lines: if line.strip(): fid, weight = line.split() @@ -144,26 +149,26 @@ def parse_megam_weights(s, features_count, explicit=True): def _write_megam_features(vector, stream, bernoulli): if not vector: raise ValueError( - "MEGAM classifier requires the use of an " "always-on feature." + 'MEGAM classifier requires the use of an ' 'always-on feature.' ) for (fid, fval) in vector: if bernoulli: if fval == 1: - stream.write(" %s" % fid) + stream.write(' %s' % fid) elif fval != 0: raise ValueError( - "If bernoulli=True, then all" "features must be binary." + 'If bernoulli=True, then all' 'features must be binary.' ) else: - stream.write(" %s %s" % (fid, fval)) + stream.write(' %s %s' % (fid, fval)) def call_megam(args): """ Call the ``megam`` binary with the given arguments. """ - if isinstance(args, str): - raise TypeError("args should be a list of strings") + if isinstance(args, string_types): + raise TypeError('args should be a list of strings') if _megam_bin is None: config_megam() @@ -176,9 +181,9 @@ def call_megam(args): if p.returncode != 0: print() print(stderr) - raise OSError("megam command failed!") + raise OSError('megam command failed!') - if isinstance(stdout, str): + if isinstance(stdout, string_types): return stdout else: - return stdout.decode("utf-8") + return stdout.decode('utf-8') diff --git a/nlp_resource_data/nltk/classify/naivebayes.py b/nlp_resource_data/nltk/classify/naivebayes.py index abfed1a..8859439 100644 --- a/nlp_resource_data/nltk/classify/naivebayes.py +++ b/nlp_resource_data/nltk/classify/naivebayes.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Naive Bayes Classifiers # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # URL: # For license information, see LICENSE.TXT @@ -29,6 +29,7 @@ sum to one: | P(label|features) = -------------------------------------------- | SUM[l]( P(l) * P(f1|l) * ... * P(fn|l) ) """ +from __future__ import print_function, unicode_literals from collections import defaultdict @@ -98,7 +99,7 @@ class NaiveBayesClassifier(ClassifierI): if (label, fname) in self._feature_probdist: break else: - # print('Ignoring unseen feature %s' % fname) + # print 'Ignoring unseen feature %s' % fname del featureset[fname] # Find the log probabilty of each label, given the features. @@ -124,7 +125,7 @@ class NaiveBayesClassifier(ClassifierI): def show_most_informative_features(self, n=10): # Determine the most relevant features, and display them. 
cpdist = self._feature_probdist - print("Most Informative Features") + print('Most Informative Features') for (fname, fval) in self.most_informative_features(n): @@ -133,22 +134,21 @@ class NaiveBayesClassifier(ClassifierI): labels = sorted( [l for l in self._labels if fval in cpdist[l, fname].samples()], - key=lambda element: (-labelprob(element), element), - reverse=True + key=labelprob, ) if len(labels) == 1: continue l0 = labels[0] l1 = labels[-1] if cpdist[l0, fname].prob(fval) == 0: - ratio = "INF" + ratio = 'INF' else: - ratio = "%8.1f" % ( + ratio = '%8.1f' % ( cpdist[l1, fname].prob(fval) / cpdist[l0, fname].prob(fval) ) print( ( - "%24s = %-14r %6s : %-6s = %s : 1.0" + '%24s = %-14r %6s : %-6s = %s : 1.0' % (fname, fval, ("%s" % l1)[:6], ("%s" % l0)[:6], ratio) ) ) @@ -163,7 +163,7 @@ class NaiveBayesClassifier(ClassifierI): | max[ P(fname=fval|label1) / P(fname=fval|label2) ] """ - if hasattr(self, "_most_informative_features"): + if hasattr(self, '_most_informative_features'): return self._most_informative_features[:n] else: # The set of (fname, fval) pairs used by this classifier. @@ -186,8 +186,7 @@ class NaiveBayesClassifier(ClassifierI): # Convert features to a list, & sort it by how informative # features are. self._most_informative_features = sorted( - features, key=lambda feature_: (minprob[feature_] / maxprob[feature_], feature_[0], - feature_[1] in [None, False, True], str(feature_[1]).lower()) + features, key=lambda feature_: minprob[feature_] / maxprob[feature_] ) return self._most_informative_features[:n] @@ -253,5 +252,5 @@ def demo(): classifier.show_most_informative_features() -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/classify/positivenaivebayes.py b/nlp_resource_data/nltk/classify/positivenaivebayes.py index 58621f1..7d6cb15 100644 --- a/nlp_resource_data/nltk/classify/positivenaivebayes.py +++ b/nlp_resource_data/nltk/classify/positivenaivebayes.py @@ -59,8 +59,8 @@ The features of a sentence are simply the words it contains: We use the sports sentences as positive examples, the mixed ones ad unlabeled examples: - >>> positive_featuresets = map(features, sports_sentences) - >>> unlabeled_featuresets = map(features, various_sentences) + >>> positive_featuresets = list(map(features, sports_sentences)) + >>> unlabeled_featuresets = list(map(features, various_sentences)) >>> classifier = PositiveNaiveBayesClassifier.train(positive_featuresets, ... unlabeled_featuresets) @@ -95,10 +95,10 @@ class PositiveNaiveBayesClassifier(NaiveBayesClassifier): estimator=ELEProbDist, ): """ - :param positive_featuresets: An iterable of featuresets that are known as positive + :param positive_featuresets: A list of featuresets that are known as positive examples (i.e., their label is ``True``). - :param unlabeled_featuresets: An iterable of featuresets whose label is unknown. + :param unlabeled_featuresets: A list of featuresets whose label is unknown. :param positive_prob_prior: A prior estimate of the probability of the label ``True`` (default 0.5). @@ -109,30 +109,28 @@ class PositiveNaiveBayesClassifier(NaiveBayesClassifier): fnames = set() # Count up how many times each feature value occurred in positive examples. - num_positive_examples = 0 for featureset in positive_featuresets: for fname, fval in featureset.items(): positive_feature_freqdist[fname][fval] += 1 feature_values[fname].add(fval) fnames.add(fname) - num_positive_examples += 1 # Count up how many times each feature value occurred in unlabeled examples. 
- num_unlabeled_examples = 0 for featureset in unlabeled_featuresets: for fname, fval in featureset.items(): unlabeled_feature_freqdist[fname][fval] += 1 feature_values[fname].add(fval) fnames.add(fname) - num_unlabeled_examples += 1 # If a feature didn't have a value given for an instance, then we assume that # it gets the implicit value 'None'. + num_positive_examples = len(positive_featuresets) for fname in fnames: count = positive_feature_freqdist[fname].N() positive_feature_freqdist[fname][None] += num_positive_examples - count feature_values[fname].add(None) + num_unlabeled_examples = len(unlabeled_featuresets) for fname in fnames: count = unlabeled_feature_freqdist[fname].N() unlabeled_feature_freqdist[fname][None] += num_unlabeled_examples - count diff --git a/nlp_resource_data/nltk/classify/rte_classify.py b/nlp_resource_data/nltk/classify/rte_classify.py index 0be8c81..19e1332 100644 --- a/nlp_resource_data/nltk/classify/rte_classify.py +++ b/nlp_resource_data/nltk/classify/rte_classify.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: RTE Classifier # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Ewan Klein # URL: # For license information, see LICENSE.TXT @@ -16,6 +16,7 @@ the hypothesis is more informative than (i.e not entailed by) the text. TO DO: better Named Entity classification TO DO: add lemmatization """ +from __future__ import print_function from nltk.tokenize import RegexpTokenizer from nltk.classify.util import accuracy, check_megam_config @@ -37,28 +38,28 @@ class RTEFeatureExtractor(object): self.stop = stop self.stopwords = set( [ - "a", - "the", - "it", - "they", - "of", - "in", - "to", - "is", - "have", - "are", - "were", - "and", - "very", - ".", - ",", + 'a', + 'the', + 'it', + 'they', + 'of', + 'in', + 'to', + 'is', + 'have', + 'are', + 'were', + 'and', + 'very', + '.', + ',', ] ) - self.negwords = set(["no", "not", "never", "failed", "rejected", "denied"]) + self.negwords = set(['no', 'not', 'never', 'failed', 'rejected', 'denied']) # Try to tokenize so that abbreviations, monetary amounts, email # addresses, URLs are single tokens. 
- tokenizer = RegexpTokenizer("[\w.@:/]+|\w+|\$[\d.]+") + tokenizer = RegexpTokenizer('[\w.@:/]+|\w+|\$[\d.]+') # Get the set of word types for text and hypothesis self.text_tokens = tokenizer.tokenize(rtepair.text) @@ -86,11 +87,11 @@ class RTEFeatureExtractor(object): :type toktype: 'ne' or 'word' """ ne_overlap = set(token for token in self._overlap if self._ne(token)) - if toktype == "ne": + if toktype == 'ne': if debug: print("ne overlap", ne_overlap) return ne_overlap - elif toktype == "word": + elif toktype == 'word': if debug: print("word overlap", self._overlap - ne_overlap) return self._overlap - ne_overlap @@ -105,9 +106,9 @@ class RTEFeatureExtractor(object): :type toktype: 'ne' or 'word' """ ne_extra = set(token for token in self._hyp_extra if self._ne(token)) - if toktype == "ne": + if toktype == 'ne': return ne_extra - elif toktype == "word": + elif toktype == 'word': return self._hyp_extra - ne_extra else: raise ValueError("Type not recognized: '%s'" % toktype) @@ -138,13 +139,13 @@ class RTEFeatureExtractor(object): def rte_features(rtepair): extractor = RTEFeatureExtractor(rtepair) features = {} - features["alwayson"] = True - features["word_overlap"] = len(extractor.overlap("word")) - features["word_hyp_extra"] = len(extractor.hyp_extra("word")) - features["ne_overlap"] = len(extractor.overlap("ne")) - features["ne_hyp_extra"] = len(extractor.hyp_extra("ne")) - features["neg_txt"] = len(extractor.negwords & extractor.text_words) - features["neg_hyp"] = len(extractor.negwords & extractor.hyp_words) + features['alwayson'] = True + features['word_overlap'] = len(extractor.overlap('word')) + features['word_hyp_extra'] = len(extractor.hyp_extra('word')) + features['ne_overlap'] = len(extractor.overlap('ne')) + features['ne_hyp_extra'] = len(extractor.hyp_extra('ne')) + features['neg_txt'] = len(extractor.negwords & extractor.text_words) + features['neg_hyp'] = len(extractor.negwords & extractor.hyp_words) return features @@ -155,17 +156,17 @@ def rte_featurize(rte_pairs): def rte_classifier(algorithm): from nltk.corpus import rte as rte_corpus - train_set = rte_corpus.pairs(["rte1_dev.xml", "rte2_dev.xml", "rte3_dev.xml"]) - test_set = rte_corpus.pairs(["rte1_test.xml", "rte2_test.xml", "rte3_test.xml"]) + train_set = rte_corpus.pairs(['rte1_dev.xml', 'rte2_dev.xml', 'rte3_dev.xml']) + test_set = rte_corpus.pairs(['rte1_test.xml', 'rte2_test.xml', 'rte3_test.xml']) featurized_train_set = rte_featurize(train_set) featurized_test_set = rte_featurize(test_set) # Train the classifier - print("Training classifier...") - if algorithm in ["megam", "BFGS"]: # MEGAM based algorithms. + print('Training classifier...') + if algorithm in ['megam', 'BFGS']: # MEGAM based algorithms. # Ensure that MEGAM is configured first. 
check_megam_config() clf = lambda x: MaxentClassifier.train(featurized_train_set, algorithm) - elif algorithm in ["GIS", "IIS"]: # Use default GIS/IIS MaxEnt algorithm + elif algorithm in ['GIS', 'IIS']: # Use default GIS/IIS MaxEnt algorithm clf = MaxentClassifier.train(featurized_train_set, algorithm) else: err_msg = str( @@ -173,7 +174,7 @@ def rte_classifier(algorithm): "'megam', 'BFGS', 'GIS', 'IIS'.\n" ) raise Exception(err_msg) - print("Testing classifier...") + print('Testing classifier...') acc = accuracy(clf, featurized_test_set) - print("Accuracy: %6.4f" % acc) + print('Accuracy: %6.4f' % acc) return clf diff --git a/nlp_resource_data/nltk/classify/scikitlearn.py b/nlp_resource_data/nltk/classify/scikitlearn.py index 90b450b..c00dcdc 100644 --- a/nlp_resource_data/nltk/classify/scikitlearn.py +++ b/nlp_resource_data/nltk/classify/scikitlearn.py @@ -30,9 +30,13 @@ best 1000 features: ... ('nb', MultinomialNB())]) >>> classif = SklearnClassifier(pipeline) """ +from __future__ import print_function, unicode_literals + +from six.moves import zip from nltk.classify.api import ClassifierI from nltk.probability import DictionaryProbDist +from nltk import compat try: from sklearn.feature_extraction import DictVectorizer @@ -40,9 +44,10 @@ try: except ImportError: pass -__all__ = ["SklearnClassifier"] +__all__ = ['SklearnClassifier'] +@compat.python_2_unicode_compatible class SklearnClassifier(ClassifierI): """Wrapper for scikit-learn classifiers.""" diff --git a/nlp_resource_data/nltk/classify/senna.py b/nlp_resource_data/nltk/classify/senna.py index 35bd402..0ccd29f 100644 --- a/nlp_resource_data/nltk/classify/senna.py +++ b/nlp_resource_data/nltk/classify/senna.py @@ -1,7 +1,7 @@ # encoding: utf-8 # Natural Language Toolkit: Senna Interface # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Rami Al-Rfou' # URL: # For license information, see LICENSE.TXT @@ -29,6 +29,7 @@ The input is: Note: Unit tests for this module can be found in test/unit/test_senna.py + >>> from __future__ import unicode_literals >>> from nltk.classify import Senna >>> pipeline = Senna('/usr/share/senna-v3.0', ['pos', 'chk', 'ner']) >>> sent = 'Dusseldorf is an international business center'.split() @@ -37,20 +38,26 @@ Note: Unit tests for this module can be found in test/unit/test_senna.py ('international', 'I-NP', 'O', 'JJ'), ('business', 'I-NP', 'O', 'NN'), ('center', 'I-NP', 'O', 'NN')] """ + +from __future__ import unicode_literals from os import path, sep, environ from subprocess import Popen, PIPE from platform import architecture, system +from six import text_type + from nltk.tag.api import TaggerI +from nltk.compat import python_2_unicode_compatible -_senna_url = "http://ml.nec-labs.com/senna/" +_senna_url = 'http://ml.nec-labs.com/senna/' +@python_2_unicode_compatible class Senna(TaggerI): - SUPPORTED_OPERATIONS = ["pos", "chk", "ner"] + SUPPORTED_OPERATIONS = ['pos', 'chk', 'ner'] - def __init__(self, senna_path, operations, encoding="utf-8"): + def __init__(self, senna_path, operations, encoding='utf-8'): self._encoding = encoding self._path = path.normpath(senna_path) + sep @@ -59,9 +66,9 @@ class Senna(TaggerI): exe_file_1 = self.executable(self._path) if not path.isfile(exe_file_1): # Check for the system environment - if "SENNA" in environ: + if 'SENNA' in environ: # self._path = path.join(environ['SENNA'],'') - self._path = path.normpath(environ["SENNA"]) + sep + self._path = path.normpath(environ['SENNA']) + sep exe_file_2 = self.executable(self._path) if 
not path.isfile(exe_file_2): raise OSError( @@ -78,16 +85,16 @@ class Senna(TaggerI): be used. """ os_name = system() - if os_name == "Linux": + if os_name == 'Linux': bits = architecture()[0] - if bits == "64bit": - return path.join(base_path, "senna-linux64") - return path.join(base_path, "senna-linux32") - if os_name == "Windows": - return path.join(base_path, "senna-win32.exe") - if os_name == "Darwin": - return path.join(base_path, "senna-osx") - return path.join(base_path, "senna") + if bits == '64bit': + return path.join(base_path, 'senna-linux64') + return path.join(base_path, 'senna-linux32') + if os_name == 'Windows': + return path.join(base_path, 'senna-win32.exe') + if os_name == 'Darwin': + return path.join(base_path, 'senna-osx') + return path.join(base_path, 'senna') def _map(self): """ @@ -125,16 +132,16 @@ class Senna(TaggerI): # Build the senna command to run the tagger _senna_cmd = [ self.executable(self._path), - "-path", + '-path', self._path, - "-usrtokens", - "-iobtags", + '-usrtokens', + '-iobtags', ] - _senna_cmd.extend(["-" + op for op in self.operations]) + _senna_cmd.extend(['-' + op for op in self.operations]) # Serialize the actual sentences to a temporary string - _input = "\n".join((" ".join(x) for x in sentences)) + "\n" - if isinstance(_input, str) and encoding: + _input = '\n'.join((' '.join(x) for x in sentences)) + '\n' + if isinstance(_input, text_type) and encoding: _input = _input.encode(encoding) # Run the tagger and get the output @@ -144,7 +151,7 @@ class Senna(TaggerI): # Check the return code. if p.returncode != 0: - raise RuntimeError("Senna command failed! Details: %s" % stderr) + raise RuntimeError('Senna command failed! Details: %s' % stderr) if encoding: senna_output = stdout.decode(encoding) @@ -160,12 +167,12 @@ class Senna(TaggerI): sentence_index += 1 token_index = 0 continue - tags = tagged_word.split("\t") + tags = tagged_word.split('\t') result = {} for tag in map_: result[tag] = tags[map_[tag]].strip() try: - result["word"] = sentences[sentence_index][token_index] + result['word'] = sentences[sentence_index][token_index] except IndexError: raise IndexError( "Misalignment error occurred at sentence number %d. 
Possible reason" @@ -183,6 +190,6 @@ def setup_module(module): from nose import SkipTest try: - tagger = Senna("/usr/share/senna-v3.0", ["pos", "chk", "ner"]) + tagger = Senna('/usr/share/senna-v3.0', ['pos', 'chk', 'ner']) except OSError: raise SkipTest("Senna executable not found") diff --git a/nlp_resource_data/nltk/classify/svm.py b/nlp_resource_data/nltk/classify/svm.py index 544f859..b6e0b3a 100644 --- a/nlp_resource_data/nltk/classify/svm.py +++ b/nlp_resource_data/nltk/classify/svm.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: SVM-based classifier # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Leon Derczynski # # URL: diff --git a/nlp_resource_data/nltk/classify/tadm.py b/nlp_resource_data/nltk/classify/tadm.py index 8780699..a2f8daf 100644 --- a/nlp_resource_data/nltk/classify/tadm.py +++ b/nlp_resource_data/nltk/classify/tadm.py @@ -1,13 +1,16 @@ # Natural Language Toolkit: Interface to TADM Classifier # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Joseph Frazee # URL: # For license information, see LICENSE.TXT +from __future__ import print_function, unicode_literals import sys import subprocess +from six import string_types + from nltk.internals import find_binary try: @@ -21,7 +24,7 @@ _tadm_bin = None def config_tadm(bin=None): global _tadm_bin _tadm_bin = find_binary( - "tadm", bin, env_vars=["TADM"], binary_names=["tadm"], url="http://tadm.sf.net" + 'tadm', bin, env_vars=['TADM'], binary_names=['tadm'], url='http://tadm.sf.net' ) @@ -47,14 +50,14 @@ def write_tadm_file(train_toks, encoding, stream): # http://sf.net/forum/forum.php?thread_id=1675097&forum_id=473054 labels = encoding.labels() for featureset, label in train_toks: - length_line = "%d\n" % len(labels) + length_line = '%d\n' % len(labels) stream.write(length_line) for known_label in labels: v = encoding.encode(featureset, known_label) - line = "%d %d %s\n" % ( + line = '%d %d %s\n' % ( int(label == known_label), len(v), - " ".join("%d %d" % u for u in v), + ' '.join('%d %d' % u for u in v), ) stream.write(line) @@ -68,15 +71,15 @@ def parse_tadm_weights(paramfile): weights = [] for line in paramfile: weights.append(float(line.strip())) - return numpy.array(weights, "d") + return numpy.array(weights, 'd') def call_tadm(args): """ Call the ``tadm`` binary with the given arguments. 
""" - if isinstance(args, str): - raise TypeError("args should be a list of strings") + if isinstance(args, string_types): + raise TypeError('args should be a list of strings') if _tadm_bin is None: config_tadm() @@ -89,7 +92,7 @@ def call_tadm(args): if p.returncode != 0: print() print(stderr) - raise OSError("tadm command failed!") + raise OSError('tadm command failed!') def names_demo(): @@ -104,18 +107,18 @@ def encoding_demo(): from nltk.classify.maxent import TadmEventMaxentFeatureEncoding tokens = [ - ({"f0": 1, "f1": 1, "f3": 1}, "A"), - ({"f0": 1, "f2": 1, "f4": 1}, "B"), - ({"f0": 2, "f2": 1, "f3": 1, "f4": 1}, "A"), + ({'f0': 1, 'f1': 1, 'f3': 1}, 'A'), + ({'f0': 1, 'f2': 1, 'f4': 1}, 'B'), + ({'f0': 2, 'f2': 1, 'f3': 1, 'f4': 1}, 'A'), ] encoding = TadmEventMaxentFeatureEncoding.train(tokens) write_tadm_file(tokens, encoding, sys.stdout) print() for i in range(encoding.length()): - print("%s --> %d" % (encoding.describe(i), i)) + print('%s --> %d' % (encoding.describe(i), i)) print() -if __name__ == "__main__": +if __name__ == '__main__': encoding_demo() names_demo() diff --git a/nlp_resource_data/nltk/classify/textcat.py b/nlp_resource_data/nltk/classify/textcat.py index 97545d5..b217fa8 100644 --- a/nlp_resource_data/nltk/classify/textcat.py +++ b/nlp_resource_data/nltk/classify/textcat.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: Language ID module using TextCat algorithm # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Avital Pekker # # URL: @@ -18,7 +18,7 @@ n-gram frequencies to profile languages and text-yet to be identified-then compares using a distance measure. Language n-grams are provided by the "An Crubadan" -project. A corpus reader was created separately to read +project. A corpus reader was created seperately to read those files. For details regarding the algorithm, see: @@ -28,10 +28,17 @@ For details about An Crubadan, see: http://borel.slu.edu/crubadan/index.html """ -from sys import maxsize +# Ensure that literal strings default to unicode rather than str. +from __future__ import print_function, unicode_literals +from nltk.compat import PY3 from nltk.util import trigrams +if PY3: + from sys import maxsize +else: + from sys import maxint + # Note: this is NOT "re" you're likely used to. The regex module # is an alternative to the standard re module that supports # Unicode codepoint properties with the \p{} syntax. 
@@ -71,11 +78,11 @@ class TextCat(object): self._corpus.lang_freq(lang) def remove_punctuation(self, text): - """ Get rid of punctuation except apostrophes """ + ''' Get rid of punctuation except apostrophes ''' return re.sub(r"[^\P{P}\']+", "", text) def profile(self, text): - """ Create FreqDist of trigrams within text """ + ''' Create FreqDist of trigrams within text ''' from nltk import word_tokenize, FreqDist clean_text = self.remove_punctuation(text) @@ -84,7 +91,7 @@ class TextCat(object): fingerprint = FreqDist() for t in tokens: token_trigram_tuples = trigrams(self._START_CHAR + t + self._END_CHAR) - token_trigrams = ["".join(tri) for tri in token_trigram_tuples] + token_trigrams = [''.join(tri) for tri in token_trigram_tuples] for cur_trigram in token_trigrams: if cur_trigram in fingerprint: @@ -95,8 +102,8 @@ class TextCat(object): return fingerprint def calc_dist(self, lang, trigram, text_profile): - """ Calculate the "out-of-place" measure between the - text and language profile for a single trigram """ + ''' Calculate the "out-of-place" measure between the + text and language profile for a single trigram ''' lang_fd = self._corpus.lang_freq(lang) dist = 0 @@ -111,13 +118,16 @@ class TextCat(object): # Arbitrary but should be larger than # any possible trigram file length # in terms of total lines - dist = maxsize + if PY3: + dist = maxsize + else: + dist = maxint return dist def lang_dists(self, text): - """ Calculate the "out-of-place" measure between - the text and all languages """ + ''' Calculate the "out-of-place" measure between + the text and all languages ''' distances = {} profile = self.profile(text) @@ -134,8 +144,8 @@ class TextCat(object): return distances def guess_language(self, text): - """ Find the language with the min distance - to the text and return its ISO 639-3 code """ + ''' Find the language with the min distance + to the text and return its ISO 639-3 code ''' self.last_distances = self.lang_dists(text) return min(self.last_distances, key=self.last_distances.get) @@ -146,27 +156,27 @@ def demo(): from nltk.corpus import udhr langs = [ - "Kurdish-UTF8", - "Abkhaz-UTF8", - "Farsi_Persian-UTF8", - "Hindi-UTF8", - "Hawaiian-UTF8", - "Russian-UTF8", - "Vietnamese-UTF8", - "Serbian_Srpski-UTF8", - "Esperanto-UTF8", + 'Kurdish-UTF8', + 'Abkhaz-UTF8', + 'Farsi_Persian-UTF8', + 'Hindi-UTF8', + 'Hawaiian-UTF8', + 'Russian-UTF8', + 'Vietnamese-UTF8', + 'Serbian_Srpski-UTF8', + 'Esperanto-UTF8', ] friendly = { - "kmr": "Northern Kurdish", - "abk": "Abkhazian", - "pes": "Iranian Persian", - "hin": "Hindi", - "haw": "Hawaiian", - "rus": "Russian", - "vie": "Vietnamese", - "srp": "Serbian", - "epo": "Esperanto", + 'kmr': 'Northern Kurdish', + 'abk': 'Abkhazian', + 'pes': 'Iranian Persian', + 'hin': 'Hindi', + 'haw': 'Hawaiian', + 'rus': 'Russian', + 'vie': 'Vietnamese', + 'srp': 'Serbian', + 'epo': 'Esperanto', } tc = TextCat() @@ -177,22 +187,22 @@ def demo(): rows = len(raw_sentences) - 1 cols = list(map(len, raw_sentences)) - sample = "" + sample = '' # Generate a sample text of the language for i in range(0, rows): - cur_sent = "" + cur_sent = '' for j in range(0, cols[i]): - cur_sent += " " + raw_sentences[i][j] + cur_sent += ' ' + raw_sentences[i][j] sample += cur_sent # Try to detect what it is - print("Language snippet: " + sample[0:140] + "...") + print('Language snippet: ' + sample[0:140] + '...') guess = tc.guess_language(sample) - print("Language detection: %s (%s)" % (guess, friendly[guess])) - print("#" * 140) + print('Language detection: %s (%s)' % 
(guess, friendly[guess])) + print('#' * 140) -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/classify/util.py b/nlp_resource_data/nltk/classify/util.py index b9d1986..a0a15a6 100644 --- a/nlp_resource_data/nltk/classify/util.py +++ b/nlp_resource_data/nltk/classify/util.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Classifier Utility Functions # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # Steven Bird (minor additions) # URL: @@ -9,6 +9,7 @@ """ Utility functions and classes for classifiers. """ +from __future__ import print_function, division import math @@ -107,10 +108,10 @@ class CutoffChecker(object): def __init__(self, cutoffs): self.cutoffs = cutoffs.copy() - if "min_ll" in cutoffs: - cutoffs["min_ll"] = -abs(cutoffs["min_ll"]) - if "min_lldelta" in cutoffs: - cutoffs["min_lldelta"] = abs(cutoffs["min_lldelta"]) + if 'min_ll' in cutoffs: + cutoffs['min_ll'] = -abs(cutoffs['min_ll']) + if 'min_lldelta' in cutoffs: + cutoffs['min_lldelta'] = abs(cutoffs['min_lldelta']) self.ll = None self.acc = None self.iter = 1 @@ -118,32 +119,32 @@ class CutoffChecker(object): def check(self, classifier, train_toks): cutoffs = self.cutoffs self.iter += 1 - if "max_iter" in cutoffs and self.iter >= cutoffs["max_iter"]: + if 'max_iter' in cutoffs and self.iter >= cutoffs['max_iter']: return True # iteration cutoff. new_ll = nltk.classify.util.log_likelihood(classifier, train_toks) if math.isnan(new_ll): return True - if "min_ll" in cutoffs or "min_lldelta" in cutoffs: - if "min_ll" in cutoffs and new_ll >= cutoffs["min_ll"]: + if 'min_ll' in cutoffs or 'min_lldelta' in cutoffs: + if 'min_ll' in cutoffs and new_ll >= cutoffs['min_ll']: return True # log likelihood cutoff if ( - "min_lldelta" in cutoffs + 'min_lldelta' in cutoffs and self.ll - and ((new_ll - self.ll) <= abs(cutoffs["min_lldelta"])) + and ((new_ll - self.ll) <= abs(cutoffs['min_lldelta'])) ): return True # log likelihood delta cutoff self.ll = new_ll - if "max_acc" in cutoffs or "min_accdelta" in cutoffs: + if 'max_acc' in cutoffs or 'min_accdelta' in cutoffs: new_acc = nltk.classify.util.log_likelihood(classifier, train_toks) - if "max_acc" in cutoffs and new_acc >= cutoffs["max_acc"]: + if 'max_acc' in cutoffs and new_acc >= cutoffs['max_acc']: return True # log likelihood cutoff if ( - "min_accdelta" in cutoffs + 'min_accdelta' in cutoffs and self.acc - and ((new_acc - self.acc) <= abs(cutoffs["min_accdelta"])) + and ((new_acc - self.acc) <= abs(cutoffs['min_accdelta'])) ): return True # log likelihood delta cutoff self.acc = new_acc @@ -158,25 +159,25 @@ class CutoffChecker(object): def names_demo_features(name): features = {} - features["alwayson"] = True - features["startswith"] = name[0].lower() - features["endswith"] = name[-1].lower() - for letter in "abcdefghijklmnopqrstuvwxyz": - features["count(%s)" % letter] = name.lower().count(letter) - features["has(%s)" % letter] = letter in name.lower() + features['alwayson'] = True + features['startswith'] = name[0].lower() + features['endswith'] = name[-1].lower() + for letter in 'abcdefghijklmnopqrstuvwxyz': + features['count(%s)' % letter] = name.lower().count(letter) + features['has(%s)' % letter] = letter in name.lower() return features def binary_names_demo_features(name): features = {} - features["alwayson"] = True - features["startswith(vowel)"] = name[0].lower() in "aeiouy" - features["endswith(vowel)"] = name[-1].lower() in "aeiouy" - for letter in 
"abcdefghijklmnopqrstuvwxyz": - features["count(%s)" % letter] = name.lower().count(letter) - features["has(%s)" % letter] = letter in name.lower() - features["startswith(%s)" % letter] = letter == name[0].lower() - features["endswith(%s)" % letter] = letter == name[-1].lower() + features['alwayson'] = True + features['startswith(vowel)'] = name[0].lower() in 'aeiouy' + features['endswith(vowel)'] = name[-1].lower() in 'aeiouy' + for letter in 'abcdefghijklmnopqrstuvwxyz': + features['count(%s)' % letter] = name.lower().count(letter) + features['has(%s)' % letter] = letter in name.lower() + features['startswith(%s)' % letter] = letter == name[0].lower() + features['endswith(%s)' % letter] = letter == name[-1].lower() return features @@ -185,8 +186,8 @@ def names_demo(trainer, features=names_demo_features): import random # Construct a list of classified names, using the names corpus. - namelist = [(name, "male") for name in names.words("male.txt")] + [ - (name, "female") for name in names.words("female.txt") + namelist = [(name, 'male') for name in names.words('male.txt')] + [ + (name, 'female') for name in names.words('female.txt') ] # Randomly split the names into a test & train set. @@ -196,13 +197,13 @@ def names_demo(trainer, features=names_demo_features): test = namelist[5000:5500] # Train up a classifier. - print("Training classifier...") + print('Training classifier...') classifier = trainer([(features(n), g) for (n, g) in train]) # Run the classifier on the test data. - print("Testing classifier...") + print('Testing classifier...') acc = accuracy(classifier, [(features(n), g) for (n, g) in test]) - print("Accuracy: %6.4f" % acc) + print('Accuracy: %6.4f' % acc) # For classifiers that can find probabilities, show the log # likelihood and some sample probability distributions. @@ -210,15 +211,15 @@ def names_demo(trainer, features=names_demo_features): test_featuresets = [features(n) for (n, g) in test] pdists = classifier.prob_classify_many(test_featuresets) ll = [pdist.logprob(gold) for ((name, gold), pdist) in zip(test, pdists)] - print("Avg. log likelihood: %6.4f" % (sum(ll) / len(test))) + print('Avg. log likelihood: %6.4f' % (sum(ll) / len(test))) print() - print("Unseen Names P(Male) P(Female)\n" + "-" * 40) + print('Unseen Names P(Male) P(Female)\n' + '-' * 40) for ((name, gender), pdist) in list(zip(test, pdists))[:5]: - if gender == "male": - fmt = " %-15s *%6.4f %6.4f" + if gender == 'male': + fmt = ' %-15s *%6.4f %6.4f' else: - fmt = " %-15s %6.4f *%6.4f" - print(fmt % (name, pdist.prob("male"), pdist.prob("female"))) + fmt = ' %-15s %6.4f *%6.4f' + print(fmt % (name, pdist.prob('male'), pdist.prob('female'))) except NotImplementedError: pass @@ -230,8 +231,8 @@ def partial_names_demo(trainer, features=names_demo_features): from nltk.corpus import names import random - male_names = names.words("male.txt") - female_names = names.words("female.txt") + male_names = names.words('male.txt') + female_names = names.words('female.txt') random.seed(654321) random.shuffle(male_names) @@ -251,13 +252,13 @@ def partial_names_demo(trainer, features=names_demo_features): random.shuffle(test) # Train up a classifier. - print("Training classifier...") + print('Training classifier...') classifier = trainer(positive, unlabeled) # Run the classifier on the test data. 
- print("Testing classifier...") + print('Testing classifier...') acc = accuracy(classifier, [(features(n), m) for (n, m) in test]) - print("Accuracy: %6.4f" % acc) + print('Accuracy: %6.4f' % acc) # For classifiers that can find probabilities, show the log # likelihood and some sample probability distributions. @@ -265,14 +266,14 @@ def partial_names_demo(trainer, features=names_demo_features): test_featuresets = [features(n) for (n, m) in test] pdists = classifier.prob_classify_many(test_featuresets) ll = [pdist.logprob(gold) for ((name, gold), pdist) in zip(test, pdists)] - print("Avg. log likelihood: %6.4f" % (sum(ll) / len(test))) + print('Avg. log likelihood: %6.4f' % (sum(ll) / len(test))) print() - print("Unseen Names P(Male) P(Female)\n" + "-" * 40) + print('Unseen Names P(Male) P(Female)\n' + '-' * 40) for ((name, is_male), pdist) in zip(test, pdists)[:5]: if is_male == True: - fmt = " %-15s *%6.4f %6.4f" + fmt = ' %-15s *%6.4f %6.4f' else: - fmt = " %-15s %6.4f *%6.4f" + fmt = ' %-15s %6.4f *%6.4f' print(fmt % (name, pdist.prob(True), pdist.prob(False))) except NotImplementedError: pass @@ -289,7 +290,7 @@ def wsd_demo(trainer, word, features, n=1000): import random # Get the instances. - print("Reading data...") + print('Reading data...') global _inst_cache if word not in _inst_cache: _inst_cache[word] = [(i, i.senses[0]) for i in senseval.instances(word)] @@ -297,23 +298,23 @@ def wsd_demo(trainer, word, features, n=1000): if n > len(instances): n = len(instances) senses = list(set(l for (i, l) in instances)) - print(" Senses: " + " ".join(senses)) + print(' Senses: ' + ' '.join(senses)) # Randomly split the names into a test & train set. - print("Splitting into test & train...") + print('Splitting into test & train...') random.seed(123456) random.shuffle(instances) train = instances[: int(0.8 * n)] test = instances[int(0.8 * n) : n] # Train up a classifier. - print("Training classifier...") + print('Training classifier...') classifier = trainer([(features(i), l) for (i, l) in train]) # Run the classifier on the test data. - print("Testing classifier...") + print('Testing classifier...') acc = accuracy(classifier, [(features(i), l) for (i, l) in test]) - print("Accuracy: %6.4f" % acc) + print('Accuracy: %6.4f' % acc) # For classifiers that can find probabilities, show the log # likelihood and some sample probability distributions. @@ -321,7 +322,7 @@ def wsd_demo(trainer, word, features, n=1000): test_featuresets = [features(i) for (i, n) in test] pdists = classifier.prob_classify_many(test_featuresets) ll = [pdist.logprob(gold) for ((name, gold), pdist) in zip(test, pdists)] - print("Avg. log likelihood: %6.4f" % (sum(ll) / len(test))) + print('Avg. log likelihood: %6.4f' % (sum(ll) / len(test))) except NotImplementedError: pass diff --git a/nlp_resource_data/nltk/classify/weka.py b/nlp_resource_data/nltk/classify/weka.py index 3bfb311..fbd4302 100644 --- a/nlp_resource_data/nltk/classify/weka.py +++ b/nlp_resource_data/nltk/classify/weka.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Interface to Weka Classsifiers # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # URL: # For license information, see LICENSE.TXT @@ -8,7 +8,7 @@ """ Classifiers that make use of the external 'Weka' package. 
""" - +from __future__ import print_function import time import tempfile import os @@ -17,6 +17,8 @@ import re import zipfile from sys import stdin +from six import integer_types, string_types + from nltk.probability import DictionaryProbDist from nltk.internals import java, config_java @@ -24,11 +26,11 @@ from nltk.classify.api import ClassifierI _weka_classpath = None _weka_search = [ - ".", - "/usr/share/weka", - "/usr/local/share/weka", - "/usr/lib/weka", - "/usr/local/lib/weka", + '.', + '/usr/share/weka', + '/usr/local/share/weka', + '/usr/lib/weka', + '/usr/local/lib/weka', ] @@ -43,27 +45,27 @@ def config_weka(classpath=None): if _weka_classpath is None: searchpath = _weka_search - if "WEKAHOME" in os.environ: - searchpath.insert(0, os.environ["WEKAHOME"]) + if 'WEKAHOME' in os.environ: + searchpath.insert(0, os.environ['WEKAHOME']) for path in searchpath: - if os.path.exists(os.path.join(path, "weka.jar")): - _weka_classpath = os.path.join(path, "weka.jar") + if os.path.exists(os.path.join(path, 'weka.jar')): + _weka_classpath = os.path.join(path, 'weka.jar') version = _check_weka_version(_weka_classpath) if version: print( - ("[Found Weka: %s (version %s)]" % (_weka_classpath, version)) + ('[Found Weka: %s (version %s)]' % (_weka_classpath, version)) ) else: - print("[Found Weka: %s]" % _weka_classpath) + print('[Found Weka: %s]' % _weka_classpath) _check_weka_version(_weka_classpath) if _weka_classpath is None: raise LookupError( - "Unable to find weka.jar! Use config_weka() " - "or set the WEKAHOME environment variable. " - "For more information about Weka, please see " - "http://www.cs.waikato.ac.nz/ml/weka/" + 'Unable to find weka.jar! Use config_weka() ' + 'or set the WEKAHOME environment variable. ' + 'For more information about Weka, please see ' + 'http://www.cs.waikato.ac.nz/ml/weka/' ) @@ -76,7 +78,7 @@ def _check_weka_version(jar): return None try: try: - return zf.read("weka/core/version.txt") + return zf.read('weka/core/version.txt') except KeyError: return None finally: @@ -89,10 +91,10 @@ class WekaClassifier(ClassifierI): self._model = model_filename def prob_classify_many(self, featuresets): - return self._classify_many(featuresets, ["-p", "0", "-distribution"]) + return self._classify_many(featuresets, ['-p', '0', '-distribution']) def classify_many(self, featuresets): - return self._classify_many(featuresets, ["-p", "0"]) + return self._classify_many(featuresets, ['-p', '0']) def _classify_many(self, featuresets, options): # Make sure we can find java & weka. @@ -101,15 +103,15 @@ class WekaClassifier(ClassifierI): temp_dir = tempfile.mkdtemp() try: # Write the test data file. - test_filename = os.path.join(temp_dir, "test.arff") + test_filename = os.path.join(temp_dir, 'test.arff') self._formatter.write(test_filename, featuresets) # Call weka to classify the data. cmd = [ - "weka.classifiers.bayes.NaiveBayes", - "-l", + 'weka.classifiers.bayes.NaiveBayes', + '-l', self._model, - "-T", + '-T', test_filename, ] + options (stdout, stderr) = java( @@ -121,17 +123,17 @@ class WekaClassifier(ClassifierI): # Check if something went wrong: if stderr and not stdout: - if "Illegal options: -distribution" in stderr: + if 'Illegal options: -distribution' in stderr: raise ValueError( - "The installed version of weka does " - "not support probability distribution " - "output." + 'The installed version of weka does ' + 'not support probability distribution ' + 'output.' 
) else: - raise ValueError("Weka failed to generate output:\n%s" % stderr) + raise ValueError('Weka failed to generate output:\n%s' % stderr) # Parse weka's output. - return self.parse_weka_output(stdout.decode(stdin.encoding).split("\n")) + return self.parse_weka_output(stdout.decode(stdin.encoding).split('\n')) finally: for f in os.listdir(temp_dir): @@ -139,7 +141,7 @@ class WekaClassifier(ClassifierI): os.rmdir(temp_dir) def parse_weka_distribution(self, s): - probs = [float(v) for v in re.split("[*,]+", s) if v.strip()] + probs = [float(v) for v in re.split('[*,]+', s) if v.strip()] probs = dict(zip(self._formatter.labels(), probs)) return DictionaryProbDist(probs) @@ -150,14 +152,14 @@ class WekaClassifier(ClassifierI): lines = lines[i:] break - if lines[0].split() == ["inst#", "actual", "predicted", "error", "prediction"]: - return [line.split()[2].split(":")[1] for line in lines[1:] if line.strip()] + if lines[0].split() == ['inst#', 'actual', 'predicted', 'error', 'prediction']: + return [line.split()[2].split(':')[1] for line in lines[1:] if line.strip()] elif lines[0].split() == [ - "inst#", - "actual", - "predicted", - "error", - "distribution", + 'inst#', + 'actual', + 'predicted', + 'error', + 'distribution', ]: return [ self.parse_weka_distribution(line.split()[-1]) @@ -166,16 +168,16 @@ class WekaClassifier(ClassifierI): ] # is this safe:? - elif re.match(r"^0 \w+ [01]\.[0-9]* \?\s*$", lines[0]): + elif re.match(r'^0 \w+ [01]\.[0-9]* \?\s*$', lines[0]): return [line.split()[1] for line in lines if line.strip()] else: for line in lines[:10]: print(line) raise ValueError( - "Unhandled output format -- your version " - "of weka may not be supported.\n" - " Header: %s" % lines[0] + 'Unhandled output format -- your version ' + 'of weka may not be supported.\n' + ' Header: %s' % lines[0] ) # [xx] full list of classifiers (some may be abstract?): @@ -192,12 +194,12 @@ class WekaClassifier(ClassifierI): # VotedPerceptron, Winnow, ZeroR _CLASSIFIER_CLASS = { - "naivebayes": "weka.classifiers.bayes.NaiveBayes", - "C4.5": "weka.classifiers.trees.J48", - "log_regression": "weka.classifiers.functions.Logistic", - "svm": "weka.classifiers.functions.SMO", - "kstar": "weka.classifiers.lazy.KStar", - "ripper": "weka.classifiers.rules.JRip", + 'naivebayes': 'weka.classifiers.bayes.NaiveBayes', + 'C4.5': 'weka.classifiers.trees.J48', + 'log_regression': 'weka.classifiers.functions.Logistic', + 'svm': 'weka.classifiers.functions.SMO', + 'kstar': 'weka.classifiers.lazy.KStar', + 'ripper': 'weka.classifiers.rules.JRip', } @classmethod @@ -205,7 +207,7 @@ class WekaClassifier(ClassifierI): cls, model_filename, featuresets, - classifier="naivebayes", + classifier='naivebayes', options=[], quiet=True, ): @@ -218,7 +220,7 @@ class WekaClassifier(ClassifierI): temp_dir = tempfile.mkdtemp() try: # Write the training data file. - train_filename = os.path.join(temp_dir, "train.arff") + train_filename = os.path.join(temp_dir, 'train.arff') formatter.write(train_filename, featuresets) if classifier in cls._CLASSIFIER_CLASS: @@ -226,10 +228,10 @@ class WekaClassifier(ClassifierI): elif classifier in cls._CLASSIFIER_CLASS.values(): javaclass = classifier else: - raise ValueError("Unknown classifier %s" % classifier) + raise ValueError('Unknown classifier %s' % classifier) # Train the weka model. 
- cmd = [javaclass, "-d", model_filename, "-t", train_filename] + cmd = [javaclass, '-d', model_filename, '-t', train_filename] cmd += list(options) if quiet: stdout = subprocess.PIPE @@ -276,8 +278,8 @@ class ARFF_Formatter: def write(self, outfile, tokens): """Writes ARFF data to a file for the given data.""" - if not hasattr(outfile, "write"): - outfile = open(outfile, "w") + if not hasattr(outfile, 'write'): + outfile = open(outfile, 'w') outfile.write(self.format(tokens)) outfile.close() @@ -296,18 +298,18 @@ class ARFF_Formatter: for tok, label in tokens: for (fname, fval) in tok.items(): if issubclass(type(fval), bool): - ftype = "{True, False}" - elif issubclass(type(fval), (int, float, bool)): - ftype = "NUMERIC" - elif issubclass(type(fval), str): - ftype = "STRING" + ftype = '{True, False}' + elif issubclass(type(fval), (integer_types, float, bool)): + ftype = 'NUMERIC' + elif issubclass(type(fval), string_types): + ftype = 'STRING' elif fval is None: continue # can't tell the type. else: - raise ValueError("Unsupported value type %r" % ftype) + raise ValueError('Unsupported value type %r' % ftype) if features.get(fname, ftype) != ftype: - raise ValueError("Inconsistent type for %s" % fname) + raise ValueError('Inconsistent type for %s' % fname) features[fname] = ftype features = sorted(features.items()) @@ -317,20 +319,20 @@ class ARFF_Formatter: """Returns an ARFF header as a string.""" # Header comment. s = ( - "% Weka ARFF file\n" - + "% Generated automatically by NLTK\n" - + "%% %s\n\n" % time.ctime() + '% Weka ARFF file\n' + + '% Generated automatically by NLTK\n' + + '%% %s\n\n' % time.ctime() ) # Relation name - s += "@RELATION rel\n\n" + s += '@RELATION rel\n\n' # Input attribute specifications for fname, ftype in self._features: - s += "@ATTRIBUTE %-30r %s\n" % (fname, ftype) + s += '@ATTRIBUTE %-30r %s\n' % (fname, ftype) # Label attribute specification - s += "@ATTRIBUTE %-30r {%s}\n" % ("-label-", ",".join(self._labels)) + s += '@ATTRIBUTE %-30r {%s}\n' % ('-label-', ','.join(self._labels)) return s @@ -352,29 +354,29 @@ class ARFF_Formatter: tokens = [(tok, None) for tok in tokens] # Data section - s = "\n@DATA\n" + s = '\n@DATA\n' for (tok, label) in tokens: for fname, ftype in self._features: - s += "%s," % self._fmt_arff_val(tok.get(fname)) - s += "%s\n" % self._fmt_arff_val(label) + s += '%s,' % self._fmt_arff_val(tok.get(fname)) + s += '%s\n' % self._fmt_arff_val(label) return s def _fmt_arff_val(self, fval): if fval is None: - return "?" - elif isinstance(fval, (bool, int)): - return "%s" % fval + return '?' 
+ elif isinstance(fval, (bool, integer_types)): + return '%s' % fval elif isinstance(fval, float): - return "%r" % fval + return '%r' % fval else: - return "%r" % fval + return '%r' % fval -if __name__ == "__main__": +if __name__ == '__main__': from nltk.classify.util import names_demo, binary_names_demo_features def make_classifier(featuresets): - return WekaClassifier.train("/tmp/name.model", featuresets, "C4.5") + return WekaClassifier.train('/tmp/name.model', featuresets, 'C4.5') classifier = names_demo(make_classifier, binary_names_demo_features) diff --git a/nlp_resource_data/nltk/cli.py b/nlp_resource_data/nltk/cli.py deleted file mode 100644 index 01ff3d0..0000000 --- a/nlp_resource_data/nltk/cli.py +++ /dev/null @@ -1,59 +0,0 @@ -# -*- coding: utf-8 -*- -# Natural Language Toolkit: NLTK Command-Line Interface -# -# Copyright (C) 2001-2020 NLTK Project -# URL: -# For license information, see LICENSE.TXT - - -from functools import partial -from itertools import chain -from tqdm import tqdm - -import click - -from nltk import word_tokenize -from nltk.util import parallelize_preprocess - -CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"]) - - -@click.group(context_settings=CONTEXT_SETTINGS) -@click.version_option() -def cli(): - pass - - -@cli.command("tokenize") -@click.option( - "--language", - "-l", - default="en", - help="The language for the Punkt sentence tokenization.", -) -@click.option( - "--preserve-line", - "-l", - default=True, - is_flag=True, - help="An option to keep the preserve the sentence and not sentence tokenize it.", -) -@click.option("--processes", "-j", default=1, help="No. of processes.") -@click.option("--encoding", "-e", default="utf8", help="Specify encoding of file.") -@click.option( - "--delimiter", "-d", default=" ", help="Specify delimiter to join the tokens." -) -def tokenize_file(language, preserve_line, processes, encoding, delimiter): - """ This command tokenizes text stream using nltk.word_tokenize """ - with click.get_text_stream("stdin", encoding=encoding) as fin: - with click.get_text_stream("stdout", encoding=encoding) as fout: - # If it's single process, joblib parallization is slower, - # so just process line by line normally. 
- if processes == 1: - for line in tqdm(fin.readlines()): - print(delimiter.join(word_tokenize(line)), end="\n", file=fout) - else: - for outline in parallelize_preprocess( - word_tokenize, fin.readlines(), processes, progress_bar=True - ): - print(delimiter.join(outline), end="\n", file=fout) diff --git a/nlp_resource_data/nltk/cluster/__init__.py b/nlp_resource_data/nltk/cluster/__init__.py index 2310947..c7fc100 100644 --- a/nlp_resource_data/nltk/cluster/__init__.py +++ b/nlp_resource_data/nltk/cluster/__init__.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Clusterers # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Trevor Cohn # URL: # For license information, see LICENSE.TXT diff --git a/nlp_resource_data/nltk/cluster/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/cluster/__pycache__/__init__.cpython-37.pyc index a5235cbb0221d45825482bc030d06f830b03fc82..ea338758fc664971f2ebb9720ef25be393fb94f7 100644 GIT binary patch delta 31 lcmX@7cu0}kiI6) delta 43 xcmX@4cutYqiISrZZX5<$n7G>+}P0kev1px1}3@ZQt diff --git a/nlp_resource_data/nltk/cluster/__pycache__/api.cpython-37.pyc b/nlp_resource_data/nltk/cluster/__pycache__/api.cpython-37.pyc index 0cbec0e277e3a93aad5e255b70368efb0a045d08..36c667007aec0299c702f094f99b463e98905dfb 100644 GIT binary patch delta 650 zcmZuvO>5LZ7|tY_$#h9-*xD9B@Sq?I{qW>LYeBInhy`!5mzZQm9CwnGnMAf8s<<8n zS>{i8EycS(z>DB1Cx3@0=bbHzO9OfGGBeM|`(}O(->)SPl4Ri7r;Fy<;XJv59?oxG zyTLpbNL-OIK^_k|nxRJ?3mN|CG0dY`FhgTHe?D4Ga>lZXw|OaZtxr$RP#Re=1@QaM zZl~3*dFt<@b@R#pwgsyrUs3z%aobFI?G7$r1y-rk5igIgnrZO!b_|4IK_G8<**2=j zGB3DXCJv;Sa4B9j4ZD8!A-so%mn>Id5Wr4ACFTd-*oJ18$(!e{qRd;-)Y*}03MO>B zZ7!3~BbOR2H2yZHT->F_)x8UKo8|E2ZH(#R{ zxBeT9s2QIwM~u&1vO2H$V{<^q?_4J^A>~>fLIv*5M{Pp&FRrT~vmzFNpn*Wc(DzYH z;)o9DYD{S4_76uZeUKNWf}|?2&<9#fRcNbmTFZ8_bM(XxDKCT+?XeqkPnzPj{Y6gT YBIlm&RSoMT--jF;Zb6D-KMqp#2U3-VAOHXW delta 598 zcmZvY%SyvQ6oxyQT*jK#+Jc~n7hFiFwBkY(yg(OqC$5W7l4hi_lcY>iu`3naN-0^n z)`j3k@dbPcS6KG032+&VxD-L<8G25Z+FPLRshcd4nhVJ6i zwYrTq@`B|m7--Ufo1$-wf9s+YM?Ig9f$r-8oFsXD7L}8-KCtAzXhi=|2p~0sp5JV- zUKCRvZ6sP%y@k>-pT@S#ec#ekY&$Kk0FFgz n6ap64I+ZRD9ozAk7x!hRlKpRJO1Ava?J>FoOGl7@K{4?M(G+>f diff --git a/nlp_resource_data/nltk/cluster/__pycache__/em.cpython-37.pyc b/nlp_resource_data/nltk/cluster/__pycache__/em.cpython-37.pyc index 40edf72da14591862c980b77abc338ff92127a04..ac4c45738dd5286f4c3a5ec23d1d5566f8375c4a 100644 GIT binary patch delta 1440 zcmZ`(%WvF7828v-uh+Zl?y|dibOTMJrjjX2NXeE2HQ^!A(o`S>6@@91wOLOSD}L#- zU3ED$hx7uchB=`2fVd*@5RHW31Q(?K2T0RH#eqY`fqNw+=JTd)T7fP7X6AdpWg)X!tAa+v3*sZJjEk{5)w*6m8wJ;)+9v@G^$-xqPDIueanIuiGu$} zN~C^>8d^{a4QdeF>p?j*sYw)OoK?J%XIxi2gPA+p4xtsV?3vdUYO$(U*-{~6){eB} zsqZU}b^k4l#!-cxfFEV<ejJHL9{GdVV{YJQ4EjO(jzdH@8D^VtvjY6?vZF9qT>k5IQ5!mJ6WN2qxrC)S^6e z2SJ`ncj?;R4eNJx97P-ruwugWJr-qtHiR0#R_~4#>Ztj^Ivx0FhLNmZ;@{UddN#`2 zucs*k%;imh7@#hR@C*W)qk0^%6fhpBAvF1Ud&v=Imk{qlV|FSVjKIK0(zFPh-Gy15=FQH<2KNPAB zeD4VxyUi})d*3!!$aqn-Qo7ks7#u+@P5K3klh$D}!>3wDR-awdX1Xr01YGwvU+K&mWwc}R&Cb$!oKKiUSU@<1u#8Z& z293ixgvS&6X!yX846J`TCBd}$U!5*7_R3?w60*eaj4#d?gP|6wLRHE*Ds<%{$mIj} zaAM#&3BT|Y6E82El)L*Hf{u(=By6<5JNw5cZRNF%TQ~9h`Bj+_sX+*qV~ z6l7blgen*o%pgo76ch4j#vTVl`Nhe_cMFZ8^jLLT*&i1M)O>g$1sN1QpryoH;L!|%! 
delta 1406 zcmZ`&OKcoP5S^ai&W`uv&)Qj=5Su7oG!oePc)>`K%m}fJx7+K%^Ha^N z$#NhPi8u!uf#8HVa7+|@LW&eF+_@q3CE|pPlmpjDNObjDab$%V=}k{{SJkUm)!)AG zRlD|jwd!i}IkEX}r#$y%tw}fJ-=IP@N+QkI-qVtXsWGzS!W_}?eP2uT4NdOrksceq zL9}&^8q~a@QIi?h4d0{|wQp#?#cW#I(cucSu1h~kE^D5%kICM-^T2nA&M%bjA8W|} zhii-5Y0MTs4hGw47QoKp0zd1vEnDu*^2oj9RpiPeFeX2M9z&RvD^W@Mai<&g^MXOX zzW0~=hkgN%cm|Yk-w7#8im(_!8Lhg|7fuTjdPKN1j9HS0X_6ntd+4r2VP168I9uJ| zcCFVQ!6>!Y@|-~-&#nQa04N&N5ip5wrU}adS*k2h<3HAx*L)}LM@5)L1K&#fk{^uX ziDMFk5p3pAl`C&dl_g5MtXrgz3#)*j%Y+rPKoT40v-K94=I86@TdFJqGZ;kGK5;HcAG@em>6v=1sS8=XkN^6+X9=Zv0ljQ|?az~KtsJ@ME6y$lhHF!M9 z7rZ|9!|Iblo5Ln8V6f5wJ>Tet^l02EJ~uu^Ztj`mFOzFZV=LbdG6u)d)sTdS(%Mm& z1W`{Iloi4VlU<=_Ak!Z$!~HVDWGo z0X02RIGm8SpZGtG&*rfq|1;H2rvog5PUklNdUD=2F^8?xegxTiEQ?C;lr-PvpEqAT^SoT`W4?XGy)9S0#;x@~KW%8Q_P+lPpI=^) z#ZSnMEAl(f|8Cw~5@r$=J8(`K6oQ-_Gp9;$q0UHdTB&?sD`xogX!ZQw!pVC zDzCDz^B~S5c7&@iDT~uRes-pP^0GWFq~viBCY_E~8uohR%{WV;P$R|f%rxHCAJIYw kSVMS%%)(iTQ`(Q%DvFikgy^I~YQ)wn#^^Qel+$v^f4M0%Jpcdz diff --git a/nlp_resource_data/nltk/cluster/__pycache__/gaac.cpython-37.pyc b/nlp_resource_data/nltk/cluster/__pycache__/gaac.cpython-37.pyc index 14f29d8d3fb0f06668549a75b40512390e27a622..298944d7aabea389dd5d4c146a05c94856b1179c 100644 GIT binary patch delta 1356 zcmZ`(y>H!A6!-P#eYT&IyuOmqs8OmWFEzAHNh2~41;hf(6|`7@ao=@PEB@-SUGh|< zf|3CVBoyv|#K6eLP-S6Y=mcV|%5tX;U7+G0VCp@7DWy?rOF!LvK7RLi&bjve!QV^X z+ug3M$fvRPLuY#5o4Kujs= zMZD47m4r;aqWJb-sLa=dS3;84;bxN$>!irVWRoYug2iE))GKjEKu7poRhecLKb#gtu``bVx2LDIPoJmRl?;Fca1{mE zd9aZ;Sve9$1Q3k`|J0bCMGv$vh1vFcnp8ECZ}|K>V{O(%e;2`!Agp29R8pj`gU25k zi~Z;GVMYcG4o;gt5MuLRtR>Rp53NPV!dxwjcEIJ6bd7ZRyL8U+koIMSQ~V}yy$X$majpTjFVH{+{2PdPORIJoUjHJZP+R9#<$$ z=+%wlvu9+1ob|ZVzR?P6?`hCEya6%5MNBe z_j>fgo6?1S%%UU@Lw`cvNclsk8;E6ZRp|r`K}YCu(OXFI`-Ap##H$lHd0KB@9RCJ@ rqOUCrd0`ddm=+7Vo)xS~;|-*!P<>xjiAy|Us(s>WE}@^()08{`F2ECV delta 1182 zcmZ`&O;6N781D3Q+U^JJu82X%0=r8}kR^eL5<-l|5H5y9MQx0=>=aqqZa33ig=jPp zOgwlY{nzysM|pq@-TNy4H3KyRK%JUMR{&vvT!6Jupqqk)f54?TEGfz@d6i~BZ#LYDcdgb6 zxA%wImrzkcB6}eFXA|7*_k4LZ@V)Y5cVjhZxOA;N&i3SKMdP-9mQg;S1lv)vleV@} z3*6?#DNBsR@rg)%5=&Fi=s*(GjM-W>Y*qrlF;`;y%5G|Qca+ppC32&t?ekDwI0j*F%YT*-IWDP}D^jdb=A513F~ngck)_V5++h!z{v zd(minMqfvRDtLg+!ZfA~!a*Kj2w<4)8ikXAoadIswrM622EJudo#hj~sE1unOx{ib zFU=uR-A2Q!6NK(xvKKZx;g5ntoYHa8C|9YuVOU++a5}vTVGLSqH~Do08e*YNv?o{wfpmgL z)b(GmFR6umoY|?q6cT;S#?sG<|96V8<4~_b9p?@EmM-wN{z}gh7_Yc6v;Zrpc3cEH z2CEAIAWp{t4kL_LFlbca(8a$H`j1UnG*m+~N;r!%SjQTcsM6l_Y3m0Q z;8AJ;^_0d(GV%Q2EiEpm6!v)6yWU-|vuSplghGD$m%3^bnlum~O{5Y;TOdLr2n%<1+GD+Ma299*i?o=zEY@8;&bc{8Ha*I_dG$72L%j=bp=+s8 z(Jg+UCH1_<&8;%L%{01iYl;3Lt;|KGxa^h{OFpW^mTNJMTW@NC5ftueL4lWday!he z2J6MZyr;P~9|}raI^5^>4iW|Wdzw>zN|`!2X}2LvGHqIO0D)oG?o!GOs&TF!ug1byj%} zr~B9DIw{PF{n9H2X>?|9sr(sZrr4|8zEnd43?oQGB_j>+le--1WGbY7+>W@+M`4;l zMJ!pTnJw;E)9j@9!dgE|?4aNjg^x6&PKt?p+q{{zAZ`0izOnbWwZdixk&Z6)*%u{? z`|Ada_sh$)301T5xwvDm^<+tjK*DS|7F1|2n_z>|CEFbC@<5tmVrZ^DNE@g?m`2EiKlID< zltVuo#cAQxDfa0W#cQ?6b1$Jq=40+BG9Qq7N2so#|GwzewigtEDvU~WSfAzp;nGSv zv3DSE`n|X?yfa1Xj*o^5=z1WY4EI)5Vg*I%ng_>nWiXnDmoZYZj%3?M6C{~_6knbA z3mXMSz-mxrzS(Mb!506Fj9nW-gJURaSv*J$meA;$1Hl(@dX#tpZ9j=O>fc^el~eB> zUxqY24|p3q?~yPXGX-jGD2e&T%DdFYaEXA91V~JHm4GS-b;7g_yiRZ^r5dS|ETUr+ zpL7brEQ`mDNmktZz3~xaMX@@1V}U}kR}Iv?-Iaz9;CCGzE~8np&1X0j-5i>qh=-#W zQLH~lCre{E-@vM7M0M<)wLTZzL?hfnIN*V|$Z(k5fay21r#eDGdrFle(Wv-g>~fE? 
zh8qOe2~?x2iU-uF{lRyE?RXfjTAU)W#rxkV2e?Wc$D9y0ERS2iqBo@_TCS#_XEohs z;@0@&^809^lZpK>@jRy{%O3u~vJP-OElL`41bKoLf)fO*1UJRk<7YoLaOhT&DBGIt z|GS_-PL(W?!y3V%ENVpBNOL)E1)YemP;5${U|lyE-nz~#X0t-hVirDTPn$E(nCxGG CP)@c0 delta 1413 zcmZuvOK%%h6rMXT&zP~}CQjm5B~F^OFl~v`hDsEQBhi#nB&t3lptE2!c&5~i=fN2# zU^YQYw_TtsK{rK6bkz+DZxO15gxGh1beD)lyWt0P#W~lOL!;R8=X=gQ_dDNtJUI95 zo8|XPrJ{nL`OmNUKQDY&uI*-Mg-Xb}iZ}dc9+S4}seMPDG*A0PNvgUM>03p3iz)QI zr6lU7^g`_#vFVvixvYd*sNYjUJu>$+uMircc~9}I$PNozDs&@j58s9Crs7)9NyJr` ziRtRcQM;GI%`9j~7rWbeFMoTSU>fna0ve_9lUtDS4N#m$_H|XCqO8D%6!#!Q$b&@#7c83zVdt z##!rY{-nH7_v{YV81y=6vgtZdpj}MzF;5iS_DO5s-Fx72!+%$HZ?(>txDAlEXsNEexNp@}DY=<7dz#^f(F< zp@-C%15~H+{yzVwva==$Bo(Q5*^uSsaN#7|u|Miu{(#>&)|W!*x{`#H|O7e)ZW z_zGwauV9R8$ZVo8QVVP7^sI5uSJ6C}YXyCe`Qz&MCuK8e(+;mpSiTQY2EP9*zdQY! zNlOg||6#gyjg}T(C!i?-^%u?&P%}UdnbJUuV86TaB!{cv#Weo%1q93H$7dQ$8(fT@_q(3wHZ|oRaPoy8q;j+DYu+hqZ=XMSBygTH;Xq zXk*9)8|Z{f2qPYNpB($@#!OGqpQ{L_@|<=BiKe(V_r@-%hKmI663AVb6_2Tf)$Rs) z-nre!4c!lyTTYRlJ(?5_vPm3=ObB;4iqqfmHm4RVT17p|hiGxn! z`)(1=qXWtWZxd|rVtx4rsUPO2cvvUcuT-9>Twz^^({Q^RwP<(q8iK{}tE$XlWoBxQ MR@qgW){@2k1KLI(?f?J) diff --git a/nlp_resource_data/nltk/cluster/__pycache__/util.cpython-37.pyc b/nlp_resource_data/nltk/cluster/__pycache__/util.cpython-37.pyc index 5403a35e330f3bb675d50ba98a36bdf833b39eed..e3bc7564bf6eadd37375ae34c0192f497d81220e 100644 GIT binary patch delta 2801 zcmaJ@TWB0*6rS0+?CgD$Y&LB|&8BNxr>Sk)#zHklt(Ww!R@6EQ<78*jOglR>oSDtV z2O}t`2x|Es1O?FtMMPR8FNy_G>6=&)d>N|ZqauheJ}8Li{JXhy(>7th`RAPTpZkCQ zxqSSm6NN*CLRP@PR{Feo@qA(P(%1N63%2M9P06RQK0{)1V43}w5eX9Su~1#XWA<@ z%SKrgrj`U-vokLVcE%~bC>cF=*3P{o7`=AhF5t6oO0bJ|3Hc?bg8Y7`jFht7gOnbk zyeJw~TY5~;dw-*_bSWu@;QFyS)AAd!8~91N<-3i*c1+KW9k9G8(QJ3ljgY5{$@w34 z594DfT#RP}-`r=eNH>CJXvOZd=de#J)#a-AODb1F3;*1;rxn2-mY44phnG*tXGO8Z zK2vHtipT_#fHH|?Cp5lX$tENBR$=OnyJ%Vi5O2D08jR0y$dw~6(ezTukg+T(c3_}FN>`<_2DCtaW*#B>Edeok@FS1b6{ND z$AW>e8XfjTA;}E^lV_KfzZ|Hic6GELLCT7D(OsEEYd(_lE37!U_w+tofrxNYS9ld* zfGFf194Fwa+Q&6XyFQ*#EbIfsd6=MMaBbEpO$W|6mzUofEQrTewXY~AZR?a%sr1ea zxewQg3;~Wu2vZg*SB}d+fF3*;x#9RtWm6gvoD6n#FOlg z;X`;~?5U0Ru4+%z8Jhg@f6S{rwM!)@lzG5B(oK0T5NPhnP?N9)tt7i$qY+I;5G^f4!4DsdDo6- z+Z(7k{>e<7aeQYXgvsqJ-nwszdqJT0wu{)6=VQpCi*5ln^A+CF{9M4@_zIf{__;1O z(G6^5<{XIK27Up+{Y0b=GD>_jn4U8eBVg$sL28ml>!xcj7-@8v7VS=>BX4LcYT7xM z+j%ySRfVS!VTOQmgi~WSnE#c1SHFTM=-pfO`)S7Tc3#(jyc(SbTn%7DWlR&C5KL2N z*=?gsHyjR+k^yQ~2na#~-e+q-p1~BtGX5gE(6qFi%4f8)md_vN9DM-4M(_(NZh?b_ z>C*)y0vh^>WYwn|pwRnK{vItxz`It)CZ6RN01UB9+v?A2$TxDn7oXeR{+@yiZKJUS zdkOX_NklvoiansvG}rBh~imNV1R z#>6H{Oo$=TTpvtK^uhR`5fkkbFPfO}U_#eEFq-*OxdsuCXGps>S&iGM{|wWQdXU# zJDJOpm30iK4&S-4KFP^CISk0T^%&5A6Y8A?95;w@-jSb`%>1uZGVf-)mM*eCpXf#TfRzFe(M*>c|0p zR-IrvzovGw4F6g+J1pIE+)Cs{3zjzJ2X?e)cbsy92sED6I+?+@YkNBg%VZ#M2{P77 z{%jbzVDgu>kNbo>mAE@8?IseUfYmCX18t-Hl>TO`PTmb9G!ktj+WGH#UqK|u)3}>N z4MIKIj`D+<4|_yHIY}tOYa+K|!f#T?BMJ(NkK-UGD{Xc5pug31HYJh2sP9TXbvGN>rX8MdI%(~4|ljrOdn z>-cE+H$+5<0~<)7&_`lz1%jdtr6BYw?kd_V7!!(8)3AeonZMCT z42aWnwm<8}dTHA89dIi(FhHHW!ar?%g&}^md81Ap$-#h%b z=GlEaP)n*v$YdFG&~31z0BUB%YO&}?#o{%bl5~PMwlq&B#*-$SFm>o9xQ9t>Ct;Eh zWTMi5(!v1}YwKF0%T9E>j!#I9)tMseiczPx!cOdV`_e$`5qSp%<3&P|xOWZsiWM(% z%j@t2^BU3f1%JP7HyhwTv<-D7TSbxgw1TR;zO-#)K^|B}KeP^=c`NtUTAc)blZPF< z&JLgk48=(^Ej9z1$wKA95fTC_IrFjRRq&wWo{x~{7>VVCRGqkL<8;}D^X^wme|6;8 z@m2H{a6(%LPTe$@ugxb>j}-{;;uVFiRZCIdFHLN6QrL^@4;}0klv`;Y(y;a@0j=*g z@9G+2NBQZlQT!;p-PPZ;iWsX?0hI0NF8{9U+yEWgIDHAQwdPY3n10MHM8>rex!4dixZ*;#Aq5hgP9^l}MZt zcpQh&LSl%YDzr84CG`o4QDy?KVz%JvTPXA_o*+AwZt1=qMnwoN!8x)GlPxpnLgbZf zKZJc`BuZEf;>Z_$cPc`G`|>ndwX##mzCjwKddx#~cL;v2W0Yw1GNV=(a8KKdMR2Q7EcWuCr+@K*C;m7k 
z5GQ_`G>^m@H&^Kd2hs5vJ|WeqtBRb>=y^SxJt{nAW1O-lOF%a-97cg0TL?kWPC*vx z)8ZzFHa^^2n4-^z_^3cXX>_^8YA`wDmZD^tl # Porting: Steven Bird # URL: # For license information, see LICENSE.TXT from abc import ABCMeta, abstractmethod +from six import add_metaclass from nltk.probability import DictionaryProbDist -class ClusterI(metaclass=ABCMeta): +@add_metaclass(ABCMeta) +class ClusterI(object): """ Interface covering basic clustering functionality. """ diff --git a/nlp_resource_data/nltk/cluster/em.py b/nlp_resource_data/nltk/cluster/em.py index a93d19c..51dcf1f 100644 --- a/nlp_resource_data/nltk/cluster/em.py +++ b/nlp_resource_data/nltk/cluster/em.py @@ -1,18 +1,21 @@ # Natural Language Toolkit: Expectation Maximization Clusterer # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Trevor Cohn # URL: # For license information, see LICENSE.TXT +from __future__ import print_function, unicode_literals try: import numpy except ImportError: pass +from nltk.compat import python_2_unicode_compatible from nltk.cluster.util import VectorSpaceClusterer +@python_2_unicode_compatible class EMClusterer(VectorSpaceClusterer): """ The Gaussian EM clusterer models the vectors as being produced by @@ -95,7 +98,7 @@ class EMClusterer(VectorSpaceClusterer): while not converged: if trace: - print("iteration; loglikelihood", lastl) + print('iteration; loglikelihood', lastl) # E-step, calculate hidden variables, h[i,j] h = numpy.zeros((len(vectors), self._num_clusters), numpy.float64) for i in range(len(vectors)): @@ -149,7 +152,7 @@ class EMClusterer(VectorSpaceClusterer): def _gaussian(self, mean, cvm, x): m = len(mean) - assert cvm.shape == (m, m), "bad sized covariance matrix, %s" % str(cvm.shape) + assert cvm.shape == (m, m), 'bad sized covariance matrix, %s' % str(cvm.shape) try: det = numpy.linalg.det(cvm) inv = numpy.linalg.inv(cvm) @@ -173,7 +176,7 @@ class EMClusterer(VectorSpaceClusterer): return llh def __repr__(self): - return "" % list(self._means) + return '' % list(self._means) def demo(): @@ -191,28 +194,64 @@ def demo(): clusterer = cluster.EMClusterer(means, bias=0.1) clusters = clusterer.cluster(vectors, True, trace=True) - print("Clustered:", vectors) - print("As: ", clusters) + print('Clustered:', vectors) + print('As: ', clusters) print() for c in range(2): - print("Cluster:", c) - print("Prior: ", clusterer._priors[c]) - print("Mean: ", clusterer._means[c]) - print("Covar: ", clusterer._covariance_matrices[c]) + print('Cluster:', c) + print('Prior: ', clusterer._priors[c]) + print('Mean: ', clusterer._means[c]) + print('Covar: ', clusterer._covariance_matrices[c]) print() # classify a new vector vector = numpy.array([2, 2]) - print("classify(%s):" % vector, end=" ") + print('classify(%s):' % vector, end=' ') print(clusterer.classify(vector)) # show the classification probabilities vector = numpy.array([2, 2]) - print("classification_probdist(%s):" % vector) + print('classification_probdist(%s):' % vector) pdist = clusterer.classification_probdist(vector) for sample in pdist.samples(): - print("%s => %.0f%%" % (sample, pdist.prob(sample) * 100)) + print('%s => %.0f%%' % (sample, pdist.prob(sample) * 100)) -if __name__ == "__main__": + +# +# The following demo code is broken. 
+# +# # use a set of tokens with 2D indices +# vectors = [numpy.array(f) for f in [[3, 3], [1, 2], [4, 2], [4, 0], [2, 3], [3, 1]]] + +# # test the EM clusterer with means given by k-means (2) and +# # dimensionality reduction +# clusterer = cluster.KMeans(2, euclidean_distance, svd_dimensions=1) +# print 'Clusterer:', clusterer +# clusters = clusterer.cluster(vectors) +# means = clusterer.means() +# print 'Means:', clusterer.means() +# print + +# clusterer = cluster.EMClusterer(means, svd_dimensions=1) +# clusters = clusterer.cluster(vectors, True) +# print 'Clusterer:', clusterer +# print 'Clustered:', str(vectors)[:60], '...' +# print 'As:', str(clusters)[:60], '...' +# print + +# # classify a new vector +# vector = numpy.array([3, 3]) +# print 'classify(%s):' % vector, +# print clusterer.classify(vector) +# print + +# # show the classification probabilities +# vector = numpy.array([2.2, 2]) +# print 'classification_probdist(%s)' % vector +# pdist = clusterer.classification_probdist(vector) +# for sample in pdist: +# print '%s => %.0f%%' % (sample, pdist.prob(sample) *100) + +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/cluster/gaac.py b/nlp_resource_data/nltk/cluster/gaac.py index 436ef98..06eb30e 100644 --- a/nlp_resource_data/nltk/cluster/gaac.py +++ b/nlp_resource_data/nltk/cluster/gaac.py @@ -1,9 +1,10 @@ # Natural Language Toolkit: Group Average Agglomerative Clusterer # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Trevor Cohn # URL: # For license information, see LICENSE.TXT +from __future__ import print_function, unicode_literals, division try: import numpy @@ -11,8 +12,10 @@ except ImportError: pass from nltk.cluster.util import VectorSpaceClusterer, Dendrogram, cosine_distance +from nltk.compat import python_2_unicode_compatible +@python_2_unicode_compatible class GAAClusterer(VectorSpaceClusterer): """ The Group Average Agglomerative starts with each of the N vectors as singleton @@ -134,7 +137,7 @@ class GAAClusterer(VectorSpaceClusterer): return self._num_clusters def __repr__(self): - return "" % self._num_clusters + return '' % self._num_clusters def demo(): @@ -151,9 +154,9 @@ def demo(): clusterer = GAAClusterer(4) clusters = clusterer.cluster(vectors, True) - print("Clusterer:", clusterer) - print("Clustered:", vectors) - print("As:", clusters) + print('Clusterer:', clusterer) + print('Clustered:', vectors) + print('As:', clusters) print() # show the dendrogram @@ -161,10 +164,10 @@ def demo(): # classify a new vector vector = numpy.array([3, 3]) - print("classify(%s):" % vector, end=" ") + print('classify(%s):' % vector, end=' ') print(clusterer.classify(vector)) print() -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/cluster/kmeans.py b/nlp_resource_data/nltk/cluster/kmeans.py index 389ff68..bfe1604 100644 --- a/nlp_resource_data/nltk/cluster/kmeans.py +++ b/nlp_resource_data/nltk/cluster/kmeans.py @@ -1,9 +1,10 @@ # Natural Language Toolkit: K-Means Clusterer # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Trevor Cohn # URL: # For license information, see LICENSE.TXT +from __future__ import print_function, unicode_literals, division import copy import random @@ -16,8 +17,10 @@ except ImportError: from nltk.cluster.util import VectorSpaceClusterer +from nltk.compat import python_2_unicode_compatible +@python_2_unicode_compatible class KMeansClusterer(VectorSpaceClusterer): """ The K-means clusterer starts 
with k arbitrary chosen means then allocates @@ -80,12 +83,12 @@ class KMeansClusterer(VectorSpaceClusterer): def cluster_vectorspace(self, vectors, trace=False): if self._means and self._repeats > 1: - print("Warning: means will be discarded for subsequent trials") + print('Warning: means will be discarded for subsequent trials') meanss = [] for trial in range(self._repeats): if trace: - print("k-means trial", trial) + print('k-means trial', trial) if not self._means or trial > 1: self._means = self._rng.sample(list(vectors), self._num_means) self._cluster_vectorspace(vectors, trace) @@ -123,7 +126,7 @@ class KMeansClusterer(VectorSpaceClusterer): clusters[index].append(vector) if trace: - print("iteration") + print('iteration') # for i in range(self._num_means): # print ' mean', i, 'allocated', len(clusters[i]), 'vectors' @@ -175,9 +178,9 @@ class KMeansClusterer(VectorSpaceClusterer): return centroid / (1 + len(cluster)) else: if not len(cluster): - sys.stderr.write("Error: no centroid defined for empty cluster.\n") + sys.stderr.write('Error: no centroid defined for empty cluster.\n') sys.stderr.write( - "Try setting argument 'avoid_empty_clusters' to True\n" + 'Try setting argument \'avoid_empty_clusters\' to True\n' ) assert False centroid = copy.copy(cluster[0]) @@ -186,7 +189,7 @@ class KMeansClusterer(VectorSpaceClusterer): return centroid / len(cluster) def __repr__(self): - return "" % (self._means, self._repeats) + return '' % (self._means, self._repeats) ################################################################################# @@ -203,9 +206,9 @@ def demo(): clusterer = KMeansClusterer(2, euclidean_distance, initial_means=means) clusters = clusterer.cluster(vectors, True, trace=True) - print("Clustered:", vectors) - print("As:", clusters) - print("Means:", clusterer.means()) + print('Clustered:', vectors) + print('As:', clusters) + print('Means:', clusterer.means()) print() vectors = [numpy.array(f) for f in [[3, 3], [1, 2], [4, 2], [4, 0], [2, 3], [3, 1]]] @@ -215,17 +218,17 @@ def demo(): clusterer = KMeansClusterer(2, euclidean_distance, repeats=10) clusters = clusterer.cluster(vectors, True) - print("Clustered:", vectors) - print("As:", clusters) - print("Means:", clusterer.means()) + print('Clustered:', vectors) + print('As:', clusters) + print('Means:', clusterer.means()) print() # classify a new vector vector = numpy.array([3, 3]) - print("classify(%s):" % vector, end=" ") + print('classify(%s):' % vector, end=' ') print(clusterer.classify(vector)) print() -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/cluster/util.py b/nlp_resource_data/nltk/cluster/util.py index c7ab691..a3576e7 100644 --- a/nlp_resource_data/nltk/cluster/util.py +++ b/nlp_resource_data/nltk/cluster/util.py @@ -1,10 +1,11 @@ # Natural Language Toolkit: Clusterer Utilities # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Trevor Cohn # Contributor: J Richard Snape # URL: # For license information, see LICENSE.TXT +from __future__ import print_function, unicode_literals, division from abc import abstractmethod import copy @@ -17,6 +18,7 @@ except ImportError: pass from nltk.cluster.api import ClusterI +from nltk.compat import python_2_unicode_compatible class VectorSpaceClusterer(ClusterI): @@ -174,6 +176,7 @@ class _DendrogramNode(object): return cosine_distance(self._value, comparator._value) < 0 +@python_2_unicode_compatible class Dendrogram(object): """ Represents a dendrogram, a tree with a 
specified branching order. This @@ -228,7 +231,7 @@ class Dendrogram(object): """ # ASCII rendering characters - JOIN, HLINK, VLINK = "+", "-", "|" + JOIN, HLINK, VLINK = '+', '-', '|' # find the root (or create one) if len(self._items) > 1: @@ -248,15 +251,15 @@ class Dendrogram(object): rhalf = int(width - lhalf - 1) # display functions - def format(centre, left=" ", right=" "): - return "%s%s%s" % (lhalf * left, centre, right * rhalf) + def format(centre, left=' ', right=' '): + return '%s%s%s' % (lhalf * left, centre, right * rhalf) def display(str): stdout.write(str) # for each merge, top down queue = [(root._value, root)] - verticals = [format(" ") for leaf in leaves] + verticals = [format(' ') for leaf in leaves] while queue: priority, node = queue.pop() child_left_leaf = list(map(lambda c: c.leaves(False)[0], node._children)) @@ -267,9 +270,9 @@ class Dendrogram(object): for i in range(len(leaves)): if leaves[i] in child_left_leaf: if i == min_idx: - display(format(JOIN, " ", HLINK)) + display(format(JOIN, ' ', HLINK)) elif i == max_idx: - display(format(JOIN, HLINK, " ")) + display(format(JOIN, HLINK, ' ')) else: display(format(JOIN, HLINK, HLINK)) verticals[i] = format(VLINK) @@ -277,7 +280,7 @@ class Dendrogram(object): display(format(HLINK, HLINK, HLINK)) else: display(verticals[i]) - display("\n") + display('\n') for child in node._children: if child._children: queue.append((child._value, child)) @@ -285,11 +288,11 @@ class Dendrogram(object): for vertical in verticals: display(vertical) - display("\n") + display('\n') # finally, display the last line - display("".join(item.center(width) for item in last_row)) - display("\n") + display(''.join(item.center(width) for item in last_row)) + display('\n') def __repr__(self): if len(self._items) > 1: @@ -297,4 +300,4 @@ class Dendrogram(object): else: root = self._items[0] leaves = root.leaves(False) - return "" % len(leaves) + return '' % len(leaves) diff --git a/nlp_resource_data/nltk/collections.py b/nlp_resource_data/nltk/collections.py index efbb78c..882e15c 100644 --- a/nlp_resource_data/nltk/collections.py +++ b/nlp_resource_data/nltk/collections.py @@ -1,18 +1,21 @@ # Natural Language Toolkit: Collections # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Steven Bird # URL: # For license information, see LICENSE.TXT +from __future__ import print_function, absolute_import import bisect from itertools import islice, chain from functools import total_ordering - # this unused import is for python 2.7 from collections import defaultdict, deque, Counter +from six import text_type + from nltk.internals import slice_bounds, raise_unorderable_types +from nltk.compat import python_2_unicode_compatible ########################################################################## @@ -22,8 +25,8 @@ from nltk.internals import slice_bounds, raise_unorderable_types class OrderedDict(dict): def __init__(self, data=None, **kwargs): - self._keys = self.keys(data, kwargs.get("keys")) - self._default_factory = kwargs.get("default_factory") + self._keys = self.keys(data, kwargs.get('keys')) + self._default_factory = kwargs.get('default_factory') if data is None: dict.__init__(self) else: @@ -81,7 +84,7 @@ class OrderedDict(dict): return data.keys() elif isinstance(data, list): return [key for (key, value) in data] - elif "_keys" in self.__dict__: + elif '_keys' in self.__dict__: return self._keys else: return [] @@ -117,6 +120,7 @@ class OrderedDict(dict): @total_ordering +@python_2_unicode_compatible class 
AbstractLazySequence(object): """ An abstract base class for read-only sequences whose values are @@ -143,7 +147,7 @@ class AbstractLazySequence(object): Return the number of tokens in the corpus file underlying this corpus view. """ - raise NotImplementedError("should be implemented by subclass") + raise NotImplementedError('should be implemented by subclass') def iterate_from(self, start): """ @@ -152,7 +156,7 @@ class AbstractLazySequence(object): ``start``. If ``start>=len(self)``, then this iterator will generate no tokens. """ - raise NotImplementedError("should be implemented by subclass") + raise NotImplementedError('should be implemented by subclass') def __getitem__(self, i): """ @@ -167,12 +171,12 @@ class AbstractLazySequence(object): if i < 0: i += len(self) if i < 0: - raise IndexError("index out of range") + raise IndexError('index out of range') # Use iterate_from to extract it. try: return next(self.iterate_from(i)) except StopIteration: - raise IndexError("index out of range") + raise IndexError('index out of range') def __iter__(self): """Return an iterator that generates the tokens in the corpus @@ -192,7 +196,7 @@ class AbstractLazySequence(object): for i, elt in enumerate(islice(self, start, stop)): if elt == value: return i + start - raise ValueError("index(x): x not in list") + raise ValueError('index(x): x not in list') def __contains__(self, value): """Return true if this list contains ``value``.""" @@ -228,8 +232,8 @@ class AbstractLazySequence(object): pieces.append(repr(elt)) length += len(pieces[-1]) + 2 if length > self._MAX_REPR_SIZE and len(pieces) > 2: - return "[%s, ...]" % ", ".join(pieces[:-1]) - return "[%s]" % ", ".join(pieces) + return '[%s, ...]' % text_type(', ').join(pieces[:-1]) + return '[%s]' % text_type(', ').join(pieces) def __eq__(self, other): return type(self) == type(other) and list(self) == list(other) @@ -246,7 +250,7 @@ class AbstractLazySequence(object): """ :raise ValueError: Corpus view objects are unhashable. """ - raise ValueError("%s objects are unhashable" % self.__class__.__name__) + raise ValueError('%s objects are unhashable' % self.__class__.__name__) class LazySubsequence(AbstractLazySequence): @@ -327,12 +331,12 @@ class LazyConcatenation(AbstractLazySequence): if sublist_index == (len(self._offsets) - 1): assert ( index + len(sublist) >= self._offsets[-1] - ), "offests not monotonic increasing!" + ), 'offests not monotonic increasing!' self._offsets.append(index + len(sublist)) else: assert self._offsets[sublist_index + 1] == index + len( sublist - ), "inconsistent list value (num elts)" + ), 'inconsistent list value (num elts)' for value in sublist[max(0, start_index - index) :]: yield value @@ -386,11 +390,11 @@ class LazyMap(AbstractLazySequence): by this lazy map. 
(default=5) """ if not lists: - raise TypeError("LazyMap requires at least two args") + raise TypeError('LazyMap requires at least two args') self._lists = lists self._func = function - self._cache_size = config.get("cache_size", 5) + self._cache_size = config.get('cache_size', 5) self._cache = {} if self._cache_size > 0 else None # If you just take bool() of sum() here _all_lazy will be true just @@ -457,7 +461,7 @@ class LazyMap(AbstractLazySequence): if index < 0: index += len(self) if index < 0: - raise IndexError("index out of range") + raise IndexError('index out of range') # Check the cache if self._cache is not None and index in self._cache: return self._cache[index] @@ -465,7 +469,7 @@ class LazyMap(AbstractLazySequence): try: val = next(self.iterate_from(index)) except StopIteration: - raise IndexError("index out of range") + raise IndexError('index out of range') # Update the cache if self._cache is not None: if len(self._cache) > self._cache_size: diff --git a/nlp_resource_data/nltk/collocations.py b/nlp_resource_data/nltk/collocations.py index 150e29b..eb7bdda 100644 --- a/nlp_resource_data/nltk/collocations.py +++ b/nlp_resource_data/nltk/collocations.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Collocations and Association Measures # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Joel Nothman # URL: # For license information, see LICENSE.TXT @@ -23,6 +23,7 @@ these functionalities, dependent on being provided a function which scores a ngram given appropriate frequency counts. A number of standard association measures are provided in bigram_measures and trigram_measures. """ +from __future__ import print_function # Possible TODOs: # - consider the distinction between f(x,_) and f(x) and whether our @@ -31,17 +32,12 @@ measures are provided in bigram_measures and trigram_measures. # and unigram counts (raw_freq, pmi, student_t) import itertools as _itertools +from six import iteritems from nltk.probability import FreqDist from nltk.util import ngrams - # these two unused imports are referenced in collocations.doctest -from nltk.metrics import ( - ContingencyMeasures, - BigramAssocMeasures, - TrigramAssocMeasures, - QuadgramAssocMeasures, -) +from nltk.metrics import ContingencyMeasures, BigramAssocMeasures, TrigramAssocMeasures from nltk.metrics.spearman import ranks_from_scores, spearman_correlation @@ -65,9 +61,9 @@ class AbstractCollocationFinder(object): def _build_new_documents( cls, documents, window_size, pad_left=False, pad_right=False, pad_symbol=None ): - """ + ''' Pad the document with the place holder according to the window_size - """ + ''' padding = (pad_symbol,) * (window_size - 1) if pad_right: return _itertools.chain.from_iterable( @@ -97,7 +93,7 @@ class AbstractCollocationFinder(object): if the function returns True when passed an ngram tuple. 
""" tmp_ngram = FreqDist() - for ngram, freq in self.ngram_fd.items(): + for ngram, freq in iteritems(self.ngram_fd): if not fn(ngram, freq): tmp_ngram[ngram] = freq self.ngram_fd = tmp_ngram @@ -367,7 +363,7 @@ def demo(scorer=None, compare_scorer=None): from nltk.corpus import stopwords, webtext - ignored_words = stopwords.words("english") + ignored_words = stopwords.words('english') word_filter = lambda w: len(w) < 3 or w.lower() in ignored_words for file in webtext.fileids(): @@ -382,31 +378,31 @@ def demo(scorer=None, compare_scorer=None): ranks_from_scores(cf.score_ngrams(compare_scorer)), ) print(file) - print("\t", [" ".join(tup) for tup in cf.nbest(scorer, 15)]) - print("\t Correlation to %s: %0.4f" % (compare_scorer.__name__, corr)) + print('\t', [' '.join(tup) for tup in cf.nbest(scorer, 15)]) + print('\t Correlation to %s: %0.4f' % (compare_scorer.__name__, corr)) # Slows down loading too much # bigram_measures = BigramAssocMeasures() # trigram_measures = TrigramAssocMeasures() -if __name__ == "__main__": +if __name__ == '__main__': import sys from nltk.metrics import BigramAssocMeasures try: - scorer = eval("BigramAssocMeasures." + sys.argv[1]) + scorer = eval('BigramAssocMeasures.' + sys.argv[1]) except IndexError: scorer = None try: - compare_scorer = eval("BigramAssocMeasures." + sys.argv[2]) + compare_scorer = eval('BigramAssocMeasures.' + sys.argv[2]) except IndexError: compare_scorer = None demo(scorer, compare_scorer) __all__ = [ - "BigramCollocationFinder", - "TrigramCollocationFinder", - "QuadgramCollocationFinder", + 'BigramCollocationFinder', + 'TrigramCollocationFinder', + 'QuadgramCollocationFinder', ] diff --git a/nlp_resource_data/nltk/compat.py b/nlp_resource_data/nltk/compat.py index 163a200..fef28a6 100644 --- a/nlp_resource_data/nltk/compat.py +++ b/nlp_resource_data/nltk/compat.py @@ -1,13 +1,198 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: Compatibility # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # # URL: # For license information, see LICENSE.TXT +from __future__ import absolute_import, print_function import os -from functools import wraps +import sys +from functools import update_wrapper, wraps +import fractions +import unicodedata + +from six import string_types, text_type + +# Python 2/3 compatibility layer. Based on six. + +PY3 = sys.version_info[0] == 3 + +if PY3: + + def get_im_class(meth): + return meth.__self__.__class__ + + import io + + StringIO = io.StringIO + BytesIO = io.BytesIO + + from datetime import timezone + + UTC = timezone.utc + + from tempfile import TemporaryDirectory + +else: + + def get_im_class(meth): + return meth.im_class + + try: + from cStringIO import StringIO + except ImportError: + from StringIO import StringIO + BytesIO = StringIO + + from datetime import tzinfo, timedelta + + ZERO = timedelta(0) + HOUR = timedelta(hours=1) + + # A UTC class for python 2.7 + class UTC(tzinfo): + """UTC""" + + def utcoffset(self, dt): + return ZERO + + def tzname(self, dt): + return "UTC" + + def dst(self, dt): + return ZERO + + UTC = UTC() + + import csv + import codecs + import cStringIO + + class UnicodeWriter: + """ + A CSV writer which will write rows to CSV file "f", + which is encoded in the given encoding. 
+ see https://docs.python.org/2/library/csv.html + """ + + def __init__( + self, f, dialect=csv.excel, encoding="utf-8", errors='replace', **kwds + ): + # Redirect output to a queue + self.queue = cStringIO.StringIO() + self.writer = csv.writer(self.queue, dialect=dialect, **kwds) + self.stream = f + encoder_cls = codecs.getincrementalencoder(encoding) + self.encoder = encoder_cls(errors=errors) + + def encode(self, data): + if isinstance(data, string_types): + return data.encode("utf-8") + else: + return data + + def writerow(self, row): + self.writer.writerow([self.encode(s) for s in row]) + # Fetch UTF-8 output from the queue ... + data = self.queue.getvalue() + data = data.decode("utf-8") + # ... and reencode it into the target encoding + data = self.encoder.encode(data, 'replace') + # write to the target stream + self.stream.write(data) + # empty queue + self.queue.truncate(0) + + import warnings as _warnings + import os as _os + from tempfile import mkdtemp + + class TemporaryDirectory(object): + """Create and return a temporary directory. This has the same + behavior as mkdtemp but can be used as a context manager. For + example: + + with TemporaryDirectory() as tmpdir: + ... + + Upon exiting the context, the directory and everything contained + in it are removed. + + http://stackoverflow.com/questions/19296146/tempfile-temporarydirectory-context-manager-in-python-2-7 + """ + + def __init__(self, suffix="", prefix="tmp", dir=None): + self._closed = False + self.name = None # Handle mkdtemp raising an exception + self.name = mkdtemp(suffix, prefix, dir) + + def __repr__(self): + return "<{} {!r}>".format(self.__class__.__name__, self.name) + + def __enter__(self): + return self.name + + def cleanup(self, _warn=False): + if self.name and not self._closed: + try: + self._rmtree(self.name) + except (TypeError, AttributeError) as ex: + # Issue #10188: Emit a warning on stderr + # if the directory could not be cleaned + # up due to missing globals + if "None" not in str(ex): + raise + print( + "ERROR: {!r} while cleaning up {!r}".format(ex, self), + file=sys.stderr, + ) + return + self._closed = True + if _warn: + self._warn("Implicitly cleaning up {!r}".format(self), Warning) + + def __exit__(self, exc, value, tb): + self.cleanup() + + def __del__(self): + # Issue a Warning if implicit cleanup needed + self.cleanup(_warn=True) + + # XXX (ncoghlan): The following code attempts to make + # this class tolerant of the module nulling out process + # that happens during CPython interpreter shutdown + # Alas, it doesn't actually manage it. See issue #10188 + _listdir = staticmethod(_os.listdir) + _path_join = staticmethod(_os.path.join) + _isdir = staticmethod(_os.path.isdir) + _islink = staticmethod(_os.path.islink) + _remove = staticmethod(_os.remove) + _rmdir = staticmethod(_os.rmdir) + _warn = _warnings.warn + + def _rmtree(self, path): + # Essentially a stripped down version of shutil.rmtree. We can't + # use globals because they may be None'ed out at shutdown. 
+ for name in self._listdir(path): + fullname = self._path_join(path, name) + try: + isdir = self._isdir(fullname) and not self._islink(fullname) + except OSError: + isdir = False + if isdir: + self._rmtree(fullname) + else: + try: + self._remove(fullname) + except OSError: + pass + try: + self._rmdir(path) + except OSError: + pass + # ======= Compatibility for datasets that care about Python versions ======== @@ -22,14 +207,16 @@ DATA_UPDATES = [ _PY3_DATA_UPDATES = [os.path.join(*path_list) for path_list in DATA_UPDATES] + def add_py3_data(path): - for item in _PY3_DATA_UPDATES: - if item in str(path) and "/PY3" not in str(path): - pos = path.index(item) + len(item) - if path[pos : pos + 4] == ".zip": - pos += 4 - path = path[:pos] + "/PY3" + path[pos:] - break + if PY3: + for item in _PY3_DATA_UPDATES: + if item in str(path) and "/PY3" not in str(path): + pos = path.index(item) + len(item) + if path[pos : pos + 4] == ".zip": + pos += 4 + path = path[:pos] + "/PY3" + path[pos:] + break return path @@ -41,3 +228,146 @@ def py3_data(init_func): return init_func(*args, **kwargs) return wraps(init_func)(_decorator) + + +# ======= Compatibility layer for __str__ and __repr__ ========== +def remove_accents(text): + + if isinstance(text, bytes): + text = text.decode('ascii') + + category = unicodedata.category # this gives a small (~10%) speedup + return ''.join( + c for c in unicodedata.normalize('NFKD', text) if category(c) != 'Mn' + ) + + +# Select the best transliteration method: +try: + # Older versions of Unidecode are licensed under Artistic License; + # assume an older version is installed. + from unidecode import unidecode as transliterate +except ImportError: + try: + # text-unidecode implementation is worse than Unidecode + # implementation so Unidecode is preferred. + from text_unidecode import unidecode as transliterate + except ImportError: + # This transliteration method should be enough + # for many Western languages. + transliterate = remove_accents + + +def python_2_unicode_compatible(klass): + """ + This decorator defines __unicode__ method and fixes + __repr__ and __str__ methods under Python 2. + + To support Python 2 and 3 with a single code base, + define __str__ and __repr__ methods returning unicode + text and apply this decorator to the class. + + Original __repr__ and __str__ would be available + as unicode_repr and __unicode__ (under both Python 2 + and Python 3). + """ + + if not issubclass(klass, object): + raise ValueError("This decorator doesn't work for old-style classes") + + # both __unicode__ and unicode_repr are public because they + # may be useful in console under Python 2.x + + # if __str__ or __repr__ are not overriden in a subclass, + # they may be already fixed by this decorator in a parent class + # and we shouldn't them again + + if not _was_fixed(klass.__str__): + klass.__unicode__ = klass.__str__ + if not PY3: + klass.__str__ = _7bit(_transliterated(klass.__unicode__)) + + if not _was_fixed(klass.__repr__): + klass.unicode_repr = klass.__repr__ + if not PY3: + klass.__repr__ = _7bit(klass.unicode_repr) + + return klass + + +def unicode_repr(obj): + """ + For classes that was fixed with @python_2_unicode_compatible + ``unicode_repr`` returns ``obj.unicode_repr()``; for unicode strings + the result is returned without "u" letter (to make output the + same under Python 2.x and Python 3.x); for other variables + it is the same as ``repr``. 
+ """ + if PY3: + return repr(obj) + + # Python 2.x + if hasattr(obj, 'unicode_repr'): + return obj.unicode_repr() + + if isinstance(obj, text_type): + return repr(obj)[1:] # strip "u" letter from output + + return repr(obj) + + +def _transliterated(method): + def wrapper(self): + return transliterate(method(self)) + + update_wrapper(wrapper, method, ["__name__", "__doc__"]) + if hasattr(method, "_nltk_compat_7bit"): + wrapper._nltk_compat_7bit = method._nltk_compat_7bit + + wrapper._nltk_compat_transliterated = True + return wrapper + + +def _7bit(method): + def wrapper(self): + return method(self).encode('ascii', 'backslashreplace') + + update_wrapper(wrapper, method, ["__name__", "__doc__"]) + + if hasattr(method, "_nltk_compat_transliterated"): + wrapper._nltk_compat_transliterated = method._nltk_compat_transliterated + + wrapper._nltk_compat_7bit = True + return wrapper + + +def _was_fixed(method): + return getattr(method, "_nltk_compat_7bit", False) or getattr( + method, "_nltk_compat_transliterated", False + ) + + +class Fraction(fractions.Fraction): + """ + This is a simplified backwards compatible version of fractions.Fraction + from Python >=3.5. It adds the `_normalize` parameter such that it does + not normalize the denominator to the Greatest Common Divisor (gcd) when + the numerator is 0. + + This is most probably only used by the nltk.translate.bleu_score.py where + numerator and denominator of the different ngram precisions are mutable. + But the idea of "mutable" fraction might not be applicable to other usages, + See http://stackoverflow.com/questions/34561265 + + This objects should be deprecated once NLTK stops supporting Python < 3.5 + See https://github.com/nltk/nltk/issues/1330 + """ + + def __new__(cls, numerator=0, denominator=None, _normalize=True): + cls = super(Fraction, cls).__new__(cls, numerator, denominator) + # To emulate fraction.Fraction.from_float across Python >=2.7, + # check that numerator is an integer and denominator is not None. 
+ if not _normalize and type(numerator) == int and denominator: + cls._numerator = numerator + cls._denominator = denominator + return cls diff --git a/nlp_resource_data/nltk/corpus/__init__.py b/nlp_resource_data/nltk/corpus/__init__.py index b305c95..89a15eb 100644 --- a/nlp_resource_data/nltk/corpus/__init__.py +++ b/nlp_resource_data/nltk/corpus/__init__.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Corpus Readers # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # URL: # For license information, see LICENSE.TXT @@ -66,359 +66,359 @@ from nltk.corpus.util import LazyCorpusLoader from nltk.corpus.reader import * abc = LazyCorpusLoader( - "abc", + 'abc', PlaintextCorpusReader, - r"(?!\.).*\.txt", - encoding=[("science", "latin_1"), ("rural", "utf8")], + r'(?!\.).*\.txt', + encoding=[('science', 'latin_1'), ('rural', 'utf8')], ) -alpino = LazyCorpusLoader("alpino", AlpinoCorpusReader, tagset="alpino") +alpino = LazyCorpusLoader('alpino', AlpinoCorpusReader, tagset='alpino') brown = LazyCorpusLoader( - "brown", + 'brown', CategorizedTaggedCorpusReader, - r"c[a-z]\d\d", - cat_file="cats.txt", - tagset="brown", + r'c[a-z]\d\d', + cat_file='cats.txt', + tagset='brown', encoding="ascii", ) cess_cat = LazyCorpusLoader( - "cess_cat", + 'cess_cat', BracketParseCorpusReader, - r"(?!\.).*\.tbf", - tagset="unknown", - encoding="ISO-8859-15", + r'(?!\.).*\.tbf', + tagset='unknown', + encoding='ISO-8859-15', ) cess_esp = LazyCorpusLoader( - "cess_esp", + 'cess_esp', BracketParseCorpusReader, - r"(?!\.).*\.tbf", - tagset="unknown", - encoding="ISO-8859-15", + r'(?!\.).*\.tbf', + tagset='unknown', + encoding='ISO-8859-15', ) -cmudict = LazyCorpusLoader("cmudict", CMUDictCorpusReader, ["cmudict"]) -comtrans = LazyCorpusLoader("comtrans", AlignedCorpusReader, r"(?!\.).*\.txt") +cmudict = LazyCorpusLoader('cmudict', CMUDictCorpusReader, ['cmudict']) +comtrans = LazyCorpusLoader('comtrans', AlignedCorpusReader, r'(?!\.).*\.txt') comparative_sentences = LazyCorpusLoader( - "comparative_sentences", + 'comparative_sentences', ComparativeSentencesCorpusReader, - r"labeledSentences\.txt", - encoding="latin-1", + r'labeledSentences\.txt', + encoding='latin-1', ) conll2000 = LazyCorpusLoader( - "conll2000", + 'conll2000', ConllChunkCorpusReader, - ["train.txt", "test.txt"], - ("NP", "VP", "PP"), - tagset="wsj", - encoding="ascii", + ['train.txt', 'test.txt'], + ('NP', 'VP', 'PP'), + tagset='wsj', + encoding='ascii', ) conll2002 = LazyCorpusLoader( - "conll2002", + 'conll2002', ConllChunkCorpusReader, - ".*\.(test|train).*", - ("LOC", "PER", "ORG", "MISC"), - encoding="utf-8", + '.*\.(test|train).*', + ('LOC', 'PER', 'ORG', 'MISC'), + encoding='utf-8', ) conll2007 = LazyCorpusLoader( - "conll2007", + 'conll2007', DependencyCorpusReader, - ".*\.(test|train).*", - encoding=[("eus", "ISO-8859-2"), ("esp", "utf8")], + '.*\.(test|train).*', + encoding=[('eus', 'ISO-8859-2'), ('esp', 'utf8')], ) -crubadan = LazyCorpusLoader("crubadan", CrubadanCorpusReader, ".*\.txt") +crubadan = LazyCorpusLoader('crubadan', CrubadanCorpusReader, '.*\.txt') dependency_treebank = LazyCorpusLoader( - "dependency_treebank", DependencyCorpusReader, ".*\.dp", encoding="ascii" + 'dependency_treebank', DependencyCorpusReader, '.*\.dp', encoding='ascii' ) floresta = LazyCorpusLoader( - "floresta", + 'floresta', BracketParseCorpusReader, - r"(?!\.).*\.ptb", - "#", - tagset="unknown", - encoding="ISO-8859-15", + r'(?!\.).*\.ptb', + '#', + tagset='unknown', + encoding='ISO-8859-15', ) 
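The @python_2_unicode_compatible decorator restored by the compat.py hunk above is the same one the cluster and collections hunks re-attach to their classes. A minimal sketch of what it buys, assuming the patched nltk.compat (and its six dependency) is importable; the Token class is invented for illustration and is not an NLTK class:

    from nltk.compat import python_2_unicode_compatible, unicode_repr

    @python_2_unicode_compatible
    class Token(object):
        """Toy class whose __str__/__repr__ return unicode text."""

        def __init__(self, text):
            self.text = text

        def __str__(self):
            return self.text                 # may contain non-ASCII characters

        def __repr__(self):
            return '<Token %s>' % self.text

    tok = Token(u'caf\xe9')
    # On Python 3 the decorator only aliases __unicode__ and unicode_repr to the
    # existing methods; on Python 2 it would additionally wrap __str__/__repr__
    # so they emit 7-bit ASCII (via the _7bit/_transliterated helpers above).
    print(unicode_repr(tok))                 # <Token café> on Python 3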
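A second compat detail worth a sketch: the backported Fraction in the same hunk exists for its `_normalize=False` flag, which keeps a zero numerator's denominator instead of collapsing it to 0/1 (its docstring points at the mutable BLEU precision counts in nltk.translate.bleu_score). A minimal comparison against the standard library, again assuming the patched nltk.compat is importable:

    from fractions import Fraction as StdFraction
    from nltk.compat import Fraction

    print(StdFraction(0, 7))                  # 0   (denominator reduced to 1)

    p = Fraction(0, 7, _normalize=False)
    print(p.numerator, p.denominator)         # 0 7 (raw counts preserved)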
framenet15 = LazyCorpusLoader( - "framenet_v15", + 'framenet_v15', FramenetCorpusReader, [ - "frRelation.xml", - "frameIndex.xml", - "fulltextIndex.xml", - "luIndex.xml", - "semTypes.xml", + 'frRelation.xml', + 'frameIndex.xml', + 'fulltextIndex.xml', + 'luIndex.xml', + 'semTypes.xml', ], ) framenet = LazyCorpusLoader( - "framenet_v17", + 'framenet_v17', FramenetCorpusReader, [ - "frRelation.xml", - "frameIndex.xml", - "fulltextIndex.xml", - "luIndex.xml", - "semTypes.xml", + 'frRelation.xml', + 'frameIndex.xml', + 'fulltextIndex.xml', + 'luIndex.xml', + 'semTypes.xml', ], ) gazetteers = LazyCorpusLoader( - "gazetteers", WordListCorpusReader, r"(?!LICENSE|\.).*\.txt", encoding="ISO-8859-2" + 'gazetteers', WordListCorpusReader, r'(?!LICENSE|\.).*\.txt', encoding='ISO-8859-2' ) genesis = LazyCorpusLoader( - "genesis", + 'genesis', PlaintextCorpusReader, - r"(?!\.).*\.txt", + r'(?!\.).*\.txt', encoding=[ - ("finnish|french|german", "latin_1"), - ("swedish", "cp865"), - (".*", "utf_8"), + ('finnish|french|german', 'latin_1'), + ('swedish', 'cp865'), + ('.*', 'utf_8'), ], ) gutenberg = LazyCorpusLoader( - "gutenberg", PlaintextCorpusReader, r"(?!\.).*\.txt", encoding="latin1" + 'gutenberg', PlaintextCorpusReader, r'(?!\.).*\.txt', encoding='latin1' ) -ieer = LazyCorpusLoader("ieer", IEERCorpusReader, r"(?!README|\.).*") +ieer = LazyCorpusLoader('ieer', IEERCorpusReader, r'(?!README|\.).*') inaugural = LazyCorpusLoader( - "inaugural", PlaintextCorpusReader, r"(?!\.).*\.txt", encoding="latin1" + 'inaugural', PlaintextCorpusReader, r'(?!\.).*\.txt', encoding='latin1' ) # [XX] This should probably just use TaggedCorpusReader: indian = LazyCorpusLoader( - "indian", IndianCorpusReader, r"(?!\.).*\.pos", tagset="unknown", encoding="utf8" + 'indian', IndianCorpusReader, r'(?!\.).*\.pos', tagset='unknown', encoding='utf8' ) -jeita = LazyCorpusLoader("jeita", ChasenCorpusReader, r".*\.chasen", encoding="utf-8") -knbc = LazyCorpusLoader("knbc/corpus1", KNBCorpusReader, r".*/KN.*", encoding="euc-jp") -lin_thesaurus = LazyCorpusLoader("lin_thesaurus", LinThesaurusCorpusReader, r".*\.lsp") +jeita = LazyCorpusLoader('jeita', ChasenCorpusReader, r'.*\.chasen', encoding='utf-8') +knbc = LazyCorpusLoader('knbc/corpus1', KNBCorpusReader, r'.*/KN.*', encoding='euc-jp') +lin_thesaurus = LazyCorpusLoader('lin_thesaurus', LinThesaurusCorpusReader, r'.*\.lsp') mac_morpho = LazyCorpusLoader( - "mac_morpho", + 'mac_morpho', MacMorphoCorpusReader, - r"(?!\.).*\.txt", - tagset="unknown", - encoding="latin-1", + r'(?!\.).*\.txt', + tagset='unknown', + encoding='latin-1', ) machado = LazyCorpusLoader( - "machado", + 'machado', PortugueseCategorizedPlaintextCorpusReader, - r"(?!\.).*\.txt", - cat_pattern=r"([a-z]*)/.*", - encoding="latin-1", + r'(?!\.).*\.txt', + cat_pattern=r'([a-z]*)/.*', + encoding='latin-1', ) masc_tagged = LazyCorpusLoader( - "masc_tagged", + 'masc_tagged', CategorizedTaggedCorpusReader, - r"(spoken|written)/.*\.txt", - cat_file="categories.txt", - tagset="wsj", + r'(spoken|written)/.*\.txt', + cat_file='categories.txt', + tagset='wsj', encoding="utf-8", sep="_", ) movie_reviews = LazyCorpusLoader( - "movie_reviews", + 'movie_reviews', CategorizedPlaintextCorpusReader, - r"(?!\.).*\.txt", - cat_pattern=r"(neg|pos)/.*", - encoding="ascii", + r'(?!\.).*\.txt', + cat_pattern=r'(neg|pos)/.*', + encoding='ascii', ) multext_east = LazyCorpusLoader( - "mte_teip5", MTECorpusReader, r"(oana).*\.xml", encoding="utf-8" + 'mte_teip5', MTECorpusReader, r'(oana).*\.xml', encoding="utf-8" ) names = LazyCorpusLoader( - 
"names", WordListCorpusReader, r"(?!\.).*\.txt", encoding="ascii" + 'names', WordListCorpusReader, r'(?!\.).*\.txt', encoding='ascii' ) nps_chat = LazyCorpusLoader( - "nps_chat", NPSChatCorpusReader, r"(?!README|\.).*\.xml", tagset="wsj" + 'nps_chat', NPSChatCorpusReader, r'(?!README|\.).*\.xml', tagset='wsj' ) opinion_lexicon = LazyCorpusLoader( - "opinion_lexicon", + 'opinion_lexicon', OpinionLexiconCorpusReader, - r"(\w+)\-words\.txt", - encoding="ISO-8859-2", + r'(\w+)\-words\.txt', + encoding='ISO-8859-2', ) ppattach = LazyCorpusLoader( - "ppattach", PPAttachmentCorpusReader, ["training", "test", "devset"] + 'ppattach', PPAttachmentCorpusReader, ['training', 'test', 'devset'] ) product_reviews_1 = LazyCorpusLoader( - "product_reviews_1", ReviewsCorpusReader, r"^(?!Readme).*\.txt", encoding="utf8" + 'product_reviews_1', ReviewsCorpusReader, r'^(?!Readme).*\.txt', encoding='utf8' ) product_reviews_2 = LazyCorpusLoader( - "product_reviews_2", ReviewsCorpusReader, r"^(?!Readme).*\.txt", encoding="utf8" + 'product_reviews_2', ReviewsCorpusReader, r'^(?!Readme).*\.txt', encoding='utf8' ) pros_cons = LazyCorpusLoader( - "pros_cons", + 'pros_cons', ProsConsCorpusReader, - r"Integrated(Cons|Pros)\.txt", - cat_pattern=r"Integrated(Cons|Pros)\.txt", - encoding="ISO-8859-2", + r'Integrated(Cons|Pros)\.txt', + cat_pattern=r'Integrated(Cons|Pros)\.txt', + encoding='ISO-8859-2', ) ptb = LazyCorpusLoader( # Penn Treebank v3: WSJ and Brown portions - "ptb", + 'ptb', CategorizedBracketParseCorpusReader, - r"(WSJ/\d\d/WSJ_\d\d|BROWN/C[A-Z]/C[A-Z])\d\d.MRG", - cat_file="allcats.txt", - tagset="wsj", + r'(WSJ/\d\d/WSJ_\d\d|BROWN/C[A-Z]/C[A-Z])\d\d.MRG', + cat_file='allcats.txt', + tagset='wsj', ) qc = LazyCorpusLoader( - "qc", StringCategoryCorpusReader, ["train.txt", "test.txt"], encoding="ISO-8859-2" + 'qc', StringCategoryCorpusReader, ['train.txt', 'test.txt'], encoding='ISO-8859-2' ) reuters = LazyCorpusLoader( - "reuters", + 'reuters', CategorizedPlaintextCorpusReader, - "(training|test).*", - cat_file="cats.txt", - encoding="ISO-8859-2", + '(training|test).*', + cat_file='cats.txt', + encoding='ISO-8859-2', ) -rte = LazyCorpusLoader("rte", RTECorpusReader, r"(?!\.).*\.xml") -senseval = LazyCorpusLoader("senseval", SensevalCorpusReader, r"(?!\.).*\.pos") +rte = LazyCorpusLoader('rte', RTECorpusReader, r'(?!\.).*\.xml') +senseval = LazyCorpusLoader('senseval', SensevalCorpusReader, r'(?!\.).*\.pos') sentence_polarity = LazyCorpusLoader( - "sentence_polarity", + 'sentence_polarity', CategorizedSentencesCorpusReader, - r"rt-polarity\.(neg|pos)", - cat_pattern=r"rt-polarity\.(neg|pos)", - encoding="utf-8", + r'rt-polarity\.(neg|pos)', + cat_pattern=r'rt-polarity\.(neg|pos)', + encoding='utf-8', ) sentiwordnet = LazyCorpusLoader( - "sentiwordnet", SentiWordNetCorpusReader, "SentiWordNet_3.0.0.txt", encoding="utf-8" + 'sentiwordnet', SentiWordNetCorpusReader, 'SentiWordNet_3.0.0.txt', encoding='utf-8' ) -shakespeare = LazyCorpusLoader("shakespeare", XMLCorpusReader, r"(?!\.).*\.xml") +shakespeare = LazyCorpusLoader('shakespeare', XMLCorpusReader, r'(?!\.).*\.xml') sinica_treebank = LazyCorpusLoader( - "sinica_treebank", + 'sinica_treebank', SinicaTreebankCorpusReader, - ["parsed"], - tagset="unknown", - encoding="utf-8", + ['parsed'], + tagset='unknown', + encoding='utf-8', ) state_union = LazyCorpusLoader( - "state_union", PlaintextCorpusReader, r"(?!\.).*\.txt", encoding="ISO-8859-2" + 'state_union', PlaintextCorpusReader, r'(?!\.).*\.txt', encoding='ISO-8859-2' ) stopwords = LazyCorpusLoader( - 
"stopwords", WordListCorpusReader, r"(?!README|\.).*", encoding="utf8" + 'stopwords', WordListCorpusReader, r'(?!README|\.).*', encoding='utf8' ) subjectivity = LazyCorpusLoader( - "subjectivity", + 'subjectivity', CategorizedSentencesCorpusReader, - r"(quote.tok.gt9|plot.tok.gt9)\.5000", - cat_map={"quote.tok.gt9.5000": ["subj"], "plot.tok.gt9.5000": ["obj"]}, - encoding="latin-1", + r'(quote.tok.gt9|plot.tok.gt9)\.5000', + cat_map={'quote.tok.gt9.5000': ['subj'], 'plot.tok.gt9.5000': ['obj']}, + encoding='latin-1', ) swadesh = LazyCorpusLoader( - "swadesh", SwadeshCorpusReader, r"(?!README|\.).*", encoding="utf8" + 'swadesh', SwadeshCorpusReader, r'(?!README|\.).*', encoding='utf8' ) swadesh110 = LazyCorpusLoader( - 'panlex_swadesh', PanlexSwadeshCorpusReader, r'swadesh110/.*\.txt', encoding='utf8' + 'panlex_swadesh', SwadeshCorpusReader, r'swadesh110/.*\.txt', encoding='utf8' ) swadesh207 = LazyCorpusLoader( - 'panlex_swadesh', PanlexSwadeshCorpusReader, r'swadesh207/.*\.txt', encoding='utf8' + 'panlex_swadesh', SwadeshCorpusReader, r'swadesh207/.*\.txt', encoding='utf8' ) -switchboard = LazyCorpusLoader("switchboard", SwitchboardCorpusReader, tagset="wsj") -timit = LazyCorpusLoader("timit", TimitCorpusReader) +switchboard = LazyCorpusLoader('switchboard', SwitchboardCorpusReader, tagset='wsj') +timit = LazyCorpusLoader('timit', TimitCorpusReader) timit_tagged = LazyCorpusLoader( - "timit", TimitTaggedCorpusReader, ".+\.tags", tagset="wsj", encoding="ascii" + 'timit', TimitTaggedCorpusReader, '.+\.tags', tagset='wsj', encoding='ascii' ) toolbox = LazyCorpusLoader( - "toolbox", ToolboxCorpusReader, r"(?!.*(README|\.)).*\.(dic|txt)" + 'toolbox', ToolboxCorpusReader, r'(?!.*(README|\.)).*\.(dic|txt)' ) treebank = LazyCorpusLoader( - "treebank/combined", + 'treebank/combined', BracketParseCorpusReader, - r"wsj_.*\.mrg", - tagset="wsj", - encoding="ascii", + r'wsj_.*\.mrg', + tagset='wsj', + encoding='ascii', ) treebank_chunk = LazyCorpusLoader( - "treebank/tagged", + 'treebank/tagged', ChunkedCorpusReader, - r"wsj_.*\.pos", - sent_tokenizer=RegexpTokenizer(r"(?<=/\.)\s*(?![^\[]*\])", gaps=True), + r'wsj_.*\.pos', + sent_tokenizer=RegexpTokenizer(r'(?<=/\.)\s*(?![^\[]*\])', gaps=True), para_block_reader=tagged_treebank_para_block_reader, - tagset="wsj", - encoding="ascii", + tagset='wsj', + encoding='ascii', ) treebank_raw = LazyCorpusLoader( - "treebank/raw", PlaintextCorpusReader, r"wsj_.*", encoding="ISO-8859-2" + 'treebank/raw', PlaintextCorpusReader, r'wsj_.*', encoding='ISO-8859-2' ) -twitter_samples = LazyCorpusLoader("twitter_samples", TwitterCorpusReader, ".*\.json") -udhr = LazyCorpusLoader("udhr", UdhrCorpusReader) -udhr2 = LazyCorpusLoader("udhr2", PlaintextCorpusReader, r".*\.txt", encoding="utf8") +twitter_samples = LazyCorpusLoader('twitter_samples', TwitterCorpusReader, '.*\.json') +udhr = LazyCorpusLoader('udhr', UdhrCorpusReader) +udhr2 = LazyCorpusLoader('udhr2', PlaintextCorpusReader, r'.*\.txt', encoding='utf8') universal_treebanks = LazyCorpusLoader( - "universal_treebanks_v20", + 'universal_treebanks_v20', ConllCorpusReader, - r".*\.conll", + r'.*\.conll', columntypes=( - "ignore", - "words", - "ignore", - "ignore", - "pos", - "ignore", - "ignore", - "ignore", - "ignore", - "ignore", + 'ignore', + 'words', + 'ignore', + 'ignore', + 'pos', + 'ignore', + 'ignore', + 'ignore', + 'ignore', + 'ignore', ), ) -verbnet = LazyCorpusLoader("verbnet", VerbnetCorpusReader, r"(?!\.).*\.xml") +verbnet = LazyCorpusLoader('verbnet', VerbnetCorpusReader, r'(?!\.).*\.xml') webtext = 
LazyCorpusLoader( - "webtext", PlaintextCorpusReader, r"(?!README|\.).*\.txt", encoding="ISO-8859-2" + 'webtext', PlaintextCorpusReader, r'(?!README|\.).*\.txt', encoding='ISO-8859-2' ) wordnet = LazyCorpusLoader( - "wordnet", + 'wordnet', WordNetCorpusReader, - LazyCorpusLoader("omw", CorpusReader, r".*/wn-data-.*\.tab", encoding="utf8"), + LazyCorpusLoader('omw', CorpusReader, r'.*/wn-data-.*\.tab', encoding='utf8'), ) -wordnet_ic = LazyCorpusLoader("wordnet_ic", WordNetICCorpusReader, ".*\.dat") +wordnet_ic = LazyCorpusLoader('wordnet_ic', WordNetICCorpusReader, '.*\.dat') words = LazyCorpusLoader( - "words", WordListCorpusReader, r"(?!README|\.).*", encoding="ascii" + 'words', WordListCorpusReader, r'(?!README|\.).*', encoding='ascii' ) # defined after treebank propbank = LazyCorpusLoader( - "propbank", + 'propbank', PropbankCorpusReader, - "prop.txt", - "frames/.*\.xml", - "verbs.txt", - lambda filename: re.sub(r"^wsj/\d\d/", "", filename), + 'prop.txt', + 'frames/.*\.xml', + 'verbs.txt', + lambda filename: re.sub(r'^wsj/\d\d/', '', filename), treebank, ) # Must be defined *after* treebank corpus. nombank = LazyCorpusLoader( - "nombank.1.0", + 'nombank.1.0', NombankCorpusReader, - "nombank.1.0", - "frames/.*\.xml", - "nombank.1.0.words", - lambda filename: re.sub(r"^wsj/\d\d/", "", filename), + 'nombank.1.0', + 'frames/.*\.xml', + 'nombank.1.0.words', + lambda filename: re.sub(r'^wsj/\d\d/', '', filename), treebank, ) # Must be defined *after* treebank corpus. propbank_ptb = LazyCorpusLoader( - "propbank", + 'propbank', PropbankCorpusReader, - "prop.txt", - "frames/.*\.xml", - "verbs.txt", + 'prop.txt', + 'frames/.*\.xml', + 'verbs.txt', lambda filename: filename.upper(), ptb, ) # Must be defined *after* ptb corpus. nombank_ptb = LazyCorpusLoader( - "nombank.1.0", + 'nombank.1.0', NombankCorpusReader, - "nombank.1.0", - "frames/.*\.xml", - "nombank.1.0.words", + 'nombank.1.0', + 'frames/.*\.xml', + 'nombank.1.0.words', lambda filename: filename.upper(), ptb, ) # Must be defined *after* ptb corpus. semcor = LazyCorpusLoader( - "semcor", SemcorCorpusReader, r"brown./tagfiles/br-.*\.xml", wordnet + 'semcor', SemcorCorpusReader, r'brown./tagfiles/br-.*\.xml', wordnet ) # Must be defined *after* wordnet corpus. 
 nonbreaking_prefixes = LazyCorpusLoader(
-    "nonbreaking_prefixes",
+    'nonbreaking_prefixes',
     NonbreakingPrefixesCorpusReader,
-    r"(?!README|\.).*",
-    encoding="utf8",
+    r'(?!README|\.).*',
+    encoding='utf8',
 )
 perluniprops = LazyCorpusLoader(
-    "perluniprops",
+    'perluniprops',
     UnicharsCorpusReader,
-    r"(?!README|\.).*",
-    nltk_data_subdir="misc",
-    encoding="utf8",
+    r'(?!README|\.).*',
+    nltk_data_subdir='misc',
+    encoding='utf8',
 )
 
 # mwa_ppdb = LazyCorpusLoader(
@@ -478,7 +478,7 @@ def demo():
     # ycoe.demo()
 
 
-if __name__ == "__main__":
+if __name__ == '__main__':
     # demo()
     pass
 
@@ -489,5 +489,5 @@ def teardown_module(module=None):
 
     for name in dir(nltk.corpus):
         obj = getattr(nltk.corpus, name, None)
-        if isinstance(obj, CorpusReader) and hasattr(obj, "_unload"):
+        if isinstance(obj, CorpusReader) and hasattr(obj, '_unload'):
             obj._unload()
diff --git a/nlp_resource_data/nltk/corpus/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/corpus/__pycache__/__init__.cpython-37.pyc
index 414d9b792a50726e566411042636dd575817d717..2176b91b393944991a30ceeb00e7735ee28ca6b0 100644
GIT binary patch
diff --git a/nlp_resource_data/nltk/corpus/__pycache__/europarl_raw.cpython-37.pyc b/nlp_resource_data/nltk/corpus/__pycache__/europarl_raw.cpython-37.pyc
index 01cbbb482fb9564feb5df0f6ca92d6237016efa5..350933d9c5ea555246aa617ab6a8f703c810e9d2 100644
GIT binary patch
diff --git a/nlp_resource_data/nltk/corpus/__pycache__/util.cpython-37.pyc b/nlp_resource_data/nltk/corpus/__pycache__/util.cpython-37.pyc
index 3a0893f6cd72a812cc52c5f28a02cee968013ecb..85df09d5b377308ee3e765ae3d8d8ad3f38c9349 100644
GIT binary patch
diff --git a/nlp_resource_data/nltk/corpus/europarl_raw.py b/nlp_resource_data/nltk/corpus/europarl_raw.py
index a4caa7b..b03011c 100644
--- a/nlp_resource_data/nltk/corpus/europarl_raw.py
+++ b/nlp_resource_data/nltk/corpus/europarl_raw.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Europarl Corpus Readers
 #
-# Copyright (C) 2001-2020 NLTK Project
+# Copyright (C) 2001-2019 NLTK Project
 # Author: Nitin Madnani
 # URL:
 # For license information, see LICENSE.TXT
@@ -11,45 +11,45 @@ from nltk.corpus.reader import *
 
 # Create a new corpus reader instance for each European language
 danish = LazyCorpusLoader(
-    "europarl_raw/danish", EuroparlCorpusReader, r"ep-.*\.da", encoding="utf-8"
+    'europarl_raw/danish', EuroparlCorpusReader, r'ep-.*\.da', encoding='utf-8'
 )
 
 dutch = LazyCorpusLoader(
-    "europarl_raw/dutch", EuroparlCorpusReader, r"ep-.*\.nl", encoding="utf-8"
+    'europarl_raw/dutch', EuroparlCorpusReader, r'ep-.*\.nl', encoding='utf-8'
 )
 
 english = LazyCorpusLoader(
-    "europarl_raw/english", EuroparlCorpusReader, r"ep-.*\.en", encoding="utf-8"
+    'europarl_raw/english', EuroparlCorpusReader, r'ep-.*\.en', encoding='utf-8'
 )
 
 finnish = LazyCorpusLoader(
-    "europarl_raw/finnish", EuroparlCorpusReader, r"ep-.*\.fi", encoding="utf-8"
+    'europarl_raw/finnish', EuroparlCorpusReader, r'ep-.*\.fi', encoding='utf-8'
 )
 
 french = LazyCorpusLoader(
-    "europarl_raw/french", EuroparlCorpusReader, r"ep-.*\.fr", encoding="utf-8"
+    'europarl_raw/french', EuroparlCorpusReader, r'ep-.*\.fr', encoding='utf-8'
 )
 
 german = LazyCorpusLoader(
-    "europarl_raw/german", EuroparlCorpusReader, r"ep-.*\.de", encoding="utf-8"
+    'europarl_raw/german', EuroparlCorpusReader, r'ep-.*\.de', encoding='utf-8'
 )
 
 greek = LazyCorpusLoader(
-    "europarl_raw/greek", EuroparlCorpusReader, r"ep-.*\.el", encoding="utf-8"
+    'europarl_raw/greek', EuroparlCorpusReader, r'ep-.*\.el', encoding='utf-8'
 )
 
 italian = LazyCorpusLoader(
-    "europarl_raw/italian", EuroparlCorpusReader, r"ep-.*\.it", encoding="utf-8"
+    'europarl_raw/italian', EuroparlCorpusReader, r'ep-.*\.it', encoding='utf-8'
 )
 
 portuguese = LazyCorpusLoader(
-    "europarl_raw/portuguese", EuroparlCorpusReader, r"ep-.*\.pt", encoding="utf-8"
+    'europarl_raw/portuguese', EuroparlCorpusReader, r'ep-.*\.pt', encoding='utf-8'
 )
 
 spanish = LazyCorpusLoader(
-    "europarl_raw/spanish", EuroparlCorpusReader, r"ep-.*\.es", encoding="utf-8"
+    'europarl_raw/spanish', EuroparlCorpusReader, r'ep-.*\.es', encoding='utf-8'
 )
 
 swedish = LazyCorpusLoader(
-    "europarl_raw/swedish", EuroparlCorpusReader, r"ep-.*\.sv", encoding="utf-8"
+    'europarl_raw/swedish', EuroparlCorpusReader, r'ep-.*\.sv', encoding='utf-8'
 )
diff --git a/nlp_resource_data/nltk/corpus/reader/__init__.py b/nlp_resource_data/nltk/corpus/reader/__init__.py
index a1db6d4..19c1515 100644
--- a/nlp_resource_data/nltk/corpus/reader/__init__.py
+++ b/nlp_resource_data/nltk/corpus/reader/__init__.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Corpus Readers
 #
-# Copyright (C) 2001-2020 NLTK Project
+# Copyright (C) 2001-2019 NLTK Project
 # Author: Steven Bird
 #         Edward Loper
 # URL:
@@ -103,7 +103,6 @@ from nltk.corpus.reader.pros_cons import *
 from nltk.corpus.reader.categorized_sents import *
 from nltk.corpus.reader.comparative_sents import *
 from nltk.corpus.reader.panlex_lite import *
-from nltk.corpus.reader.panlex_swadesh import *
 
 # Make sure that nltk.corpus.reader.bracket_parse gives the module, not
 # the function bracket_parse() defined in nltk.tree:
@@ -179,5 +178,4 @@ __all__ = [
     'NonbreakingPrefixesCorpusReader',
     'UnicharsCorpusReader',
     'MWAPPDBCorpusReader',
-    'PanlexSwadeshCorpusReader',
 ]
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/__init__.cpython-37.pyc
index dfc3584a7c1a1ace13c1b5b885f6f33b7ae4ebd0..fe100f76eb047b2419ba8e3bbbe3db8a273a9b7f 100644
GIT binary patch
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/aligned.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/aligned.cpython-37.pyc
index 6012d40eb18b82eb45f53961f4f04b21947f1280..2566ddb287d92f258dd1d7165fad3f4b1efc9414 100644
GIT binary patch
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/api.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/api.cpython-37.pyc
index b576aec3cf19a5dca8ff9ca8bd36e960cf914d71..77daf69d4c6d14593102160f7356d0fc60356ce8 100644
GIT binary patch
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/bnc.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/bnc.cpython-37.pyc
index 56e8750221f24eaa31cbef93e7940226dee2662f..9622ca5b8024f990cb1e385946d6cb9d25f17fd3 100644
GIT binary patch
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/bracket_parse.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/bracket_parse.cpython-37.pyc
index 45e44012ebdb847fa5c54dd3a27171edc5ed082b..23deba13d515f15efd3ebb822e18f8457a2d80e3 100644
GIT binary patch
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/categorized_sents.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/categorized_sents.cpython-37.pyc
index 93e0803d25a3035421cfee1f71d1ae87c4718421..d1a70ead64fc56f30be0850feea8b684272ed8d9 100644
GIT binary patch
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/chasen.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/chasen.cpython-37.pyc
index 3e852058ca68bd7949c03628e0bd15261a71aa40..09097c0f7e5c2d6e6f8cdc8f933084980e87701f 100644
GIT binary patch
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/childes.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/childes.cpython-37.pyc
index 4f05bc628c9796ea657d587e382ce7b7c5a4d932..cb6f08743aa8ad699fff61ddd706a12129559239 100644
GIT binary patch
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/chunked.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/chunked.cpython-37.pyc
index e2dc4e3e5796a0af2dd9f4072f6b0e30ce5c9fa9..db1e0cb328f75f5e9c5ca9cdda761dce72b1e05a 100644
GIT binary patch
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/cmudict.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/cmudict.cpython-37.pyc
index 02fec50c7c2579b5d6f857489fe76d00772a383d..6f49b2dd0e5859d15e42c04d3be1ba8aeef07d42 100644
GIT binary patch
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/comparative_sents.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/comparative_sents.cpython-37.pyc
index 8a041c140ea2411de6a69ea97cb33cf737b42692..6253b34c546e453f4d6b0ffc05502aeb1441e05b 100644
GIT binary patch
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/conll.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/conll.cpython-37.pyc
index 481bebb74864bba7b135dcd83e62533bd2586fb3..83ad760fe9fad4e1fe4d837970c4d4b66591fb27 100644
GIT binary patch
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/crubadan.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/crubadan.cpython-37.pyc
index e9f3a3e936a90f559588220ccadc12396b0352ca..6dd1179235cfa65a6fd26676b887eb169181fd4d 100644
GIT binary patch
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/dependency.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/dependency.cpython-37.pyc
index 9584742780d0cc0ec6211fb0b22642f9678d364e..0a0de327bdef24bdbf2244b68bd8250e3d10437d 100644
GIT binary patch
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/indian.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/indian.cpython-37.pyc
index 16ce5c3cd2249f0850de5b1d378c3b8fe3d10866..c56e219aacbba29dbd087f10b9e689001d9e2d4b 100644
GIT binary patch
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/ipipan.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/ipipan.cpython-37.pyc
index 3e6ca29462e1b74d02d1be191980773b6dfb0d8d..bbe2b412c23724db123c396e985aa97881cd570d 100644
GIT binary patch
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/knbc.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/knbc.cpython-37.pyc
index 0d5c4bbb2dc8f2b23e88b4500586f6068129d4ca..cf12a67d5d79b38fc55de38461cc9cfd418405c0 100644
GIT binary patch
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/lin.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/lin.cpython-37.pyc
index f720098dc453c4a41a966f3b351532a4fd91cdb6..1edd9678e9764bec2c3248726e0e733e49777c56 100644
GIT binary patch
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/mte.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/mte.cpython-37.pyc
index eb277f4ee040ec65992a40bcc6287fa5e9c07d06..f2ae23745f1fa69c04d36a812eab9c39446ae6ab 100644
GIT binary patch
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/nkjp.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/nkjp.cpython-37.pyc
index ee49da05e7c064214f860a8630fe634422135a76..aca3131e1aec690bd8e40a83e279d86e1c501861 100644
GIT binary patch
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/nombank.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/nombank.cpython-37.pyc
index a6ec88552b89bd098f4b7216551abadd67a057a1..c542b2233a59914cdc4de0b1762feb458cfca5a0 100644
GIT binary patch
zU~liRPvbgNxV`xa+pQ)Avw!Ey9k)kzg4ym2JLB(ay3S~U>H0r;|0BOi>|9glfPjr& zxT>9J4mILl7UNBw-R-*+`g9$dwXT26Kim1rlyFoi?T5ALpxA$gZc_{D69e@SSblv0 z_MpQk)i$GD*WxWkLM^)Tb9ijrFG!0`#jo5bajKB(Pvbp0Lv28cT-(*8YUcn$t>P@YiX+S*`wEhmk>JHrxKg%WGb~18GhkM4 zp>=Uw`8=}b3C3`vX@2o|aZ9X8)A4pUpzHHRr^p~z4D*!s-^iH>iqR}z>HYIXYtG5% zO`G8nwXo&sy+WX~BCX^C7cAFv~BG4BKkDt-|5gAB(@# z2pn+F5j>3CZE^ib~I)R(sJ_H2c7J@Q-#V^ENiL6Pz T@CLCag(Fe;ZGop*hS`4uIS&XK delta 3975 zcmbVPU2Ggz6`nh@vpch1uXnw!e^bXvytda#*0GbgNn;zkk!wOyCm~K+Hlf4Xb8D|X zyR*JCPGW0=NnH@4l&V}IBz~-w^&yBlUs5AmK_O@X$x3f?PbT1{Kv)9GCH7t(9neLL2439)VW%GyOEF!VzbJQq1bUGsM2dT^9OJuut;5I+b+g2XQnMH3p8%O!hP|spzHj#`<|t z7c`f4Gj?tVekn8Oa91k|s^GorurZw2?T4rBIO=RlkVFT?C7(t#DA^FvWQ|{neKh`w z97a*o5E8MpPu_C&TF&8+w?4kzG7v}0P*`!?wi|^gzHOZE=YMPY*{txq8Tl9z=rU3w zP%5%PoEQM&siJT^lJR_4AbaXffZ#U&_=y=^gVF5lEYtb3tz%PoK-m!_NhBwbU`E-e zkW3=Mtu-(OV61Q!021Txw;mw@{^wSgO!Hf91=7oZ+cx@KJFF#wC8W=x@GO$cNW{Up zf}^WQK8s`y3GS&eBy1qQw~M#hhY1Ie;|B*eMO|Svv1uTQDEl0a+mIkQ&!5jw z=v-`H&z}dWR3Oi&2~De%Ly=94h^GJ!b`zD|LV~dXk4zkkq@MA?Fv|aOFui#JCqIMa zA`(our{$TQr_5b1RpJd?_&kyqan*9dB8BoU+9bQpZ5;NS75q)%iy?1N##sj=TF4v ziJ#w%pF1kzD>G3H3!jvKq>nAq9KD-o6Sw&9;xj!$|DK&ng!v|)?#mAfrF#ouLS97t z*qkN855*7gfAzgNT-pLhfm(C{FX##)h+nP(O!&|PtjT}fKQ%1cJVt`m`dtw(Zxy?4 zgIJnR4vbu?3%F=wqDDlupj=JVs-bsaVGf~V7lh{S*Psgc7kY2-_QU^u@Rzug_+H7~ z8YgQE3a~Di$d$w7lQu^9{X{2!cTnZVU~7C0AwDJ(S^k~De9vPtk>`U)f05h~LdPE; z_}jea*#91Q2Lj*5-#XSc`+o-i4uD&%3BHQKXLoq>@d8=quOD9OqU^ZR@>`5V61}d)ugSx)SV<$$=W2 z{AK>t6RUBI1w+60X+Bu(-{sxIEcrM|JQCYwKP5huY=`3- zC+^4j3Y2R&5($Az9>=KJQ{f_lSNr{n*lvf#!cQ^yy&xf}_@ML?pX?*D9F`-5e>gf) ze+gD^4;!_LySm!jzKC<1NWO$bv^rtEYFLbfqnO8aw2pu^zfabDp;kP0nl3&SPs+2> zY@#`(-_2S{_%lFh>92zR(pql8wQb8`3T#g4Im=y1iggT8DlGU6+>tDo1|gM4&z#PN zVM#m;J-} diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/nps_chat.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/nps_chat.cpython-37.pyc index 943fbd08d7bc87e5231b3f2498856146f1b3b01a..ee006fc3e7677c2e7bef72b815cb71f393d08fdb 100644 GIT binary patch delta 1306 zcmaJ>%}&%%6z*-AX*<&nARq$64E_veh`>f8F$8rKlo%E&n%GU5&;pHjrsTE_%Ekri zesUkc7cenCfJ@mBmWNH5NZmdg>Bu90$_mZQz z=7C}r9QnCa)qkT(mBa&4)7$hN*QQ?RvN{dEwj7f0hsBfS`6yuX+>q}N>F|CL{y`rY zi42haB5&yuh)n@(1bh`f z+&4EZn9O!al`!Ez)>wbjp!TjGguyu9>HA(o(%7_J8rq?6@A=FLx;5I^vnsOy3S=-@ zWq%g-Kgi8#feG81I!x*ETbvR(JWn_F_Q* z$_VJG+=XhZ32=?8`uYq`LK{~^dV2wU%lw1>t%U85N!YtFt9&C@yIp}0m`HWY zYdUpGdy}^bun24n%wv9>J6gDc^a`{Hns^nLu7PP!L_(YtH*J{cz#9LRUnLbjZQOf@ zD}cx#8qXl;C!5}5g5Y#I(#D8!7c9>pI+-gZm`({YiDaibh_&)vofGu}3^aF8VuD&E zHUWW8_+R5_3==zFRZQ?DkIlEQ@LaM>Xt3`Ld&#EITt0me$ umL{B#B+KwZ|I?ZVPKH!nX{2hgw%G8Uh`LJ{5#WvuStFWk5V-J(Xv#lRDdp$@ delta 1303 zcmaJ=%}>-&5bta0Zg*+(St-dSPx631y;J-&FdozsPUlj zXhPn}XyV_%A7J7|F8l|*dGw&sqsD_XZ`p1^;%+j1eKWt`%=~7$*Y}~%xM3J+34Ynf z_w(5uBh$YGFOK98PqL~{Vi9(@#`ATwd!^;n+y`HD}SAfBU!J)C8PSJa7I2PWtQ1AzZzR7EzEe^K{_M3{lN zh%ii0*LGZb8Eyj^j6}9C@}Fuuf0yj!zY>qmAMG1z>ODjuIv?jxlQ(DTH7T*0qKu|e zh1Tm3qUg+CU3HzRE|<8hEsbC$Gy~?J=3=F*La#~wMq3;cfGh%z%^XA3vI+1s?@28T zD=}OTWu)y2jt9yRQv^$0T8LrBHHg z&#M)BIAM(xg`p?G%<^yP{jt+<3KbY7=y3*JXTj7qrAk~CJQ}Q+%02$0@eUc_*Y&9v z=mA6nqEmQQ#4d-VLIuIW7DdAnpMhYRhvS%unVD^>E?dTxUiJc>JIHHr5^iMh07djZ8= zu{3cBjS4xfE5S{Mnndx0;+|@)S#BYQ!5ZyzrVk8$85--Um10u8Atv1dV-LsmU0@2(f zbIh*@3;7p<7ccWWJS_>NY7Rz%1O2GiRrOwVF?TE9SA#b}(6-EqV?Vi+dof!)4(^S zR@w1k`MHp06x(0P#3ZZY^zr3;asLYPK)f`ALrvj`}nmk^c_u*$-QdaBUm)saaYyoh9Yz#vl)LF*F-6u#j5Y)sOD{YZpzL5m5Cp ZyP9vNxfmvLYXLp+H)F)7KH+w^^B2Wue?b5M delta 614 zcmZut&r94u7)>TK+1X{&T@|-2;@TgFx}dwz^CHDdEff(HtU-|3ooRR0WD{nh;;DtU z_m*6H@44*%;2)8b7cV_~k_vubgjR}yd3-PL&HG;R?b+qiU_A(G4m|C_*1^y3!Q=Hl 
z5b1=DbR~)0n1sZ|*^9g#=d~lLWM3S~6#eXl47e{Vz$=OX_a)zUI@Mp*h7K!F-#Me^+BLz43-!9O5A(i8RnC$~Vf`FZ1wZop)k2r~$?2>2h&JOYN8C4?skcg#_fVHHosr?l3Xwk?lfPsZ3X-+At7rcVpardkWr<6L}P= QwMB4P4nRrWj$px3QbeT(%Hm2OLFUrT6tLNvj77>I{lc5u zcF)}ZJ|#aH=o@gD8G*!&L4*m2Fa;52 zK;jlJ)Z59aIXN(|gTfK$6GM<9VGyA(xn4|~LjferQDi=Ok(ik%h=UXk(4c=SrT_q8 C(%3S;^pOH00OzBfQ{U7tc+2Uvsi7!1%V>JLi988b5r%R5-T(E3lfX6_4PJy zWc|ye1yWzc3nXrFq@|W5XC&t2XfhS?0R^I%a*~TcdLe|+<{KQ%OpG#iY9vz-(&|KWpM?NAaiME z3fOE-#v(7QpXlIn0d|P0>lLAm)llc~FacMznQIQ--HOK@_X0YdrK+?BZ5=%;o zG#PJk#>Xe;Bo-IP$3w#cta|bp!A=>FI+!Owp@w9o(Bwj)r81H*L7*=&6c`Cllaa!w z;EC`o8EHHUHqR2#W0aA>qhj(CQR8|okOFNGp#vmtvB$@!I@F~d&DAJ=Ymx5i(CU%VDY+%pXGCVF?3UGgDY9PR zyizpvf^f=AR%RBPVih*csx9+Wb1Q!&tj1=}2%F*MQ{A0r+BT`r{s8BZx|UaBzr$J9 zO(UL9Gv4&OQO3d`+pN>P7KJhQvSgpfVUI({!plUkjZkK5iAcNhIrkYC&`zg6vYR65RMg6K|5eEBul-ItiF$YKKP9oBwf)%MN|NZV?Z?dC z?n?OnreUuoQF0gt`}XpKmHVI78)|*A`>OqAH;h;qw*bO(E|abZxZO+yo(@(68$>ps z2m@$e2tSsQpM^Z#;jt*09@ImaaO^;o)H;5Om@$pv*#%TTo>rqkVF!?P-15 z*IrXfGX30O(05)MfbT4kfp%^(eE>aPxgZIQSp%|8PO)zazA6I(Z_|u`@y*TNye#a# z{P&f6k9Qh(>i=*A0{8)|F%%6x3@F=BJW`5A#mii7SB8s z9wURyE0^Bqaj4@5BDol!D>~FHzY8nsdTvM_HFLukEt#A9huA{gnesg4f1X#U^#~7=}}CcKr_}4-fexd2WST9LUa>O{}4|%_W3aw=}L9X zjI8aUo6FBH!B~EWR)!QR7LDN`6pRZ2?y?Tm(NjflLsWBRkDS>vx zXn0Cb=+P%A^|_8wZfEgC#l}5j31VVj+cO71^J>19epSmvYz*a+6s_D?Bm^g(Fr~C%Et77$Ac=FoB z_UJ|u=|zm0can2r)r7^+YFH#5UdsQEspw|PPV*;_GoBMTSMgG&68yh@c!Br_TCh0QeLD&pA zvxk|>MYI^3W90UVPM6(&WR8MLBh{etM@~> fttW-{o=(H&DpJjXC8}B#asf*-ARie0S^9qgx0(C3XSv{M-vee6HWS}=ghKg*-42#cayEPCZTxq6j~0e{*ZdA; zi%d*@Mr$VqpVqc^2f*y>cDd{nsjoXT9();MCoE?1+uAx3ZX=G;tgS z+gm&w9;xb}DNx)`K~QN{^(2o_LgmX+g;eBr(cmWglbg3Z1Qiz))u2C&FV7) z743j@x@Xq#%sJ_eK*d-u)re zyD&-Zr*&atFmK$-X?a-6vjx)R#*v5^l;uU^MF#otZLA zzRzEXj8uaRciq=BxxD8vb_6QyFj|5+H&=8$yO?&Ej0VM>F8C_Vr9Ds`kKx87%)0;j zh_vlF*%He+E{md{K^&%mXUCn;JTa1F1X3fe);M-CQ_OKo`8Sv=!hbaXZj6v)kwzU3F8;-Q z!=7}@w)Z;2mV{gn5?9~JQK*W;Ef#Mv+=bw1KIeMrQlXsug70pR^My63su*7P$Hpvfx>y~!j^{c>&%ML%wZ(b9@d6H0_9 z+k5!W>otCR-N}`rMo42jA0uknX14q11Jrg1@y_ZuG|qpD5Ab-`*_D!ReN3dC%QiI( z2_x&!lsLL;3BTXg!#jJsAxw>**zo=8sgr{GTK9^nUw(qr;zSQGQGea@!|I7o1Mxd6 z-Q%ezNGlHYc!PEZE=%6q7s3Q6EB|4pV ZlKAv=@eLc6O%hguBq-O54vNxp=pS!FI931v delta 3115 zcmcImU2IfE6uvX}Z};wQOWFc$3Ja7{vVesEir8f_(2CTEBB04qSZ;S_m!-Sib?(I$ zbDI(u6kkZnyr2&Td>}rmebGb{eKj#Y8O?q1QH>$-!6%}`b7tALE|_S6P3|{m&di)S z-#N2q=JU~8qlp8FgrmVPIsS6)>Rh5{_9Aqc#z~oOqDqQ@s%})%$?6qS8Rj@C*)~+>>&CeRwC>`z=4L%mK6pJbi?Px0V1|WG~&bz8rdw2 z`M>NT60>ooju4Xvql24qUTCZS3s7Pa5pvSa;s{Sii zv>Qf1Z#K2M(thTff$m#%D$WIZk<946{f<^3%uF-uQY6rawK^$kbvk4Ek-9cxO%DX- zuy!grWBbu7+Bqk%dtini4fHyBRtv1aPHV4Um2HVVTaZ}_&a9iybI+@a7OG(+U3M$u z%*|Yu)x^^fAi_Rp>XeBSxWlUm zuK}ctMTR1alSyuY0!~uL9qE2JKi5IXSOS&UhvSNWtk8DHy_)Nv6*!W*=M=`9ml$-_ zkyhba%#@QZmpngTt<*AjfISLeMc&*dkG6G6(v_Q4rW*FeI1DbeE%LZeYxV^L?Kki> zjM*tuDC&`1!xL`_?^S4^m3JehjZ}0m@mgijoT>`ueX7ZIYm##1%G*a;qwp_YK9Ih0 z^``|_o4oU@&McSr?FD1{J>G(Y>iBl!v0EPz%bq2PjYAnJa+Xs^x>L?2 z(=xl}5%5f3>m88qtzC9TM;{|sb(VL|&m}v^!zc745{<22c9kkUMxyEjA5}@sY&Z$` z=KC8q9>zl0xC?cVLVdbY4x`5})cA-HRdIhMbaB3_)(a(DltEXMR^L~<8zoYVpeY02 zZTD)v7z53N>++1e)3Xcsh{RiIdcrK9FiI?1?gwx=cejsC01tS7bF&C>+5ayVvl2FVJOVsLq6#m7lPP~sz!7$O`@gHF0Tvr=fco0(R{gB;-K z#mvErcY+5KW1{{C{tvzSUyzviecdb|F|?UqXTHbpdrj5o=drDOm5QsulR6)0RMxcL zc<4O^*v!L{4FINj8q=8(>XG3YI_}NTj4aQ}_f}YlY|qa3h0s|+eIX<(8n@Sr4>e{p zXGdcWcel-L-7A5Fu;Nai#)`bWZF&{vE^D>YSN6x8>`&W4GiIEILBfR}%9@^)Wg>$1 zOPXwUIG*ih9CiFey|8{wI*^?p2t@>}N0bnt9&Jw(Rru`RucuPf)oS6fepBrhe$1Wd zRntWU8PK&0eXz?2rGj?^rFYaPdvT^8wF3tx{~8M$6f>}-1waf!g#B&Ed8LmWG9&4} zI~xWv!FuKv)gD=$LVf16f{+JH=FxlUv`ol*g3yk zQXk!)dQ~lz1}88^rnh=#c{2{kpMz2w0Jiq_y1H8&R=cJ8v@k&;FyzV6OyR$Nrp)sD 
z@w_!Q+y(AOnH2_Yer!bIGO$tgz5MRhH6+I&hzX2fcI%l$!4$%=aSeg2rPeC*Tc=Qa z8etG1b0}qT6DEHNMOP5U5ipWKuOm!{Ri6RH0xXG_keC+CpAgp}jzG`wDh8^5a7#*@}nn;?pMD4`grU?F}q2MNNm*_o0wo9sHfQ9P&z zJ&Kn(`5&kUK@`-xXa9gb+KYGbQV@J^LP#yrf%(n8_j~XC-n?PoRn#KCUMl4YtV-v$ z_oG0_HX4&r)>-D9Jj%?MGkD6I~&Ah zA71^T>Z|qbD%O)Z1RKFYUVX;js(+~@kf z6kip4OqP#%&|GBY+OS<|tg);;yT&|1!Dl1hn+`-@TOKiW{YiZ7S+ zWNgS1{vfl$-cam$Az6WHZDp38UOod~`VHh+RG{PQxad8HpgWqjbp(9A`cztdb^zHq zgfc+pcpi@ec=$zhokBQ`fJzdxc2$~|I|xJ_c7o$#)G`^Yyu%!cOw)Dbc{tni!!Yev zgGk1sq>7UhQoWw7H0`LfC4yA<-!3*Ze^=8w|9_GhTNy-r6pH#1>f+A;ss^KT<-dvv BuU-HE diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/propbank.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/propbank.cpython-37.pyc index dc95c18bfbb9fca81598c7a559adccc26912a5cb..cace71d4518c171b4cdb0ea5e5b46f810cf2d6ed 100644 GIT binary patch delta 4491 zcmbVPZ%kX)759C{_Or1a^B)3P$cPDs@E;ligoQR_kfgLtNH#g`o@~Dh7@qx{`<_!s z+R`>@r%7v5b~{D=5Uo?CO_Mft8vBr`ld4J8(tMfrVbj#BeOta{ANEJ8HZ5C~opWt$ z<3i|$@$tRqo_qfM&b{Z}`|%6+UYvM55b!zRFY8&?emmuG{0Tdoe{N_Thp+VyAi6^* zh9gCygkW1Sl&G>k(YcoLMSYUvF)CC3s9&~KBQS?jsu7%XeAS_Qb>DS|?xTCIIin$3 zwNwqfO5G1UKdrguyhft6zzOI<-~^Eqt;Gi;`&l_vfm2-|r!+i+L5g{U!O(zqkPBkNGQx-F<^}mM$e#>Ub z^nzxuW~jAMLq2r7m<#0jA3UE_HbdjVRBzT|z5I?k>DmMRgBt;F138LA5k*v(8JZRq zjK*~6DLs9H%tr{TgHeVetOdve-?)*q?1Y)hOq}J<`KAW_z!mOXFh4r zq;AQ1n>uj1aI)C*r#61?J4c$rsI#D0sN+T~71v`E7x|?Bz1FQ}*&z`A22SVz0vh-7 ziol`q5OVe+Cuf)ybVi)H_FSLxsEoy5?orU?gLi=l_(Sl_*4r)>xzYLuthtVb* zMuNuJV@PB;Xo?*H0@^c>U!9HcvFfIy5)z5lC`vQ{iMqf++hSwL*lJ3sd0(uT_#4%q z9s3+=)i|dQi5%Jismqkbt?vL*PlZuGq_O?;wq&YrA-{u<)qXfDW2wP@Es|qEBA#4I zyO0q9B3v>}f+R%+rej;ZogjFNPuER*v3|_X&az7WPTlwoFFo}NJkPa>H`@)Qzj<7sR? zgXA0%oKs>5*DOQ-e$Id0aJ_#3y95X< zVXHfAMZ9dQigJ)7K8TcXTPCxq9&t%*1+jz+s$j~3CC(OLy9yWl!lsd)7Txt|8Wc!P za{v}rJt*SuHoyLaL|j^K16I^8x2diH2(T3}V>^W6)nNZ>HlsASk!+b!TK1!BBvWRU z9RT)0{+lmlQwdb2FqC6DlCAzp$uRn+z@Xc~=1>OncK<&QA3l6Qx&b0gxNsUYEjPPx zCeya4ewvw?cr3jyAcUS47p2EsDis-jtLSK zfTmSkqrwS&_AF|84oO)QxLsQ?iV+8&ZhdxS8f9jXoI-+&S*RKCm@<1cSEJA4z}Jv` z9f>US8vkQ!<8f40VxW*q8Pv~U(87pRzwfZ$g&)pY{)ichi8o;K%&Tz;~zp&?<3PIwsvMs>t8IU8cQASy}YLt!eam^jY6)F^lyRUYe|8Veor+2hGh%?L0vmNvqwx)f+*N48@gM~tP@aW8# z)}|MQI+1gwP$fRvI63?gp>yc4Hx!Re;nJ0k=vLLSqf?Q^;vY!^j6eKRlpdc|e&;L2HJRmdo z2F`gC$qgiLAz4RKv@1=b<8J=;(bUjRGaj*i1AKUNez$WY#6K9l_l!&ozBq+<4&I=- z*N@B*Mkl=5g~>BBCno2ngew)Z6N@sPU|R09GM^FRI{vq@zV>-&v294CiIrrDpB%qA zD>GEci%=L99(=08Ym~i>YKqw_nk%LTz5EMq*LG;APTqAayokKWUg6WU`K+B~uqhaB zW_A;Z@Xlw`3EMOci#eem^{%9h9(dMKntSv)t>lV)f=9h+!(Q%@BW{!o%067*C`-fh zoq71JW5a7f)a*sg@Ssj*Vzy|`tlEoaS{u+dU;Md&WZa;_ZN``2fR?u*hS!gh`XEaK zI$yFF3X#HNJ7wxwgHB)>wt9g0iAp;C#IL{ys7h34Xw9)NNYph)Q>YexYv5b0kRb8R H2g!c{1UYr8 delta 4298 zcmbVPYiv}<72dh~Ucc7%8e?8IfyJ!BFKjn97%Lbkb|EH!q0y4`YVA9=7w+!e%)K_4 zLY)Srv?{5S451QLq*jqCMXl6OD@uM;YAaQ3`ClpORi##KDnu2jLaJH`C8g(^wY}@b zj?)_adCtt4^Z3rZ?#I`DcXeQ}q9WpGMZCp1(p^`teia8Tz$emGU_?zF@i?uhQ|o;vg_VtImuxoLUM#xkX%V}nNd!1xst0$ zt}rS|uH;oT9woWTh>{#tay8eer`o8Yo*MLAXUQ7ssWoEM6H_*7jR-sANz^@{06p>m zi!bEZ&jWtfYdb=AhW=2$iTWR%YM-()y!}FIcFM}7#KrbDSrDW$+{)wzPRbugj&^}TH!yBx$5T#1 zQ|fMTB5m@tVXJvdHZa}wiqxfJ3p>iru!fzmxop@C^_!{8n2{QIUe1@l)3nm9*hR|c zF`=1&EH=o~70vxs=xIPt-mzG=negV@d!V6gDfi)Y)^<|aac-Ylc)vnp!)mf$?1fq@ z04Cikkf59CEZ4_OYy4u~M==+LxcyMG^`BL}!p=r9IdU##PV=KeSVDNw;V0T{o;SY* z9l=TN2+MN7Q`ShCW-^d+9FZPN1XQfah_Fm7fHTnpP?^A$_!5B|Nad)shIpEUWc7*EmD$6pTs)g{(pGlSYo4%P%-R4&KLzdB zq7NM_-MgX1`0THH894HvXhPRsB2Y zZpz6eA$9=T0385OF8Ton0hn4uiy{b1OauvPrLkcL3(9Y8xXA`(UHuKVU*4&2eRd;_ zB_jDqzXIjsfD?dc0m{b+S|`Zu(~X$_&?-5z=+Rb3)hu zB|q5o;ovCQz7lAP+pTds5f&>=l#-Ep+bAF)>A02Ot;CeMm)`RB&1 
ztVjN*v0)C2UJMc>s>SnY*8`9l6(P;(gj=o|wK0kA6FP)^W2BAnU$adHL&X94E`F92R7aDAZBEvM7iNsni^Gi4e1 zv#a1!wbXh?p6}7Gjr>Aok-g=9`d94_1w)mgpvE+B$g43iC_Cc~3*X=VZ>`6IkJ=H- z8P4QPo}tQeLmkPhmsZ98Rr%YUEzguBHB5%oXb~BOz~E*GjQ9{(g-;U}#V{XFlelyz zpDA-XLX2$0wJoppqEbkto$Ug{|j+0 zDmO62)00yUy|H58N8MkS(Ne`KR1B59 zDeKyQAHXUXr{zD|yALW3G%OqOhl+S*_G;iaNF5&z9zUHbEWmu4k7-(^7G?6Ir@Qo$<7ybT)R~n<3rnHs z^tuM|8-O_gc6#1>!Le$oL{Zf1GTpnCHrK1Yw>C^d`v`%{Ce?GnWiz5tcJ|dWLmutB zSf&#D4zzB{yM48#Uj>MY$-nevy7M|vd=2njz&yd{tkRIYzJFr93!_Yi4}5$|Ricc{ zx#7{-+~TK>Dhe`i!}_^HCx(w48a?XzGbv|$O4W~CtNm0ULU928qH>_WW0yfKu?i#_ zL{SOkt^V(gDEhIr)T_eP1b2h@B&DxT@g}+pr75^8@Qx_|j;imXMo5#h2jjiyOT@%m z^v4ZPOlQX(%Q9_&gC`ljoH5(zlZ|urjg<^%P3K~pl6~^w!5H()z(CJT1?j5izZ~)$Zg#_)4Ms0g zDu~B2c=qO9au@N^lUEOW_7Cvv$$5#IB3;<;&3x~@?|n17AJ@LF`P;tlTILz%Pxii@ z_=|U&W+SZ75=795gv3NUqJ2BG334oA?P<)ltsP6`g!9G{j&x-Hn1x)>M^=#k^z=RT zJ(WT)-rq}K4x}3Yq+eK}WVCfen*R53{aG$^8U+Qca`+GEJglmQylL zRZF)@jWP3vl>3GaC{*-Ov9>;MHn4zj0pTL%{&fE{1(yt~jGJrvM{#+lhJ}j=ml4n( zEFoMmaMl+rBa1SBC8vg7Bp}qYajRq#GN&JuSBR?*%k_oHz}eKQX&mVf<+pcml{s>h z%2)(!dbSPJt|Qz)_^;g0ogfgc5Qe>8+u1r_ZW`K@(hv}+a*-N=kl;`X>Va~sAc$fCm786Ww2mEi9X=1J z0;%U}#gQvlgmQ)h7k&bge*nLL6DMX8AQh@5Kf61#@67A**QF0j#r0y*HQ=f5Y;M0l zEY4rO1P5*e1}EH%NI+nvk=epCjcCi*Fu29-w+6R`CG0m=;BfPf;X9vp-+D>}? zwfp);{^6~Rs#Io9JBiz2nz3+4^~1EId@4uEJcu$EQsQwUyF!gh`DC2LikYoMoT{68 ztK1w`3j>oCAnKda(}$(nas>`@22n+vz}z48KT~oRqEEoh8U3@gxG{$pYlwNoNyGx; z6yT^Yxrk9L^S9*K(6dBx^=P7%g5cb<2XT7!)J4xI8Hk&j}?ff}SoAfUYr9W!ZRxOZFgoU&?-eq+^@Xh!q8CbW<7!y=JQgBrUXcLr%9@@_Zl}`{I*7wB97~HOszcyS7uj z1RoLCV|MWs(_1Z3w{j!&E7=u3s0;K zOjryFhTn1{7VOI1=3a6DR{*}^(xR7?d}(~5k;Q#|CU3i5)+v-`U*(gr)Br|Kl4_jo z-hCB)x!3qctUO$=!6S4v(`2;7Omm$N#GgGEMx1dZ0{brHI+1LJ5F8eRVb5HK$Bh8d z6h+d*T1bm(A*S*#<0na!{}=z~SUaksH(xC}9u!BZg09QiZq(QcAbcZ`clc=A+o_J4 zeN_+58s{Q_-r~QvCC+xCO*fJrB)v#PYLCn%3My~H<6^-y6>^ENC}h#ei|y}|KP#af z!%AO{IR(Qsb7kCR(=_Zt$zh&RYs1P=Cn2Zz7%xzx>{26_cQT%lbC|K1$u6&Cm}TJp zWjuS)&fDIqv0{5mbz{RW8qchHOHOfe{7|KOyd`O z#@ZVc?CZ+AJx@zyFaK`$U}6Va2Xl6w+LkZVVs=~AM3Hxb%Fn#J@BA$8Ad`hf)+uH) z9(4Mm3s{%j&11m^Q3Zq6{0JAgvENrd?t5>ulOmtFGJg}H@`XJFzCjqN@@gR95I>ZQvIXXbrn{>-HX zHh>bXeuVI>iy@cPtD|TZ1t<7xV~IUt7Kx~h=Jq5gPVqZqKTV59MzY!CRit$(tMA*3 zkclgSVlc3Fn3wc+emxcAQfk)`bjDhd;C9u&d& z#G$bUV>Z2WUhMWs7<*-uOy#lu>8{$7qbgYLlmbh9bkb{3A0d@e1$XH;)PGF)o zm@GpO4^%-w6vgd>g%99y5rt+bk|M{V`VKv+1@%@vuE*gn>eqq{^^)!44)A;Qt?A8h zQ>v{byY_V^b9?{6^;0Os4r1q!U={sfb!_|HB{84PNwZwEvyMg0+C0xwKjGmUylpIPxw;($sjxfkG%t63TeS4rkE#b0{>E8OyRqf zmr{a7K9)`S6O#6E$p@0cB?Y7gE;WFpE()e<`ri~xzvX{JN^6UQf~lDSSP1Zi04M~_ z5WI)1ASi@PaaGV8zr6RGN|v4P{6CX^vieuyTEFNkz`J7$J zmI~#$9kw1$_F-h5cyW$;cYWx~euaG#-b&Q!ae}Uz$xy3IGnrj>GjijqA9*$*aPDm= zokBPa!6hsPl>n~8V+R3LNfxz$>Q_UmpGxfa$UF(KiRQm9HKQtebA8291&Skkq-*Q6 z8#N9CaNkJeEq1Hron(8}-qS;~`nhnT@3F4d_+kgzbRu*iJcYnhJ2964sN99e#uiXz z$R$VvkVPl^wDm($c4pfWGGm>8I}f^oVki1?n=;!aYGx{>O{^P?Q6AoB(N_A z=O#Hpq(z5O)w{4KaBJ|cvHgd0dMGGfuZ9FWHw-;*m>Ik#nYDZ=y9qtX%OjVK$_A&w z2_4C$#-)+%#AipIj0MFTBs%-?f5>6d`-zSOPx>5Yf=T~Rv?xT2u`9zpV^0>*^nj+Q?1t-0Ev24cfV=C z1fVOAr8|YIa|o9ac*@6uO~OVB>0>DbB9lgfSMGZ H40rqsos*Qz diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/rte.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/rte.cpython-37.pyc index e7277c5c0998cf753d0ec30dc88d6e69f1b1f745..6974a0bba6441dd167129b9a8087dbd9978c41e8 100644 GIT binary patch delta 1022 zcmZuv&ui2`6wV~sY&O}=?zYvcRl5rQNQs?cQ&Z(cR8+|R@&}1?r-OI#u?m0`NGZKWqvD0x{40S{RFbgxotXRkd4E}mR49LgOZ`wX6``r`qAZrS^o2l2pQ@|j zQ^A{zaw#}}JNRB*B{Qf7q?>r8wcSp@z6N&Wtj7tD*{Ry-n_h?AkNIl-(TuQ=1dY&X zHRXTdsCdZBj>L6kwRi!aFngbWtA{(6fhn#aTtyg1z!by;!pR#R*FrR4qX#o{09?m5 
z-J1424{MsPJWAJlvL`sD!h{%QW%0I7F|s>}d?TfXJ90hi$zTU!lA zs*L?1;b_=+xPvQsfFPL9gdxn9@vOI-hr5NlCG36X+xMBs3BJz+m$`SA&r!|`2W3a* zQFcV(Df8UW#=!1E44v&C-?`oWZ7&*zpIzaT!+9h+SgJSUwybZ4yUn;0!t44}_MG1> zPf{k75_1O)D0IK^f0kioU)`C#A}W=&5ea3dawk<*bGHKpR6>bhfM5{8D=0gWt#xI0 z!U&*Ddh!LVoWWUwlMX-$^}xZCa)Kb*iqasMM=2RcU^Q-JuZo{<^n-Z@)KGtlE65H6 zK{J~uy__E+li`zgnv8gW-eekALWwzfJI9RU>5hZj{Z$gCQO?H7Gq;EtNy*1El{s}2 z&h<1prGC%@ceWKJX(MiT=H6!?%d^Yc%z_G9E9aryT?B~1FK);*N+pD844$cLQI?1s zx*+7yQFS+p0mx7UG6gB$bcZS{#x${^(*%XIcG@jGU&6D`*Q1t<#EIVMO2_c_Q}*S| z!nCrI6clg7Ta7S1B=++?zvPHqcUF_<=n0b#S*_}=oW~$ALU4g#lz>uzYXrw{*zW~g z#ETeL!Vw&9n@&aR9(e^A&3;xN`}BPjkJmP3Bh|h2%P*Ff^sh~xVA&G1!)ei#Guw{E Uc1zA(CJOpew&BpUoWaWN|GsLg761SM diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/semcor.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/semcor.cpython-37.pyc index 7cb913ffd2eba51cb5dd99c0b522eb042a04b34f..c316eb4b915747c364e75cb11c586ededcb6108b 100644 GIT binary patch delta 1982 zcma)6O>7%g5Z<>x_GZ^VYbP#oViKnZ9^AtY3(5;M;&POY@U)_$7zX6AeI zX6DTY%Req>FJ!Zt0KZmx&;D*NTi)G)g)3NsOI*<>foO?=WJyGDB};}q*;iIkr}(KL zZKVU%Qb9-Dl;0OA(q;53U z{#-sr#`|H%6zOA)Q_g0qu@#NC|5Sbvmr7v0LpO7M($gdEM?c`6Cc`|v@dA`*JzZR^!wMB=0 zo2M!&;MzKyH71`!(GN#mw!EwZl$Dy2lf=z9T_YL$9kqBLtX@)Gv@!11gKrio%>*TJm|rO6?F z-Z7-OW1UD0)gA`YZO^^@cKj=Hq9cP|BtkL=RE9LQoPlm z=TN)Iz8E|`&6!?AjkvvSVUI#s7(y6Es31Iz zfDfWmolF$h;<^)f@n-09k^{s%ex7Jd&j1@68YL=wd8o0%8}4~znE^1RMDqNYo&`Bk zoXxuL9$?`FXm%IA2p_vZR3Z}%{uwbx=nT6*be7Dqx#6ErV({jJr@97mqQGbgqFy>Q z`-{z1%=4E)qE`@JL%52-Gtezi_ZoAc-G?ud0aRHQ=^UJ0X5UoKkdgL&r7m94ksyV@ zLs2`udT1Y@5wBYs-bK{(oWr8L&+d%gBG*~DTAk!A^(MF_X?PSkCwn{+ZL`*Jeb0t$ z_Op$uvxtfBl;tgyZXjUCQ;hTBOhn2aCR&5NGE>LC)L%#K8wf@A!`SeRlOUREXJ()k z<;<3zLVw_93JsSBJDrqAmoX+w#3d3@5^8B@;u0sE7gc#@H1uX}Y2R E3o%G`kN^Mx delta 1969 zcma)6OKclO7~Z$n&upAHFFVv}+Q3q1~p4sYvUc&Zgy=myHTix z6anGDjcQJud*@cpd*sCqt-uCm9 z*bce%<>YWDM%p(6@_vvf&EGJ%t|Bf-qRh_9rSTN(@0W}9mPd*=?3?vg+ooH^0{c`x zr$#`QW#7xc<}oLlKtTN-48bZ?cUsi(^W7hmS@Ef1I58#qNM)^7-)?UCPj;uYyVB(m z(7r&&kS~LVBIWjS-#(NB`}&~AB%EC*PPNB%8Z3PYKuC7->|SPstwn!2ZNR>A;ehcm z5ve|D2L3s=9{Y0<+Y?L)uZcVH6h8JdF6n ztAd+Q*={z1e&_DTmfeOQ)LLG9rE$QgiJfQn`Et8DoYEY^G!kjGRG*3Fm{q+bvsI- zSoEpEejd$btoV7Cc+k2EB_vc~WPM_>Xy!yx1JzKi;#5KvmNJDY?r#QZ24iF&W#&g8 zoM)bDv-@4flQKAz)I{JqJPm`rHgWZo3WsKzE*_Hhb56pUB?ntg`XWk~*qwmZWNC#bRz}Ymb9bc+GvR73!<4#fe2m$XOZyi1)f5YeFebi+|(#Ld7 zSNQ{DimPUlmRWkXv?U_}_Rxwp-C(N_zzwH(;V7?J0ax!J@KpHo)E|;VccH%QwVZ92 Yte`tT4WLNiQ#9alF?NM=EN;aA1Eywj`~Uy| diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/senseval.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/senseval.cpython-37.pyc index 505a1a68626b39275ce6edc4749a542343b9334c..6661ee17aeb1790e71574cf7a2c23ba45a9e0de0 100644 GIT binary patch delta 1492 zcmZux&2QX96!+L(@2q#%*?bq$B2Ze06{SnR0EtjVv>`Nv@RcHDr81lKggTkESI=IO za0w{k!mXN1>9GeSPN<61D+g|H?FCDS0~apv2UJ4hz1fr~p|<8XZ{B-wE$MXBdCb;>lA&VGc)j4ku#G}f-5mm&%p0&q<25Q`>ikhkz z+_QoqT-TyHuIpkL*CUY^4eT4iXk>k4_>Bj5J#*PN3$F`tl85WPq@BlUQZ#x=+)g7A z$~YI$lG(D)it4-+9g*ZKAOtRJS&o}N3-g_>$Z+kn(@wXQtB2-~FH|sas1ei&stDg3 zFo4tJ`DQPJrn+w}vMKeKb^r2He0>HU?V}BSXrNT^Fa*(sFg&k*vTt4(B_b@3>XU)HRJony%YTgW_vvt+j^ zf}oU%BoqA-9ZyK%gji_USyaNET^OcAjS!3yV7nVIN$@hk(M~Pt5XaE)Dvr!S;LPQ& z3uEd}_c|L_Gu3T2sJ^SdHK`+hhBz(|91&>_D(}sMf!bG$O{+WgPivP^fuRT~;fngF zzBoFg6&q=s6yC9u`1H_1HprFV**Ro{A7o^Sr2yKt#S;hId_o2v(Zd$&x7 zS_SIrgTbadjbdPKt;0*|o56UOz5vTnmQu}r+Bb>HUh)!lmSyc71oEyOZBe zlkj5r9?%Y9_vn*HPq_$9SW0rg7_EajZPV delta 1315 zcmZuw&2Q9J5cl)i-nZTlHrWP33l(8W6&E%O1US$nMGy*#Af#>jAxn|j?0adPc-O13 zqlETAtL9qu@cx6|A|WBP9FcmWhn_1Wd_v;ZTW`PtG2>0LN{XfTd-G=A%$xbmc&q+T 
z?f%tb(bn+$cxm2ydtB3A(qZ>AP`H9C3J`?mYJv$pU@pTw6X>msn_;9g0%M`Kn>)L; zpxw}fDXc9`STg%-#^rd<3XW$kb9lD#oD)ZFRM;|)ih{_C!j|S1h5ohX6d(On(#vxW zOY+wP*^=#e0i;wXGk?}hOa?eXjv$M$twA2QH(%CQ!j`PBHtwy2od&Gc$CYl(vZ}gm z{BhwuehvewK6UOv0gnmyJdhpmJV#Z{Z_X9zN@@Q+dJgC^5J@L-MI>;>Eilx!`8Ug} z*s6V$-rKwU#-0rPC~k(W&h#bq#QJuOo=>vNejt4j9n2%=x1+exZpvt)dpTQT6BNkN z6C>=%c4B~R2uFtxC^3 zSRS?(?p;?;?a3R5D5)~R0Kta@CkSc;ea}5UVGs@HaYYsaXEwKO7*sd&GpwS1%Wtxx zvI<{R(;g2|jB$c~pp+(R?|e5b{KD9n`mcDma1Iq%iVOlcuNF(QL!YLK)zEJzS#R0j z{QmYW(AAStmD%b=X_$?u*QJXm&ip@iVpEU2r66pseb=2kw5VHEx|LIp2I}@0imtw~ z45!q;1O9z_1&CERi;(a*T$648M+u{(yG%M0+vs=T3du&?@PowcfR6;y>^Z#GS{lZb z{q)!9u};5BVm4R&K)7aeBLs1fg&7L*vwAiN2iuU2|4j-yL(pfh--Zer?%>*K1J7~? zD&5~J4;hQ7kpsax6y)Wm4Pqsp_9>{4L>4*l& R&^asU9Fb?rr9YsAmNbc~B9#gR(yG$PSx>=@*K57; zwn-`xK{;_iLUZIVK&k|7E*!XX>7_zkEWw4I5r|t)eQz9+uGo4tZ)d)F?|a|8dG^C2 z{~XzTFAT>FdGg@h2_*dmNlx#34UGlNcQ%VTqZs4#R&dGkaFYVuab5-E%q) zGlaElBu?UfWhAa}?pgO($JJOP@euPwLt|cIerQB~{Hyg~^$+XSSBmxWvwqG`@^Q+`S`PD9 zffRX}a=D&MQ7)+m&0pB8S`TK9)<=f@|Br!}5bF|t0L{iZtkTQs$1qfX1^YHn5Kc-v zjha(dZd?>1Pv9(~m7T{u0n<2*8ntGZikg2~>pVk|uGSCIY0n}A^}8q0`K>aOK;Oa5cbB8I1i^y-+%;2kW!bHKv7Xug-}`|5JJ2kQb4kt*wD029PPTLMU{Gh z1A^loLBI)#BUJqZT)1%Kf_y=!H*VZePt2@=5Ru}@liAsCXTJSrlJ6t$M&@S?BPqeN zw{qFq%u3Q0E`C1>I5Y6F0vJasOAc{j9w`&}mc3YAF3UtZD>;gzK9U^Ol^@Dw1=gAq zhjrXlVXZl_Ym%wIUHd9;tSL_kk-4tj99AKtNy3C>RXv+A%X%Yi-^kDVbvM6Yug?2T zn=a;$@D;73>HrLn(jSl{znCy{y?9<_O|KTv6#8m+kq(G;<*h0}d@-zPC$8Pi2#^yQ-VQMt03%(FARBM4-XY?2bCJ>Pj z#VOVcT4b94(1D36r%w&uS~~!f^(@;$fyh6h7EBehX(sw20!(=`@;?U_#^9 zpjBPSVzopEMY|D%^F(DZco&9037uq8x}gswlT_xX* zFS{@7pT#pII@8{6F&4}GOLmAK$maLY?F_`xSrx!1+5GUC9f4uOuPj)s>Qi?O;2)=i zmQ#SINP~(Jv53n!2HFXQam#|vTh=Tz=7wjG&=I>_iY0r{AFy$8J_<~oID290P$ zOP>;PrF+G-+yXSra>ks|3 diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/sinica_treebank.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/sinica_treebank.cpython-37.pyc index 1d59b53c1f3a3cf719725002d625c6173886914d..b9b9f4d1b926a438b58f06139fbae485cbaeaae5 100644 GIT binary patch delta 32 mcmcaFep#H`iIpkZzl8eG|LV*@HT%;fhbey;p2?e6z3gpHzw+ozD`%b$)0Syg^ zKH5Lv4^XN!6qJaPYJVUF6)jSv%s2|6Kv?qA%ADi}IO;)-P=qnpT-!$TMY9Kpj4_^kjCB;M1& delta 535 zcmaKoJxjwt7{_zDOPWTbw!YKiCK!;`7f?hkw1YZ{Ufk51#joHj2;S48br28ke|OLG|2@y$eP*AsR>iVRMUI)nJ@@*?nl8-A163S_ zBB}VaZs#>O}}{Jo!7(2W2eOe zzr{ORO6KzX4Zk9bKaj0lTAmpIY#1nWBWT1)Eo?@md2VUWroeH+gkB3yiWCxQ%)Ns3P>F9R+?o+qu`K(K1xGRYAhYZwbPDk4icNK6Lbr{oH_!%ETIf z*UKwoa>vF&CO{^o{NENc?`)ADr|t>{cyaeeLYEKly`~^RqviLW+V3_Zjq7~XDC-1z zX#B!h*o8l0Qy_T|@ciQXFqVm&oH4XBkeN0IO_M=%*VaVJb!k|0U7krVt>`k?&HPnJ U7#q!ywgbPEg=*j>*g%H%18VnMgoJwJgjDq};Pk`|ao`N%&7Zh9|fWgK}$ zGLwHmLdhiDXf^9K-*IiP9=f#Z1^2!h)Z#V2v0V*+yq}hHNS>wu9!c~F{5-o-Yz4H) zp2+X133wi2>DaF$cw)+2o+xp$BkUqD&?8aaaGUP#HmwY^tm2lsAx)X4!GJ5FyBqd+ zaox75yG?Doz#c00H0Gj12$*XhU<#@MKyjrY4xsCbtgBReJg1su9vykyUV8t-#TBmE zVRa*LLo?RSOnpD(@krSl6g`XVL41vzWdFn~uOru9n+)it+3mzI`%=4a2#YgaGlS*z zJeg&mB*)mMZZ4gK6S(tRXfT3u#DWTsvmLi~RU$3kj;Da%hU?fnK6Qfg>{)W0DT#BJ zMY!1WaJ=6RTeKP2>z-fRg6dTrmzqS}w1+oWg=HC-|BxF)Dp4zZ{#Iv+i?)%I1SqC z?9a^8kLEyHh2p-AcGzd=`302UK`2KGOiM2!R8aBXR^ZV*K$Ypn+T;QnwMR$f?@rtS zSHa{Q*xqE{8oTU4R?W2|cS%Q6VX&XG1MKsogY4t%@+f*79=&Z1=d=W{#Zvue+27e~ zr}4U-)V;CM+5~8_t)sMf2_OGghEcHm>~X4pflNiqg~mX`6?e!cLsLGyL$p|BPu)~e0mp+@VLWH9b3NGPNZO*VSxRPbP*LVZq3*1;j!79qP6HM zHj&#{9at%kj?zwac&n_ci)~M;-TJ}-FrvN9T z>~wBy4khylPuljUEqKjc*xE9v=v4qqZ+hX@1Z*$SJc@<26x30bdAXTQKD7Nsxal|T zDZBGO6#ezo*MW$Rad5NKQwRlwE|Ew=GF&e=e5d8P6|@adl_I)qkd&+wJ(eN*YySd? 
Cw_4%= delta 1827 zcmah~&2JM&6yMqPde?RwoTQM@rX~~uS@K~+O!=te@D*Gn1X1H&EHx`9J4!-qZ+N>_ zf>6bwz4TTz96~)Q{M>10wRH25Ef# zTgk2Z4W7GIyR+(V)L=a~C0^@SObgG3#EkLh7->1W)Ti>2Z7Q1xddWIbT;WZ=wE?BW z;$n>FJMJAFQm7zfA>RtStfcL_z&F5kKNhcI?li@Mg9H?72T%ww5%%E|#1U*OrrRc% zPghNc&61&vyH~GWpIeZY8`f3=9y-Q$Wbpk^#v@_3Q?i^8&*O{ikjPo3gT%F4rw3(u z@pIy+_{n;ZQ5r`(YQ5ryon-~_eR5npw4JMm@C5HXN(#ql9kBpqaiVRuos~#Rx05}{ zZ-smArVm~)El#8+MImwevT|pzKWo$H8Ihrd_{ z;kR(4>|{kPlVzWaXFZFBcV@V-H&IbbA7Ev%m3}>jpPVMyl4Px2X;WRCEY{b8aM^Ec zlorH;UO(bZyi{)62C5xTqV%RXvhTN>pW>>5$vs1QRA;RDSz4bXI3EcpEPO^#B*Fhn zK}RzPi{fl%@nngVwp&NV?{wTQQ^jN&b*tj{%+@GzYM*ylM{S`rC-9t$ANohdcYS3s z+h>mM8DfXU6k4o`;=qJB)<6GV|Hn~#9sNUY7FR&T_P)f92xF+!d|Dj1z)nRI26U9j zn7c4Hk9Ud66|_?ywJkZPT-_0mKiHh?3=52<(%erDSogG17I(!L`^z)ifeiBV3Bpc8 zxJ-h#*XL{TYX6z9RDH=*jU^|cKEP_C6^0yY&1L?k86;~!XBxr~nb1H{8Q2v@RfxJJ z6t@o4?tMr~9}x@_941f$Qdy7Fc9I}ZaEjnG0kwciJW#QL>IYO7pgc<*xU{Ih*XkUu zkoY12MU6_*$z=)@pDdmn7hevJ57Wv?!ZmzV?Pfh(pTdcyY6XV)Ww0;@G?!I6V1!m= e(B)M5ly7%Q6!v)iv+=)Ohs62+t-Xm8JEWwfX-(CtKM@cJE+%KC>{hlB`G zm7t0W1f+wIs$NjJAXVbB7bFmB5nSNPkFfOpuYy}gSZo))$$PSgLPMydEMnQL$1X&=cbLp;u_CysLt)vJncIaE;#YzlG^=54AE<~}W?1vg!C7&Lx0 z0Gfan*^~r13W}f_0tFffat!8SH3IVppC3}4&pV{(*4Im6Vg-yEEtZ$_wb!beVYZV$ z-2QH0@XzVYszEa>?tQ`?fccnt!5b!|o4qXdcE>m}qfN3-h-D$7+q~8O#(^0A+F-cQX*0at zk$O;uzj1~OoezZ}FUB7^!{0lvk^n0wk_o<}4S|PEy&$^SXNghbHGfFla8Bc^!?oMh zVYw8#Y*?O^H5~0)Ya9()|IKdpd@+OX%uYe!f6E7tkhWZ*MXlA#67y6uD*CIMz*ukP z2WQTq=Cm~Io)jC-_>+AIo-ycYV0$bcd*>xH2ZE~&WmGCeLXo`^VpSLk$?A}5aSVyvEdl7mH9k%-( zS+gIx?T*<1!XpLEId&(d4D$nc`u|`Ymfz`7SV)>@q@M`! zv7Ygz!@KQe0===+BsahAG{8O@PfqZLdI;C}heO?}l@(^N!tRd0-N!xV(W8a!%=z=C4_n-<6%o;4f=zbBk2el_g8Eypmx9Euh=w7kqtOkPRw zj9OOl7G7b$Os?;`isgNkl``iG9FJda)HPu=GF^m)*YQ9__#+A~r|1?1idrd7XJD|L z6~hvY;yV8zlf|-LTbYEWYFDO%hGkz7%Q6!v)S&H88S9};3G@cASjNg0Z;_^Jfu2bb|ixvcNffKYw5R?w^#J4 zLRZr%Hqi2n+Xrh2bEPFfNC&$lb#;b7LD7PCqM7$(sc6&+dbygNW#3418A~u~)DjDN zrJ&T3PRj9*?HDDAtw38#pvv0wX38@|vVH`}D2|{AfJh3o8wN{!vsluKswHTZ!j5^J zpY{UeHFnqE!+!RrhW8yIK{!NqH4rtYeP_wkgURVpK+-sfy_u`Pe5P+d zRzs`QjAe5|zV$FXH+Y1>w3}rD1N-tp>J;NDn~?iiFfE#&2VNv;0fPwgAS_bPNffp9 zgS4cRC8fNiD%metH1yHPfzHIvuGRWstToth#dC6-{TF&}21#i@iUiUNyQ-m1zoShf zjB9yu_-o4YvZm&F+HRRQ!&Nf6D-b%24!6sz?u*wX9f74g=q<&%Lp`jwEyU(R<7}ff z`RpOWqI^U31L9GA4^bTjhh2{%GF}?rv76xxcyybaZC4Hz;r&L0ixE4*AKMcT8{uy} z!o|oViI5lLPdvhZA~%SSU5Un{{6rf9lTGnPM%bNbjELs%(NCOX`0Q})c6C@Tg)SSG zxVna;U2BchAo^eS?Gu~h_|WVW6nGhqFd_59d$xO^Lin zvJWh(2>}0Pe%%*xk_g)zNC%rQKie7@>p&+-9}hm~E9kU(!N_CBp%FG68ysoH4Yr|p z97Q{d#>3D!8FZ5}Ka0IiMo|eblM|sx3E#Yt5o7Nra+by8HQS2NQ)sZ=`N*36%5C?| z23aqF=GhPN$@5&t32hsC8kV?KbG@R?O9Z+f?n`tey}Zftc56vBeO0RzFKbk0uO*8*A_iHcJ6g9t8(hIAn*}a6)&l50iOO9-Zo<)Xu#h(cBLbPi+``fGd%+`f+X~Kf0v$}DXre>pI-5>lo+|Ce?3LP_I)#SBaUFgSxma!N!Zej&K- M$c2vwPXs&v2fO#!iU0rr diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/timit.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/timit.cpython-37.pyc index 2132d5574683a31930f335db1ddda598903d68dd..ecacdc35de9eb11192cbee8e0905f3866487a414 100644 GIT binary patch delta 3491 zcmb7HU2Gdw7WVbnC?>8 zorXfA(WnG5r>T%O(u$5d_dG!c;?BKKgr-AQ!b)BUSxMe}UNbiG)O-u_Hdra-rFiRk z?L0HutOl#`Lda;hw3DH1)88&9(hp_Bve5;*=$o_UV&1n)MVT%a?R?4NrepgYa*lT* z%Vf&)QESfhmt5}QyPhu<+?-!|KmArT-GJc(!HxX`We*0#OzUCxp19ol+V)1|N>*0V z0MewF7@OlozUacl`{GpFbQB`U6@O{VF)1SLyEkpX*P1cVj=Xm&hud#4RC=W2(IK*! 
z4Fj2gjYKmUpS2xMKCan~?45 zUScqsfCcje%PSMBme&s?Wpe+fc)fG0=;<6BRO5$!H6A8NRapy6((53?Egbn|P#ap1 z7@ALsYSn29#zb<*kJDrYi#X}|Iq5ljz#A@qP@LYP(v`lSr%4$XB#lfHYOrYASFMBCK+yYlMyNBlX<~=Moll zd3{}8mj^w>RLO{|xr3#?kGR7GBLvMN)7`NrL*yjED1xB@&r5BAFV!SL##LDtXG=}O z>Hg$_b<^N5EH>3f^kB29snGolO}mzMs?bg`-*dWC3HDT_RZSJB?w%J#-V;{Qp^G4b|q?=lyjsVyTAMWBROL^5w=g|M zR))pj_Z1Fih@?IwqpCkijqIHw?o-6oXCa3%$gn>vwNiQI^jn=BI#H=CibJ2~DZZ>G z8>c*v1GR5;;s!h58ItLN|Ejq@iwrOkjN}=fvp9J6IW8ks4tr#c|A}1LvP&enDE>J7 zVcoNkFZ#Ubq0O4k@wRwjWO|r7<=Vl9n=nHRwUg(FB7<0Qil%-7RQG?b+!#rObxHyZ ziNu2+vtjZ1gWFp$PcG+{jGhI$NX?S`v|yu|sMPQ_C3cSPWu3wp?OjVPtVRnqsvDZ+ zxsq{1cbDccxd6zNY_=wF@R9wF_-xcL9^k0##SXh&oPif`G~lo+O|}3?iY(mEm>HsP#yP;wVX=_zlr9KEe{>@$m_EKwKO@ z!*+=q<6}oNH1q`mby0YcsAYmb5WGb2GQoL*3k0tasM%Lc;46jpFY(gE&)d}D5Xt#I z)b9r0h)WM08*4^=8JREH#g%n6Vl>2_!OKGq=Dh0SX2_O0EaIjK+y&G8kJvfcJGn%H za|F*3sMNkr)S6>!-W@=}KdZ-bTuU~<0rA%4KbF<88J2OtTBa+@%B@D~SP?u|P$!IJ zML2o;#LAyE4Wbrm`{s;O$}jwn45+^+(Kw6XH_hNp9GKk%&*MWTO}y3nW#AYHU4B7R z1yCeexa7h8#Cw{cOf1c_7uAiz!-Mg~%Yz0tG3pD%#6B3KyqQdtD3X}?qP%zu0snu_F58xk#Wwq$nScI& z{_FfRpU#j=uaN0I<>g@o{^|yg#_#M=l%H`@`18S_2Y$|Z0823xi&$!c7({+mLzQ36 z(B#)=_~chNbogose==YM2+H^p!DPq?$+4aYpFo=$JVY8KDnk{l3dckQhwTJ)Co^=8TsbUVho7SY`1!T!6})%N z>E7iq69MG6Er-?MEPT>TWN9B`X$Je&0*S3fkcx;mM)TP^fL!j@ngMll2_piQZ6@fD zyTFF;`Kh{dO(pm>hfXZuy~*#@J<(i(*Y9M;nQbj#{}I@$`Q^2-))FEiho;-1Ke6}IcUV#LH8xU_FFTEo4K=WKdWJdG<;J-Gm<(C>` zF)6?0A$hh7S*12lB~Dua{szgXyxP#bz{qgm(n5CJ;;zES*8kRzTA+x-j%za4IcvM4 zh4MxwSP3{8YK~Lp?eQQnI(X(_v+xs@Ozq$=Mwjunjg|bBy5&2;VVR$NJ$C??lm@yS z%LG(mRXqp=B;mf2tJv61I-C0VnwgE9HHrYn}-eG096__vgE`}W| zw2ptZ=~Sa!XoYFPQAv=F`ZJ^H6m@EI2O<^Zh;#(2!t=&A*L-n0^A9eMjjJ81aBWjKDK+C?!;#&2yX=XW5eluFn z-)MPvF~?b-XOrx#i#WcEUu-?twzvslG{N(`o!@H}G3mFOFBUIuQXgh_L3uFDZ7&Z+ z`J_GD;nKg>+wqc4zu7)Jg#JY|QPJx-lwtdpH?DUAp2ju5XFn=q&mb58MtGE(7G;h- zMTO5Yp-0Ydydq>~Ejx`Xm-(MNJ}Y?_(g~NQ9MI_rKWL$E@gLgzcVMS1UTkm(i><`O zB~%C%C&yBBFA(0$89PI&AA`WQaQpG=B*vRN8dexObZcq}{VcQLdOMBlC%M^Cughzb z&va}fb^P;==3;1z-m)QJhlib$5ri84%;X3JmyH2Ykl_W6vjmW@@JMHz`1z}yW78{9 zRW$;>ndEDQwc%(dLh&Y&0q#Uv55hhKSy%lylGSn$M_6X;C;}$Y^8`JYisu-UPj(Pi zbXcC@Q(f&u=O1@kscbqjSA3vMGq1m{8tfn5oVF4%tiST z0HLNGwhd<|0Ge15&razTXGiI5LZ7|tZwWHy_0>$bR7Yb%IdB6N$2TFSQgiJ;!ZgSpgoXDXrjSl(n?w;+lZ zPa-UXe?U*3_u$Q|KS3D%1s=TX$$7KxwuqRJCzm?;4*P-Zjs2GwVT8e4vlM7wb=&^j;YtL8!NpGR>WI+rGz5Zhmw9Hq%J-> zODmWs4W4uYYb%U+$b15`iYqE%4$GZZ6lQrRjk_Cd;Z|D9C5%@T`1&aK=(u29jCImv zX(zis>{afP)kVzkkS#sx=RA`ZFfO@I`9>rZ3x(_K7y3X5@) z15wf*jh}4|FM2!VI!1jWX`XKLB;4al*}S48$rW23vvAM=7F76!dNN&srR|*)S8KE4 zX|*gW)%xWnMZ$~%-#9el8DvLWfsfzCoRjvfFCJ8XTqqKBX{Kr>X##JviJGmkLB%v! 
zUFdi%_NLb^xwu4+03`vs{C|TDXz%X7|2|FDPh;-LGifPjcsm9ZD@iELC`@=z%6B=> z|8fydsGK*VQQLWDsxLT>U?AW*f#RnXPMpPM69Fp@bvc-WssOICX$5q_)J>~acaAtS zPQ`Hmqy7}EsTZ6_m{K5(EPPT78?1Usly5gDKMF+z^@8i9(CuYS^pQiemxmD)VvAE~ miQ6;t8#Z>tE&ioU(pH?ZUc@)>$Y!{)sZ&CASKYJ3HGTsYU%rU| delta 888 zcmaJfgc5cS%Q?Km+BNl7RrsZ>FOC7KA3C@P^Upo#+=T9uP8m7CpClQ@pXrcrtU z@pY)mA=*EHKpZ&d&XqsF$>JAq;EL1}I}T|ogs|k<-I;mM^WOSP?L*DFXIZ8sp6brV z_WPkVOCAqvOILl}(S5@)h(rlZb<)1+m_&LYQH|=aC8{&?mEvTmyd~Mj@aXMmfqc`5 z#((K`lH!YoUp*rzKTA*+%kf5IFYsC8sXN#UdM>mZb^gg%OQl8a_UMQ4osbg$YA!Yq z6DzFSa$zG?!GYqX1hT_nw;oCSb(HDh*oU z*62;<5xG^xjx1(VJA;UYu|`=dpbScw)7)NkvF2?Y^s<&?xrm>qu#uHZh=c|uS7+{^vEku=D$8VF6X3e2S^)9g+RO4J}a znd6Wj=2vcJ@R)d{2yuWe|KDH*-2Tgt$9ZyR0ei=u#hL&!*9ROnCSI&|ye@N6QJ=Bs zFBjp0=sDuch5hG>*ug9Y^aJoef#lK>Cu}^{F(Bd;%T$Lh3V4}eUIe46%BofJ0s#q7cAPC}a5W+Z#P|S2nGQqXz3zTfc zw-A@UNp?Pgdl59Lh=F{Zd+yCW~_(|lft(lpd; zniZ50J}sF@xqNxL1HimC{N13sW|Kch10}^kU!Vy%)6ZxU2i7&ARC6W-(jRCOv~(4B z>n-bxw!@aiSP$_YSk*b+oou@oP_k02|Fum_NXAnsGsZa~!bL671ES;fauKqsYOhQz zBB>AtN-(?1w@m>pl8qTJZ~Ivx=1TSjODPk5RZh7G kbQX2@LpSJF{ipvbmu;e9UQor^k^8TVECSH*YoG>y0Fazwxc~qF delta 366 zcmYL@&q~8E9LMvo?HnsIWeycR?A8h04ziQ(pw3OfDeTrus7+vV={nL@(5t?{2>1ZL zfq3-j-3RESh=|EV%;C!?PASx<{^C2Kww?U(vzB-R!jWg@FJt$dG_B3J^4m z1&&ETAm{@|nDq#l#mNH>EM|{@=PbnG%f~H*!D|mSNqtRAJ}+A1B<8J~@NS&ULpg2v z>fRo}bMOn^~OlOs@Yu7cQld2-7s;LQpErXO`hXxnp=KGE2+q+llrJ4L!SXC}l}R zX;7buZ0bjeoad=8dC0i*^K2p{QqQGE2OEbO`ZxcQ@fM|nIAOWqUDHTy9RyWi1^)nS Cc~i*% diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/util.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/util.cpython-37.pyc index f7acadae1a7e31c3d0674fc7b5c203afb9b70d09..4a489bdbb64adefb6d0b245b481ee5dcb20157ca 100644 GIT binary patch delta 4626 zcmZ`+YiwLc72a9j-M#zpGk!l3C$YU*JFn)|G>J`|)UHdDb<{eq+q5^EoojpR``Dg) zw~iCR0x4}Ng^~`XFVvP$ii!$Sx=_&y3O`yXLI{dbB`!ipREUD0RHFRJ58#}+PMmDq zwfxPQIcMgaIp@roxsU#Uz40nb@2jm%gz#62o!4JG8w$Ngoy$KJiM{ymK6E}kP!5}l z711Irw2tuf=ujdW- zhqP5V-pHG9yvgW6xn5pg!JCn>nz!&)Y}fEM-j3~B-oZPu?c-g%8{2-qp7-#T_cLvP zZ{WRrHBxE5hOb55I^M_ovCZ%Sp2l{Nuj3hPhxi~L!gf7pcZ4z<|3?1LW|$uZgO~CK zQmOpdR@1Ny+nWM|29je}b;Ewj^`KzS=-xunaQ(Pvoc4&WoM9K+*b1?}V}ZrRiyh~Z zL%8&%(M=^6Hi^BR zosqMuWT6>(Z&g0(KEXzNXcbx(;xs{ViiQns)bAunA$*L&h)%MzET$SB^OxI1r*7bi> zj;_{O8`Mi|9qWG4Cj%RC zyjcU5@lTF(UB5>p2K&WO=7zy3QW1C*4wEg{F6*nh4swbm9vK`EFK4z6T{Q(G>XdA+ zdJ=}gDw(=oL^a2*QE}{JR7i0bRcuq6$f8C#Al@EK>l?7+t3|*}0&fA*G+nw(x|RF` zHxY~xkPF}l!BK+a1RBB8o+L%b0U8koM0P068Y}k=on>iqo0iHuP%OFneFbCQy=tDE zLdJYrv~PHDx6Du_h;oirJ4rzLFZ*&Q4J_%E^U2ajgulUDf0DQ|x{JviDHaS&(y9q0 z@zn}!470LKI7ZW??gWezeG7p!KqlNt>YgHyo{|2Tq;3`=6Ag~tA*3}`>a$B0+)VsD z!Ez?k5>KL9@$XF&hYyg{odhQc?jj(c1R+>bxwMNVck3uOj|;hUNvRPftk$Wjs(@bk z%H{)MwoCkR%Y5h3MDo|A_?{WVHcl5|dnkBJW)^2aD&yZTmL`qEMjq?jfjJl*E58#U)exWZUkj^myN6+k1y8 zm4G$~wFLD9pCkwq+)AJz`0;{k1jXKuEmnQVio3S2?pSUkbj&V_C$@hgLR9{IV@u`3 z?V=_|@gL9Eh|#g;%Ck46*iNaQ;(#@il31P+KgCfO)=kIOXY$qiA?g2$KkvG58)b$c zEf(@~vi3MR?yF;y<6{60WRzgNEm8&yw`6)SN*b!%LnksGuKarUqpZJ#bT*UlV^}hC zo(Is21HMwsd9!}hIE}^4g${9U_=X;7>$NLU0Y|%ay-B8yzn%cO*{JY7Y^Rn}c%od0IA^H*>CQ8Q!eJ zK^i7VU}-~=lBI%-{W~U{OVv=VSHcQY!il6BSL;);FazS}SxDTpFU#h|L;F@u$RH_r zJ-Ew-$Zx@Gp6m0o=qtCF%NzPB)5*_`VureOGlrPL>R2m-r^Uba9n3$DBN+X8Y~X7I zF@h&(v<65MJVmffF8IxXA_sV}Waqs=&%||lo)OROzq3qwe3rPZiWJCNg%2ir2NjWV ztv0$I?dzgbFLe?pP&Y~H>Tt1OTPki&Va*Jl!x8ja@TmCTP=IWIqZ*|XoE3CrnimAlNDg&Eu6F1{dx&6>2?cAP|;4KtOA?06%4 zv2t?!FHD+}#Wa_meSuc~7QsORi(rC)iU3{@?#q7#E_mNOU zW;kS!hL>p|wtw>A!LjTV+(AR~-g^uwErEB;qA#mM-5~en(j63R>w*}Z9AmGG;^cQX zeV>*gRrrc$*FB*xBIA8>0{(|WHE^+V;Lv1nk7p9W%R$EDd&q?fL16U^IHh8+Gj+OP zb3CvBQk9HS41P$matkyeVEqUwKWuH$Vi+V3{a3XScm)MMKubjD?R(fOm7}*m#cq_Y 
zj9?zY!s3PMD|XJJj}iqOXYo}>DSs)khhC+5$108ti1d_sh$CR zDg;)@g7$M{4C0ZK?ucS7oOVppna8Y}bEjZdyf?LH?c$9qI>o$j&VTjVn|dK&^Bub&b9ws`DF-!4DqmQL{k1ZM;f^)DrlKuXCHvVtP3 zFTOSDHYsqoM|^l>J9}3Q9DVeJeC=H+Blum?OY@~2Pa@?<3$}|7;y}HEJI(>d5nto8 z1(OGIzoQw_dlF<2-oddk(Ri$by@Q3SL)?7q?lIDGCOIsBLnqw9KCHP_)#7;+S*%WU zkwfmf@+AIxY=9jWb;nng|3JbliwW@^q_UX}f$ihi3DV`uNcr(;x*QzGbmb$-kL3@^ zCkQMMH+id7bik8=(c-5qbEAQusRm&FEIy@bTnuo8x>P%~THEyIHsa-HlVV1&s cx(?e0Y@5Oj;S`QE<9~asp&TlQ1ZX|~2askP2LJ#7 delta 4429 zcmZ`+eQX@n5x;l7kG=ERImchI^Wm5{w$Dy%^5uj$ac~@)xFI$Hha?*(%lh7)eR%iw zGP_sDrmY2pqVfl6`pRbu3aNk!HPGrH{XnYPlD4V@v{WsX_KynDqLxanDz$&5rF7=4 z!;wok%fFqSH}huRyqS4#e|(9ZeT~)Zt*VL#@V92_4()}7K;Tc}C&7Al^SO@pMP`Q7 z5D)O68MY#7gaweUFr${DDv}PFF)OadB^@>^t%RD8bi}N(s?}<1nYzqMs!7ReYIyL2Y1x+X6hs<7WaqZqzKG*Oj-i*%{-o;n)RcDyGmUr_t zyalONzLvM**iHNvONv*T=2=9fnjdWJMDZ&JduMFR=)G5;pRw~g%=Y$(?>CRI zdGWvIQ@sr+kz%k8A3)MRq>%EIU8ZhLaed%beES#v2Nqhyd##Pd*H%8pT1mr;I4&5v zRi+6wIQDvRcJ(A1Y(^5u2t-IBWao_>)X{q*K@8zD%!f6S{Y6n}`77%nVK1CEZ3owU zBX3~~O8j0lwz@+}nzI7xNhOhW4bWXf%YbdcavBLrb+5v;XUqKdqMBwJIZb=N_;#zt zwvg{$*fFzd1HwqdO$3$1tw1?1KAPjk>0tmHyim?K?ZQ@Z&${|sWN26xRb3riFXXs( z-iz3ojAOXvCMz{<+HlcJOVj>Ve0d$KsN7Cgb`T`RkJ^8`D6Nv~USulk7-`o__)iV8 z(=#sAlITtX8Pd%}ts;;S2swr^3oGcoiGcP7{FS+dylHqLGmE=OCFoTTI*tKu*3S8h zQZy~6i^|RwD=50Dm>;>AT$V=h+a~%u2kWlQ8`z4nVe!?@U$x0Vgl%`mfbw-?sCCXY z?H+M|*LrawwW;e6neaCU?nJs=1Dye>k`^DOj&*-Z923JXhjq$};k(21do6?hdUBUpyrbn3UkoLmN#YV=3cJ@qs>qy#y2jxQk$d z;BJB=1f@_(is}ri5mt$xcXzPr;@`XPV;vMWHIcRk3jSu(B zWDohLQ%a8kp~83MBDJ@xz`VTa@X^ zXi{lA4nsumBajZrOqA1V#FLSc@wlB{4egz*yT2-XIT)~1t8o!6&MlRD7kD^&| z@6G!UkCN0;f@1_K0ma1Mf|AM7DoWv=L^%s5awz4Ma44u$D~b|=$zpWV?LoF(Ol+BJ zE-6yHuEcgt8(83Z7$}Mlww&ma^;3amgF&7myBi6vRRWH@OTyf`xt#`5L67Mkn<|&; z#<4fi9oQf)ZyoCOMPZuen;|Hhl*E56#pR&r+tweE5pNZ9+t%);Tmo95ENDm&bsIsD zV1%FoL5*P}@y`u4x^#fR0r9(StxY#t3Dw$*qPlMnJ5(I&`%VQ*7XNcgJKG_xQQWcJ zQqsz*@Dd!S#+Ga6wCQxY|0MlyajgFWJ0Skszh;9}>qRCVX->g(VSpwoTQwEh$E-NA z>nvM;3hA*_+>2u0)Lj=~HED@GuRr@~ZP{(t3Kvz)q{-`H&~^HMo!BAA>_D zZd3s-Svf->Yk`6W_tVUkX;XI`%W!9G4$|SXG*()uq$tyY7V(~Vbni-Li{?1w4x*X(rgt zI`QW4!d+w($dfBc>TlasuABmScs(y1UAUf9yz^vu=ZKSK)Tu8F5c zRz|(?_ z)>f9%FeO?>8(2Z~jc#Pm7y0OmA=WMuWA}~FtX_yV0_JJjQr+&S5oJz?TsAlDcp;+T zJTaqb+sb3fU{i3?I-c4>yfxOhUb?lD1kj7L%?(+oUPy;&2fm5?R04WM=lGiAdq^lO zQyJQkhHn!iIy8Rp;PBW4+(E2Cf=3b5I9&k!3@P8!q|6|P&JSPUN%7qHFuNrFGJdi5 zyEFrt!n-`X?k0T^2mVOAkN<%{1zaq?amToS2B+fwsec{^;3a}r2z;-n!7k+el_{BQ zj?*<5q$#;0QFxVP?#^B8<>K3SKE<}n zP=@gX1m>3xL%a)PPs8Pez_yoeWt7`j;=AaoN!v28N<2GZ_R-Fx(s_Ky?74cH{*?Ig zBWId|CKE(>csUk+c34 z?2{@_g{5iV=+{xri)3?jkN0hbaCti52H`b%CTntE?l&~O3{@#!Zy~QAJC~D0B7i+D ziR~qhm|&|I(5T;VgvyXCKY(&=i$mRi;iWJN8tJUpX8zrMAcJl2kv zfrpN@EWV2-Qfv&<;02_{QXT%nhVaEtl1GRtAb7D19RRj%I`V3x#!g#i4}M-5hJV4S z%9-wL51v>?F1ujJBSx*3ylf6@A*Z8a7$;I=lF4=aR9mDS2DzaMQL)l^!Ofcfm8z0P z>J^5r~@JrmX XdBHRW$Rmd;PGML75-cHVj&J-wv_|e0 diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/verbnet.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/verbnet.cpython-37.pyc index 4089e5d11ac14b1a7f5725b8de539d7ac16001bc..377354a5c09c957433307dbb4547177f7640dc63 100644 GIT binary patch delta 4076 zcmbssTWlOxb??kRX4h-Sb=^3A#hW@#vWXvY;|4o^#_2l2&clvVh?d5i_0HHHGPAqf znN93Xf(x_?YC*+Zf#8FFRc)mTL>WZk1O7lo>L(It$_EM(9|$Cn_&`BbIOojn+MXCB zgrl6f_uO;NJ@0e(;XCZh@3MyJ`ubP^{z{RpjCymc;lR>$*r|bZKxL|AFjGoP4A-(D zr)ByE(?R$KjgT2mhv_V2L`)^E&^l~H%~(3d0_Ov2L{(l1sEQW*ikz-f<;6fUx@CN0 zD9PN$QeMv$R4rrZw#KuDbs3O#scH&@AI z^t^6oGF%pqgAeMS0_39?21*tm5Z?<;g(9#&<~$7jnziE*C(wjMu`xrp>|DVtPJBlE zD)Qn&6tHgambPKJG2PPhmYvP#G#*5BVM8;` ztTpEt%2n3ejQk<12*v@^Gf2PxNhaO`I8FRHx^U$Ho?;w$D}puzs07{R%2vtb9kBgx 
z8pQ4txSH2&UA3+|uf^VyPu5gS%A|wKaxPl|$A(vQL(|o5$B-r)VyU5Y996IA>?&4i znOxwrmdL*(K501IR|n*72x4ej9G&SIK^{VdIT?mFCBwWyT;8|;xgO+*kXT)HBkQJN zb4{Zgl8oai$5^>ag~Yn$e0$$}5<4QMn^JXnA4eT|hxlgGan>My-qeyzB7y`5N!U@` z*UTq*fAZ-FV=emmZ1EOF!H^;fvNXUyxDer%C7|S%iD?W#;Qr6dB%8mlPMEG`y1zUyDp7Q zBAeufZfA{>#%Z@p2{*(wC}+RosKz7lap$#VWO1WrLDg4uja!f|dnm>~1yr7G!5u0_ z?KREh1p|B)vhnM`<^^E;lh`_VkDU-NcfWl|w-PH^%TCn&)iQ)Tm6S`USShBWW$R`Rbi5(*PYh;(fkC-cNJ@hM8ThFX%Mp5Iu z4{+5=rhMO>sczUKkB3+wU||;sC~WT&&mSkF`=6;n-BeqMa7r*DoriBH7=kdI9S zKHJ*4%;Z-PLkjNpBd>>p4kvizw`}Mn(vUBdIc+1wd4X3Wa2}6*oc(|sclI62F*Yl# z&FFGG5UzEiBP-yrp@<>!6oYuR`M9%V6 zzXU82Q<%MJc2<1Yd$@%f>vkoCJfDmUxi8s@!`sh43b-XF)%Q!*-bJ^&L7ZrA(AL)N zWTlYCMbAKYvrqS7ATK#D4JcBd$H`wp`8QCC%+v63!&w`q^s-l!mA)}9wgy}MvPOXV zRp%Fjvr-i8<|t77{lw(>6z(PgfLrK<*1O!or;)~|9J=Kl@%5n}{a+H^6Zf8fe)t?- zAMq@b?d+->#5V|E!fU;0gDdoTH3i3Eoo*AqKh<{Fl{J`QS)@QYem&0s?pw~$)7?13 z=7z`pZNFK3XL$DTcH3wEKI?{yMR*9=ABgbC2D0B<)=4jk$pQx9D7Ui+G7*ioA zzB`q=hyf@EKX>>34NjJ6uum#}OK=`|aUH}J*+jvuv_Q`sar|s&>s}Gnf$1O4&9i@H znIs+pvR?+gK3V?0!~u zK05a^7DgqwDPB*#JxCcqGB6tHINg`=9@2iX>p;=rEYG~fW>3Mg9809uvpCw$Brbb? zuhuilr;K_ER|Mz*P=;+kDn?T(s<84&ap>u0(YT;E#)a3|`5~mQ^^9^TaqRB>Bs@ys*z5Sm$#&t>LdlTY~ z8AWtnI%;O{@|ys5Bn`ASOa@Yta-%vKYks9rxe?8K1@JpSovyDYC;X%VfBVe13nUku z_b)wW?6i3Q%Frn3jYPHNhR$EYcZi=Ale>UUhW~+ZpgdD?aZ=ovkF{XKFNg2mN5}Hd z;zsICa7}!1KGin}+lqM75nQw#MGWAh=-8^3*Vc=C!V-U*zc%NyyE1Nlc1Md`gZ7w( zA8xgAQx#2VaSueR;*EtHBVDKgqh9~NeMHaIQ=1Xsb|tRC&jt9&2ESQo`f?h^b8yBx z5j>0F1Oh5sY%QD$klG)W`V8XmsX}i8-j6Gq`=hwRDaUaSnV83 zkYmAc%z2Jk$uXuJZQT(u3OhL=3Vi zx?I69v>ZMuEiNNQw)Ax#6klCTOoWgO1?_d0tBgVNl?-hHpF9@!fFO%Y3R3{WKl$I& C@f>de delta 3964 zcmbtXNo*X~5uM+&a=1;Xg(4}B+Yu>qB#x+sl0}N7N2G_=E=rWLj3SSk)1PEZJ&UV* zXp)LVz!qQxNDRyW6vIcwHjp4d+zJrnk_Cv{b{_LmP>$5eZJJb=IC{A~X3`THHaPG|5<599(m(-o8Dn0zWZ z1)o7PSPbPtjLt%4xERSrSm5P=9@ZoG0(yi;-U#NRx-u6?Mc2L;7r$20_aEL!Tu3pu zRp$#@)wFe^U^}PS=fOk}uW@7fyiqdjd>#~WBK&4|CvHBO9bPII`S49`b*Ws@;MVW~ z@%wNl9LBQ&rz`S^^${CLk|2TjxM^5+pErkPTpjr8?4lr}WG5tA2-nsv%~FLs#?b&=YzRWS`4g|-p@ z_NRql2ky;_-uR`9JLxn=q?^btB0WUtE;OfFgg#vTHw)sgl|e6Y+t976&M)H+f(IMU zr-IT3mla&LfEY&>43isr6Bud9vUsbbx{pk++3d!w>b!)&<7{{<+3$*UXMehd>K3Q$@!%7 z#|j$|(3Nea`+#hOKJnYGeXLD9?&?kr(uPzVlFIhdd&7OHPa=8GNpHK)#^?lnl)BR1 znqriT*CL)bbTmcAdb7_z6JKthoZe5`n<3h$hGcTBO*K~CFR6_Df(|4?zi%f{=g*xFcKwSD4d4=u-w;OF3JprVKZ{R$ zzm@SDLGr<7qPy{W5^&EM?{4YJ1oHmDS?DvA@ktU>-0-qyRyoMGO-8t3;8;|C!^w<) ziq_op==^nVVj=*Yo<}`bMc$xciFU< zeDENVkEY!&E5ZfZk*04(6k741#|iKKBO4he9*I)yEMLt6 zl%XDr89MSwQiB+C5`zWCP76CVn7K?$mx#W4L$C znMwbS^(E!)ZivR48?yO|ovKyUxY#?qXS+}G1=8kRAC4(GPZF-t&UZ*l5WbBMH=@}# z81r6*)|$vT9^|&^+$w-kp<>E&ud@7>9Y@C3onIX~qeL>e#Ki6#nVNW!mIM&uB)8>y z6HaiFczo6pP7lOeBOm==8a@tO5 zq_^J_s47OFMDJxJ2#2pf z@1~;)kynWTG1anBb``qQjTU)v9^3nt_bwvGXVh5|xCpdhXK@mfJxYioF3P<-#dK!R zNCscLf}wz;Ni@0LSbcE#;iWWS;@wR49D%_7QT&sNb~Le?)F?c`&&Tk3xbC7RT5Ox&FA9U}i582B{aM^4CYhiWp_MH@dco1xjHbOiy*Ce;ELRawg1!wZiK_xLl zj5mm6h)jvQ=VlJeWv$w!*4mf!S5^B}MU;nB@%BtiB+oy;{t9WzBiRsibWpZRQYT|u z4{Ot7!rzV7ThubX<=X4Xm8X9XeoWSoC97c8?#4x+CB!;djD?u<@%hJ$9Tgv492w5w zfd_CsU9quO>GQ*{jMCKqT;T!a#45^@t_1SECBN}8O!Y)yk04H$c zNz3>mFY%QMOjg7fvscdgysnK-pVuuF;MmwSn?qxA)K!yKxQ#>;Z(q7Tn)Gh{-ydS{ z0EFampWT9qh?KDV_{HceijNoV1o)dzFxSdb2hdn<)*Y#j5eYVyf;T!#Cfsss-5O7GTS_ z0Na3Vo&lUuI0HD#bAWRSX8}9h1?(!E<74-WnwS5Jf;^HqQ7oEaV>RCF_|ezT4@u5~ z#KEodMkLBIPw$YTd_u1gCO^>AlR3x$PRd_&Z%!p?qPIJKnCQZ< zagT}&vd9_~<OUN>wxlx07;SpCP7f=<{>O zkcF2@*^xuB4M-p(GU2e9PP3LUx*qSPWagtEx;L-5&;F34?jaRNU)tn&=qo$vlJcWJqw?pyb4h<%V<;LUnm zZ?`&?)JDLt-;{ZL!1uk9sP^xF&^ELU}d3>3n3^dNu)fg~XEo=zo`pB0SXw^M= z+zm_0^uExZJk|azP%d>pxj&67*g7$A!BV3V8(!CMw8ducD}x5gC8PedCZcWm<;2oR zkCvce&@GIAWF=fFHQI|Bg;twy1bzi)h|q0^ 
QI@BgMD~uPM!qr0gKXhrp8~^|S delta 1072 zcmZuvO=}ZT6rK0x&15po)IftxEv7}?j6zacP{9&E3U;Gyq!lDCB<77xnq(%t87=L` zg}Sf2g}Cq!NGVD-Zr%6;x}0tVap5m0bmhG-pM(T5^Uk~Xo;mm2d$xSK>^yKBTZ3Qj zW$A_f(V2VO2Ea8=np%sL3BfaM((UeB-`3VOu5MA_s2gN_c)G>RxzFelpkR@WbOe=i%;Z=gd5!g+X{=-J7xOb9G*c*A2XX1J=z4q-Ogg> zi=M-kQVotdqgLuzp3WAp58E4GrxCe1;Q1=ovd``fwG6#_3enH-?=FwSD5C=JHe9>x z_|2%1kAjBh*AF~F<=^buW85}@qZH_aK*6eDMCE`zwpLpKf8F$oIH(YpB}FY_>FG>5 JmA;cM{0H^AzWe|H diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/wordnet.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/wordnet.cpython-37.pyc index ac956924c5c6061aa6822e62aacd9f4c469f9d0b..649a51d436a92b828b178061ef0bc578cd030585 100644 GIT binary patch delta 15205 zcmbVT33yz^k)Ah~M%U=HZrfuWw#UeaZ27<^@@)?Jkg(0z%pel8 zT!vuN5JEzN1;inQ(4o;s&fx|t9sv3Y&OjnuxW~B4=WLSz#ee~ z917kYVQ0X}PglT2PiNR2$qD2{JOK~ayTZAVyg(k~?r?sjAW*<~PPj1Q4R|9(fg*nQ zgo`632d^GKsA&1&ib!Rkii-=v)sdP&4daF3+DKiXj&W~zd}Kmk0^vo0iCXb?SHKsT z6sQk01RC`Qy;1k+^*zo&lU5R#th)|P(MpGuz|=K1txPMw%%+v={(X+XG(C?>#%UE) zQlX!=&#_MlG!N=^R9LB1QDK$d%!SjrtXiv~vYK?+45F&l>ZqhnA4iSNr1$aK1bUyq z@3XYI+9a+1G9}QW&C?pRM#5%m^R*^zGGTMH1=6%}gM%Y|!p>~=!*i7g= zea6m~oi@EiwGlR(u-S~wA#4s~t%S8QHkYuujLjo#o@&z;Y16eC)XIEqrZ(#`TVR3K zqRpn~>Dpp#j@C*w3pGcF&A%jgJx%8F!pjnu{0uMGU`Ux zt1A}i3&uk`!g}f*$9`vVEg6^qIKFabzaeJI8$D-~RM2~ZeukZZM-dhBowuq}pLm2) zGfXe)sLsso-P5Ow=$?pincSau_KXQs=vTx<>*1pZH*Nvu$Ww<|u8^+$j>=Qe%2)^Zq)yB-H54`C!DyGBZT?zWTClBY3?>gr5`*jqPGYx;+v@v)J`+_1q z+{G=jrPwDQ_s%<|PK%4mm0L-Wdby@(-sco1q6;3tZ6r*Se6eWWXA6{J`4$PZc}((n ztS{81YOzR=G<>JLzj*WXF@{?limE4Yd{?%XoOjw79A2UK(5lJwaF2YrWXoqvw0a`T z{WQ_zWuSD)nB(X-f<1bsmj~sM($5^n2^+ zX!G@GJQxZ`=wUo24~?spTgw+TjKSdU3icbi@e^56Su39}pEG$3+FbHDG1bs@<8fN> zwQ~8md1K0QkuG+qJtEfMXFNr+bc{)Cgd(AEP|zG3&#>V0DppQ7nG^A-YH!f!Rn=$Z zS1Z;}Yaa}wgHuBtB)@pyTA<*d)c z5RR+rF)%a(Yxny^C0ecms0OG3m;_J{&;ZaV&#!9HW&)iB&;l?UU=F}sfcXHY11thq z01&=zT&}+hnZiL8VqUPF_+mY%9m}7KcMd95KSu zGv)5dEpAMwStGxAcTJ8pt7pm=CRaB5P+TPRFohE2U5!1F9kDR?MI+LvlA{9-16Ik> zDSKyfJNaQf5(%nZvHocM6eD+WFP5ICV|k82+kG=KAWCE%J`3 z+s5>-6vdF!L@*GLll{KIldN7dS5=L0EN+kvw_=R2HvxMRuorP2KmcGHz;*x?0OE;dM4$%r08(=J$SB7DTycx^&qcPKWST+_PQhwe{q0AdGKJtQA%UF?H+ry%O0sz+q? z+-0BT4IYyNbC>u|y(oFQej<;}T|NbaK6!U$o~C!h1(B&gE|<<*(FEO`g4UxC1S5T6 zop!s!^V%Arz*A7>g!O?aIgn>${`~#|sKj)(uRph4td-Zyub$+if>BB|^VO)XYlbRf zvG}v{sri0oqkM1vy=5CgOUkvi`(4%)u9XignC9m;V2q{``czxiVuS?7G3h$}y-g!p z$rBe4mtb4D9N5(WFilznX}Jy46^d&5fcPTn;LS{Tu&+;#YJRuyp=_I6vv6W%*2vkN zySZQEcQU!~9e)yBj=or*=?H3?wLS9Acr7=l55&{W$r~2Uoj9r`FVv8=%a2lTEwU@h zUisgPH`E8guk?-@^zq!qJ$h6h=o3rF%k!5kZk+@%xx>9INhCOsF=DPdu}AS$A}W>7 zE%}T0VlXNIHxZDtI_EZr^y-1W}p6b)Mh0>lcs~( z7)ye05Rs20n2>sC*~N~|Lui86F%*{8yXhduY`Rzy0SeP=?1|EPRb$=V^e`-Ub)sBm zPFFZ)^b37P>hcx8P~4VW@`6>><=nK3r_^*&^4M#LN%GoN+DZOVje#mc>8If&#HrX7vg`z*A}KFfTXV)Ggqj7M zDkD~*m6_4n3J{&xm|09aT7>qg;mmV;>Xo$vxzn>`S&WU;vp5`#?9hTsZzU1lrxtB{ zXUX55J3;KYi8c&V7OmjB}7c513r$t%|dzBxow3bWDs@o=5y;5 zBlZ1r->^@^;#y6j+)vN;z3yq7*01lJM(^9{odkgUu)^s^d1PzVU{)4%E^+`4f;1gt zdNkUqa7e%n268j6hWHLhA0!Yc?+)&wpw3z_8G_Ms_I;wf2fDz7+6uf%g;Jw9Q(Mn- zDauXq3xWAlcu1BTFdbxfB=qe>I${zr?=Q(8281;QX_xUZm7bjT<&5r~7&>d5q6C|n zL+&%IcgMx`Xulqy5rCCpO~OS`zL9auP^Rg+uwU=jO((7A3B3Zt;Ni1;nf~iQSS4Se zGr%Y0@3%>J+@7Grxm0rdAMA4<<5s%Ev0z-}0eAFJMDR^u&ImaP+8vBgwA-I&?FGCn z@0T~Lbj4%MuK9OrQ+L&ZiJq(SnA{jdh;1PCzc}rXt5CfFcy5n6C$VQKLp8=2B*LA~LwUNtM{%G4FcJ)SM#7Y_g^1llcaSc^6* zv&)k7dmuXuz&;HcJHbST1rpE7YZBFUn1_HftVdfIo@g9L5@kv}bu2O6QRG9DtZsX# zOFS)0F0J@z`^aOp!&lxsOtNujo=1 zq+QIg+r@?p`8ojL&}bds0Gx$mGsaq|l;f^i(Za6tAc!2+aBl=9vvRv>R_3~ks`BMk zR~ggJ2T>(J8v$zH2mHjrbIvnfPQLoymf?zwXgfgOWwC0*tiz%;N%#KQ8#(IiMy-`! 
zU<=7Cs9qGC1Onc^7;pb7Ww{h?i?U2}Bo#N!ts?dE$*cA^H*F-0s?v{J@n&DWnief= zmi8E+w@P#W#FM3d;zA;_lD^>~Sm|E?6@vhm0(=nw9>9v2uLJfK0JipT0Q(lew*h$n zZ1%;uS4^DH{%)Jq?=UKGH5&d?8aX zqUWzsSzGGn16B6s-xBJtFp8^cX5AN6$pnSU-;_e(UCo>ZcF9f{Ib%7f#D`f#l2Kk>zu(3#OZ){mmd9rRKEVj=3y0N z?CC>h6?9cO+H*Z4@A>**CQ44vQ4r#sn&lVH;V4<$Bzta|JopG0{saIunt2ouiny^m zL=lUVg~f;kS{Hu?$OpI&paS4c0;Z#F<(Xy<0kxy8(_diCtlQ}C?hXwI*rE6UWG+s1 z*%ku1I3=SbOnSdjJ@pRMDtdP5(AaY^ULG`?!e5BwUb+4of8T)(ik%cqLp~8vV1;`= z@n0xsQ(%d`;HNv>;n;%>tQe&bCg7rpAg*C`TPJkyQ`K)%-~Hxdr3d}A``uxjq-nK6l(9ljH#+vJhkrcUetS^{N_QV_~qu9WBY7DV<7?`Yl( zZIjS_F+=6U6U$cqhz8iA%G4d9Xi)5-OOl|VaES-Tw!^;V0uXK>;4fsmvCavbHUoSJ0F4In)2RqMHf){Nh)yD_>9vk@x@^B1bSSbE{ zYdEY_mTrf9?Awhk5ZTP3Lnhpii882|cMR6}C}pE;W~%f~pRxj$EZC;sMh`y$fc3ME zGcgOBoC~F+8ln#sY@1~NcOK6FPcY(DOppui@>j<3&hn-Yskn_UIdIp+>XV6tL4F~1 z-(5SECZOq_O=nQ_7^ZvIZahc!E|=ZyQ+CMhjsx~R z4=3!06ipdYw%e1=q$}Y%WFN96T|Ks>dtf=e(FYzw%1Yb$t;w834%gx7&>TaQ{3aYb zorhc~$NP|?*e;x%aHGV+x7al2A@^XyJ)|TYYEImfa2`^IsOzh2SJ=80&84|7^CUg- z+=OQzl@d)}22bWDY&-K4xz#o*EkHdz3$0o$XP-TpH_#pTCi3<{xIi)=6v=`S^@Z`G zL&cgWQ7|-UPkIyHLnT8t8e>7SC|;VN#u5dI+(cobXrDV-Oyejcsf!^s;pM=0j^i;r zzr{8zQpv!b)MW{Gc}u#>(&5@N7C~WHo+#IHci9gp;?jg;aA!rLwAyy4GNB~Ox=Edi zEgB%Ua?PgY{foWYhVf|mRkq|fF3rRf(!ZWumm3c$3wM zY7&=PPE>T$Bq+86%7tfavF-5=3pQA@#TKn_+Tzu_@($RAQ?cRydA8^ex7r3CrV-Wf zi0)30s5Vin6_D(9V`{wSOUiIFb@AFn9d$U~D&a|(keHwqf>wN)1gj%K>L9`ZETfDj zrjc}8JeaK@wT>sE@lbDtjit3;sF8ThJjMyo0(C)5pTSeS#YSAMG^hzYs1Frea-w3} zTOOaNc@y@;#BO^9S=5&lGQk}BR4F;}l8N%*y=8-#>%zE?WOj(wgyYbpA)8i6sx1Rd z`Vu};@+7T@p7j;BWPQ}56+3OoNy&PM9BaUi%{XuV0%wNznIH=>S-&>& zrz;|I(fze{(D|4TmA@vi6e`;p-SH z_2;Oh79>6brrWw<=M#Ey0Z97++5nJ5@hO<-L7rk3cF-pj>+tOmj!oGYnx&gpZCb9b z-qd!^Dz&3?69wZP)`jg^V9rKA?3I_HtR3K0v{ztlh5h*2#jtL8v7zuD$Keu(zt z!`dmE&>l8UQ;7sTv}0PwLA(u>QXQQ`;)>D{7G3iA1Daw=HPxz=%NtUS>#-Y|Iq9Qu z+HQ4<(&)w+HyfB)${9H{;|ZQkZN**W&c8`DHoM2+ba)ky;#7(W<|*ZNuaZ(898i=G zx<@PWM{KY1zDH}6Kc{~F=mI-MT+SbscsiZHkU0gR>t|A|}Sf&(V4XNvY{2j%)kx+T)>I!++m8G)h$fOml3y0u%vlAQ; zHFNp_J)5~!kQUpqlPrscI&rjT%AX%;8-$GQBsz~29a=+U`Lm6~XJmreQR4$qz(Fg) z%JMrfp)6dwQyvM3mb=NE@n&;*6puYmDR$>6a{o`CbGYJex$SY2W_jKdE1RL|Op1oS z2xn=0R6b(DzDBP-^6(R94)R>(2=-%O9x*xB+ z#nK~(;M*~V8mh6mX$(r9qd+ORedm+*bsRDN9Zhl^h~;Z72#zc{TzAIxa9FfZjhPcN zjJ_b9<{9Algg_>!-62!|)-s(V%{gd;7b4OsD;S5ABXm$fBp1n&r++#FqoHZ;qyo~H z4`e*R6B$ew9EEXTN_nP1A$2$XOsutJ!vTnuadYC&OP0+WD|oCHbHbr6II;)jZ+^Bw z3COCSe^L!&4Y)V0YFl~EDmtx5&a_Ywkh7j$w;COaZ5SM9`2S>oZb%x@A5Z=G*<~2Q zC(kuduMN*{;(`4C+x2vX;=E72L<6anuRQ+{HGbfQ%JXs{Y1Sv?SRWMqHwM8YnTC3# zS(e9}ju}Xwbn;5!$vbfxM2N_LfBn&3&ohL;DrMi%ifK=yk^2Egk%Q`eSe~5_o3gh- z9{FaueB$Vm*5~LwW7R4Ixd7ml4T1b`a_)=UTVMF+7(xCI`Q(e+=N>yLkbvsIQ zklS%m%g%mu$)Uo4?Tp%W!iO3q>Fn;6g%iMvP^ksM7f<#(9)a^on*Ff zu@fZeW_D%pE@%M%a_I*=^~ozOO5tI`(p$1vFPE4Ti?Z6q78{hi%yaoD4u^Kwu<1K; z5ksrPgmf^}*B9QSiheq7SLGkgd)nc>tq>18k#*JT6n$WmD7XOtQ*iRl!YrV(5eh_A z?yyI`_38%YzvSt!HEi`|7_)O{=8Xs%E<_iIiMTT}N9aa>m_BUa>9kH?EpN+1;5mi4 zbfzABt;cyD+(r-m_#a1BbQa^KH?b8>vN?J1`eOiib8AN^0>(QBhvX^LLbH4pPr;Z^ z#@|?2gr#HJ`})NHpo$Jb-e{~g?Y*PFMd7QZYvl1af?gOgekZBv?PB`L}tctTKbMhePPdW(8)ts7Z$ezaCT8@Ql9!hOo`y8XNjZ=+ zfs~i(5>7x$jPnW0ujX`cxE^T|l`f^~f}so%kt1iku$8j9!eLS>RSQSF6D!^+Yx1U6 z{zr*jd0lS)!|7+3N*-IRV2{l)C8^PNTxmG24sbR~)*w)S@$`L_5M>ES&_6N|71Lg<#54_?+ zLm=W6R{&fNz+3rWfV~fZ16VP*50E#()4<$;S3V)@1_qZW=A(k6rL%#tr6XA6NXA+b zyi~TM_Iv=`Gw}v$U0VqdV_*C9-PLL;WLIl|g}peIjKEBoh3TuBE(*#r*EPJT74Z>> zcqzY1*etpEt@%n)e)%m`d6oV=<;5khQ7P&84Lt0k0Mt-PX4n5K3g~a8KDCX5=#Qzt zzx`c1hpbjm@j9qsoTd}soKSX@bqHTV-DLoO0^o3i&GJ3adsO-k0x2<@GhNmDs5IaX zb@4ecI|oO!KVqG2ySqbrm~LJWPnxcY{GEp7@-XPE?`3E4zFhUEm1`w@?A0f>N@}H6)lru;0|2ozA-Z6(SNX1tCpje+8WM>UQ&sMqaud4DKlre-CA;FVJJ27+QS?UZ 
[binary delta data omitted]
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/xmldocs.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/xmldocs.cpython-37.pyc
index 0495bcb2307640814bc1100bdec7e32e3b588f41..d3c8c9487de9c1489f00d73abc1779d3a3975cc8 100644
GIT binary patch
[binary deltas omitted]
diff --git a/nlp_resource_data/nltk/corpus/reader/__pycache__/ycoe.cpython-37.pyc b/nlp_resource_data/nltk/corpus/reader/__pycache__/ycoe.cpython-37.pyc
index da94d7deff178aad3544dc7d63079b039cc55e85..c2104378c1fa8cb222333ae18b59776240b92b5f 100644
GIT binary patch
[binary deltas omitted]
diff --git a/nlp_resource_data/nltk/corpus/reader/aligned.py b/nlp_resource_data/nltk/corpus/reader/aligned.py
index a97fecc..0d8a67a 100644
--- a/nlp_resource_data/nltk/corpus/reader/aligned.py
+++ b/nlp_resource_data/nltk/corpus/reader/aligned.py
@@ -1,10 +1,12 @@
 # Natural Language Toolkit: Aligned Corpus Reader
 #
-# Copyright (C) 2001-2020 NLTK Project
+# Copyright (C) 2001-2019 NLTK Project
 # URL: 
 # Author: Steven Bird 
 # For license information, see LICENSE.TXT
 
+from six import string_types
+
 from nltk.tokenize import WhitespaceTokenizer, RegexpTokenizer
 from nltk.translate import AlignedSent, Alignment
 
@@ -26,11 +28,11 @@ class AlignedCorpusReader(CorpusReader):
         self,
         root,
         fileids,
-        sep="/",
+        sep='/',
         word_tokenizer=WhitespaceTokenizer(),
-        sent_tokenizer=RegexpTokenizer("\n", gaps=True),
+        sent_tokenizer=RegexpTokenizer('\n', gaps=True),
         alignedsent_block_reader=read_alignedsent_block,
-        encoding="latin1",
+        encoding='latin1',
     ):
         """
         Construct a new Aligned Corpus reader for a set of documents
@@ -55,7 +57,7 @@ class AlignedCorpusReader(CorpusReader):
         """
         if fileids is None:
             fileids = self._fileids
-        elif isinstance(fileids, str):
+        elif isinstance(fileids, string_types):
             fileids = [fileids]
         return concat([self.open(f).read() for f in fileids])
 
diff --git a/nlp_resource_data/nltk/corpus/reader/api.py b/nlp_resource_data/nltk/corpus/reader/api.py
index 98b3f5e..0b30f5a 100644
--- a/nlp_resource_data/nltk/corpus/reader/api.py
+++ b/nlp_resource_data/nltk/corpus/reader/api.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: API for Corpus Readers
 #
-# Copyright (C) 2001-2020 NLTK Project
+# Copyright (C) 2001-2019 NLTK Project
 # Author: Steven Bird 
 #         Edward Loper 
 # URL: 
 # For license information, see LICENSE.TXT
@@ -9,17 +9,22 @@
 """
 API for corpus readers.
""" +from __future__ import unicode_literals import os import re from collections import defaultdict from itertools import chain +from six import string_types + +from nltk import compat from nltk.data import PathPointer, FileSystemPathPointer, ZipFilePathPointer from nltk.corpus.reader.util import * +@compat.python_2_unicode_compatible class CorpusReader(object): """ A base class for "corpus reader" classes, each of which can be @@ -39,7 +44,7 @@ class CorpusReader(object): be used to select which portion of the corpus should be returned. """ - def __init__(self, root, fileids, encoding="utf8", tagset=None): + def __init__(self, root, fileids, encoding='utf8', tagset=None): """ :type root: PathPointer or str :param root: A path pointer identifying the root directory for @@ -71,18 +76,18 @@ class CorpusReader(object): tagged_...() methods. """ # Convert the root to a path pointer, if necessary. - if isinstance(root, str) and not isinstance(root, PathPointer): - m = re.match("(.*\.zip)/?(.*)$|", root) + if isinstance(root, string_types) and not isinstance(root, PathPointer): + m = re.match('(.*\.zip)/?(.*)$|', root) zipfile, zipentry = m.groups() if zipfile: root = ZipFilePathPointer(zipfile, zipentry) else: root = FileSystemPathPointer(root) elif not isinstance(root, PathPointer): - raise TypeError("CorpusReader: expected a string or a PathPointer") + raise TypeError('CorpusReader: expected a string or a PathPointer') # If `fileids` is a regexp, then expand it. - if isinstance(fileids, str): + if isinstance(fileids, string_types): fileids = find_corpus_fileids(root, fileids) self._fileids = fileids @@ -112,10 +117,10 @@ class CorpusReader(object): def __repr__(self): if isinstance(self._root, ZipFilePathPointer): - path = "%s/%s" % (self._root.zipfile.filename, self._root.entry) + path = '%s/%s' % (self._root.zipfile.filename, self._root.entry) else: - path = "%s" % self._root.path - return "<%s in %r>" % (self.__class__.__name__, path) + path = '%s' % self._root.path + return '<%s in %r>' % (self.__class__.__name__, path) def ensure_loaded(self): """ @@ -182,7 +187,7 @@ class CorpusReader(object): """ if fileids is None: fileids = self._fileids - elif isinstance(fileids, str): + elif isinstance(fileids, string_types): fileids = [fileids] paths = [self._root.join(f) for f in fileids] @@ -283,26 +288,26 @@ class CategorizedCorpusReader(object): self._file = None #: fileid of file containing the mapping self._delimiter = None #: delimiter for ``self._file`` - if "cat_pattern" in kwargs: - self._pattern = kwargs["cat_pattern"] - del kwargs["cat_pattern"] - elif "cat_map" in kwargs: - self._map = kwargs["cat_map"] - del kwargs["cat_map"] - elif "cat_file" in kwargs: - self._file = kwargs["cat_file"] - del kwargs["cat_file"] - if "cat_delimiter" in kwargs: - self._delimiter = kwargs["cat_delimiter"] - del kwargs["cat_delimiter"] + if 'cat_pattern' in kwargs: + self._pattern = kwargs['cat_pattern'] + del kwargs['cat_pattern'] + elif 'cat_map' in kwargs: + self._map = kwargs['cat_map'] + del kwargs['cat_map'] + elif 'cat_file' in kwargs: + self._file = kwargs['cat_file'] + del kwargs['cat_file'] + if 'cat_delimiter' in kwargs: + self._delimiter = kwargs['cat_delimiter'] + del kwargs['cat_delimiter'] else: raise ValueError( - "Expected keyword argument cat_pattern or " "cat_map or cat_file." + 'Expected keyword argument cat_pattern or ' 'cat_map or cat_file.' 
) - if "cat_pattern" in kwargs or "cat_map" in kwargs or "cat_file" in kwargs: + if 'cat_pattern' in kwargs or 'cat_map' in kwargs or 'cat_file' in kwargs: raise ValueError( - "Specify exactly one of: cat_pattern, " "cat_map, cat_file." + 'Specify exactly one of: cat_pattern, ' 'cat_map, cat_file.' ) def _init(self): @@ -325,8 +330,8 @@ class CategorizedCorpusReader(object): file_id, categories = line.split(self._delimiter, 1) if file_id not in self.fileids(): raise ValueError( - "In category mapping file %s: %s " - "not found" % (self._file, file_id) + 'In category mapping file %s: %s ' + 'not found' % (self._file, file_id) ) for category in categories.split(self._delimiter): self._add(file_id, category) @@ -344,7 +349,7 @@ class CategorizedCorpusReader(object): self._init() if fileids is None: return sorted(self._c2f) - if isinstance(fileids, str): + if isinstance(fileids, string_types): fileids = [fileids] return sorted(set.union(*[self._f2c[d] for d in fileids])) @@ -355,13 +360,13 @@ class CategorizedCorpusReader(object): """ if categories is None: return super(CategorizedCorpusReader, self).fileids() - elif isinstance(categories, str): + elif isinstance(categories, string_types): if self._f2c is None: self._init() if categories in self._c2f: return sorted(self._c2f[categories]) else: - raise ValueError("Category %s not found" % categories) + raise ValueError('Category %s not found' % categories) else: if self._f2c is None: self._init() @@ -403,7 +408,7 @@ class SyntaxCorpusReader(CorpusReader): def raw(self, fileids=None): if fileids is None: fileids = self._fileids - elif isinstance(fileids, str): + elif isinstance(fileids, string_types): fileids = [fileids] return concat([self.open(f).read() for f in fileids]) diff --git a/nlp_resource_data/nltk/corpus/reader/bnc.py b/nlp_resource_data/nltk/corpus/reader/bnc.py index 4f3f148..9d02754 100644 --- a/nlp_resource_data/nltk/corpus/reader/bnc.py +++ b/nlp_resource_data/nltk/corpus/reader/bnc.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Plaintext Corpus Reader # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # URL: # For license information, see LICENSE.TXT @@ -57,7 +57,7 @@ class BNCCorpusReader(XMLCorpusReader): word tokens. Otherwise, leave the spaces on the tokens. :param stem: If true, then use word stems instead of word strings. """ - tag = "c5" if c5 else "pos" + tag = 'c5' if c5 else 'pos' return self._views(fileids, False, tag, strip_space, stem) def sents(self, fileids=None, strip_space=True, stem=False): @@ -85,7 +85,7 @@ class BNCCorpusReader(XMLCorpusReader): word tokens. Otherwise, leave the spaces on the tokens. :param stem: If true, then use word stems instead of word strings. 
""" - tag = "c5" if c5 else "pos" + tag = 'c5' if c5 else 'pos' return self._views( fileids, sent=True, tag=tag, strip_space=strip_space, stem=stem ) @@ -114,7 +114,7 @@ class BNCCorpusReader(XMLCorpusReader): result = [] xmldoc = ElementTree.parse(fileid).getroot() - for xmlsent in xmldoc.findall(".//s"): + for xmlsent in xmldoc.findall('.//s'): sent = [] for xmlword in _all_xmlwords_in(xmlsent): word = xmlword.text @@ -123,14 +123,14 @@ class BNCCorpusReader(XMLCorpusReader): if strip_space or stem: word = word.strip() if stem: - word = xmlword.get("hw", word) - if tag == "c5": - word = (word, xmlword.get("c5")) - elif tag == "pos": - word = (word, xmlword.get("pos", xmlword.get("c5"))) + word = xmlword.get('hw', word) + if tag == 'c5': + word = (word, xmlword.get('c5')) + elif tag == 'pos': + word = (word, xmlword.get('pos', xmlword.get('c5'))) sent.append(word) if bracket_sent: - result.append(BNCSentence(xmlsent.attrib["n"], sent)) + result.append(BNCSentence(xmlsent.attrib['n'], sent)) else: result.extend(sent) @@ -142,7 +142,7 @@ def _all_xmlwords_in(elt, result=None): if result is None: result = [] for child in elt: - if child.tag in ("c", "w"): + if child.tag in ('c', 'w'): result.append(child) else: _all_xmlwords_in(child, result) @@ -166,7 +166,7 @@ class BNCWordView(XMLCorpusView): """ tags_to_ignore = set( - ["pb", "gap", "vocal", "event", "unclear", "shift", "pause", "align"] + ['pb', 'gap', 'vocal', 'event', 'unclear', 'shift', 'pause', 'align'] ) """These tags are ignored. For their description refer to the technical documentation, for example, @@ -183,9 +183,9 @@ class BNCWordView(XMLCorpusView): :param stem: If true, then substitute stems for words. """ if sent: - tagspec = ".*/s" + tagspec = '.*/s' else: - tagspec = ".*/s/(.*/)?(c|w)" + tagspec = '.*/s/(.*/)?(c|w)' self._sent = sent self._tag = tag self._strip_space = strip_space @@ -200,7 +200,7 @@ class BNCWordView(XMLCorpusView): # Read in a tasty header. self._open() - self.read_block(self._stream, ".*/teiHeader$", self.handle_header) + self.read_block(self._stream, '.*/teiHeader$', self.handle_header) self.close() # Reset tag context. @@ -208,22 +208,22 @@ class BNCWordView(XMLCorpusView): def handle_header(self, elt, context): # Set up some metadata! 
- titles = elt.findall("titleStmt/title") + titles = elt.findall('titleStmt/title') if titles: - self.title = "\n".join(title.text.strip() for title in titles) + self.title = '\n'.join(title.text.strip() for title in titles) - authors = elt.findall("titleStmt/author") + authors = elt.findall('titleStmt/author') if authors: - self.author = "\n".join(author.text.strip() for author in authors) + self.author = '\n'.join(author.text.strip() for author in authors) - editors = elt.findall("titleStmt/editor") + editors = elt.findall('titleStmt/editor') if editors: - self.editor = "\n".join(editor.text.strip() for editor in editors) + self.editor = '\n'.join(editor.text.strip() for editor in editors) - resps = elt.findall("titleStmt/respStmt") + resps = elt.findall('titleStmt/respStmt') if resps: - self.resps = "\n\n".join( - "\n".join(resp_elt.text.strip() for resp_elt in resp) for resp in resps + self.resps = '\n\n'.join( + '\n'.join(resp_elt.text.strip() for resp_elt in resp) for resp in resps ) def handle_elt(self, elt, context): @@ -239,20 +239,20 @@ class BNCWordView(XMLCorpusView): if self._strip_space or self._stem: word = word.strip() if self._stem: - word = elt.get("hw", word) - if self._tag == "c5": - word = (word, elt.get("c5")) - elif self._tag == "pos": - word = (word, elt.get("pos", elt.get("c5"))) + word = elt.get('hw', word) + if self._tag == 'c5': + word = (word, elt.get('c5')) + elif self._tag == 'pos': + word = (word, elt.get('pos', elt.get('c5'))) return word def handle_sent(self, elt): sent = [] for child in elt: - if child.tag in ("mw", "hi", "corr", "trunc"): + if child.tag in ('mw', 'hi', 'corr', 'trunc'): sent += [self.handle_word(w) for w in child] - elif child.tag in ("w", "c"): + elif child.tag in ('w', 'c'): sent.append(self.handle_word(child)) elif child.tag not in self.tags_to_ignore: - raise ValueError("Unexpected element %s" % child.tag) - return BNCSentence(elt.attrib["n"], sent) + raise ValueError('Unexpected element %s' % child.tag) + return BNCSentence(elt.attrib['n'], sent) diff --git a/nlp_resource_data/nltk/corpus/reader/bracket_parse.py b/nlp_resource_data/nltk/corpus/reader/bracket_parse.py index 9a958c4..55093af 100644 --- a/nlp_resource_data/nltk/corpus/reader/bracket_parse.py +++ b/nlp_resource_data/nltk/corpus/reader/bracket_parse.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Penn Treebank Reader # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Steven Bird # Edward Loper # URL: @@ -18,10 +18,10 @@ from nltk.corpus.reader.util import * from nltk.corpus.reader.api import * # we use [^\s()]+ instead of \S+? 
to avoid matching () -SORTTAGWRD = re.compile(r"\((\d+) ([^\s()]+) ([^\s()]+)\)") -TAGWORD = re.compile(r"\(([^\s()]+) ([^\s()]+)\)") -WORD = re.compile(r"\([^\s()]+ ([^\s()]+)\)") -EMPTY_BRACKETS = re.compile(r"\s*\(\s*\(") +SORTTAGWRD = re.compile(r'\((\d+) ([^\s()]+) ([^\s()]+)\)') +TAGWORD = re.compile(r'\(([^\s()]+) ([^\s()]+)\)') +WORD = re.compile(r'\([^\s()]+ ([^\s()]+)\)') +EMPTY_BRACKETS = re.compile(r'\s*\(\s*\(') class BracketParseCorpusReader(SyntaxCorpusReader): @@ -37,8 +37,8 @@ class BracketParseCorpusReader(SyntaxCorpusReader): root, fileids, comment_char=None, - detect_blocks="unindented_paren", - encoding="utf8", + detect_blocks='unindented_paren', + encoding='utf8', tagset=None, ): """ @@ -62,24 +62,28 @@ class BracketParseCorpusReader(SyntaxCorpusReader): self._tagset = tagset def _read_block(self, stream): - if self._detect_blocks == "sexpr": + if self._detect_blocks == 'sexpr': return read_sexpr_block(stream, comment_char=self._comment_char) - elif self._detect_blocks == "blankline": + elif self._detect_blocks == 'blankline': return read_blankline_block(stream) - elif self._detect_blocks == "unindented_paren": + elif self._detect_blocks == 'unindented_paren': # Tokens start with unindented left parens. - toks = read_regexp_block(stream, start_re=r"^\(") + toks = read_regexp_block(stream, start_re=r'^\(') # Strip any comments out of the tokens. if self._comment_char: toks = [ - re.sub("(?m)^%s.*" % re.escape(self._comment_char), "", tok) + re.sub('(?m)^%s.*' % re.escape(self._comment_char), '', tok) for tok in toks ] return toks else: - assert 0, "bad block type" + assert 0, 'bad block type' def _normalize(self, t): + # If there's an empty set of brackets surrounding the actual + # parse, then strip them off. + if EMPTY_BRACKETS.match(t): + t = t.strip()[1:-1] # Replace leaves of the form (!), (,), with (! !), (, ,) t = re.sub(r"\((.)\)", r"(\1 \1)", t) # Replace leaves of the form (tag word root) with (tag word) @@ -88,20 +92,15 @@ class BracketParseCorpusReader(SyntaxCorpusReader): def _parse(self, t): try: - tree = Tree.fromstring(self._normalize(t)) - # If there's an empty node at the top, strip it off - if tree.label() == '' and len(tree) == 1: - return tree[0] - else: - return tree + return Tree.fromstring(self._normalize(t)) except ValueError as e: sys.stderr.write("Bad tree detected; trying to recover...\n") # Try to recover, if we can: - if e.args == ("mismatched parens",): + if e.args == ('mismatched parens',): for n in range(1, 5): try: - v = Tree(self._normalize(t + ")" * n)) + v = Tree(self._normalize(t + ')' * n)) sys.stderr.write( " Recovered by adding %d close " "paren(s)\n" % n ) @@ -111,7 +110,7 @@ class BracketParseCorpusReader(SyntaxCorpusReader): # Try something else: sys.stderr.write(" Recovered by returning a flat parse.\n") # sys.stderr.write(' '.join(t.split())+'\n') - return Tree("S", self._tag(t)) + return Tree('S', self._tag(t)) def _tag(self, t, tagset=None): tagged_sent = [(w, p) for (p, w) in TAGWORD.findall(self._normalize(t))] @@ -148,7 +147,7 @@ class CategorizedBracketParseCorpusReader( def _resolve(self, fileids, categories): if fileids is not None and categories is not None: - raise ValueError("Specify fileids or categories, not both") + raise ValueError('Specify fileids or categories, not both') if categories is not None: return self.fileids(categories) else: @@ -208,12 +207,12 @@ class AlpinoCorpusReader(BracketParseCorpusReader): untouched. 
""" - def __init__(self, root, encoding="ISO-8859-1", tagset=None): + def __init__(self, root, encoding='ISO-8859-1', tagset=None): BracketParseCorpusReader.__init__( self, root, - "alpino\.xml", - detect_blocks="blankline", + 'alpino\.xml', + detect_blocks='blankline', encoding=encoding, tagset=tagset, ) diff --git a/nlp_resource_data/nltk/corpus/reader/categorized_sents.py b/nlp_resource_data/nltk/corpus/reader/categorized_sents.py index 0c597d5..e0a3034 100644 --- a/nlp_resource_data/nltk/corpus/reader/categorized_sents.py +++ b/nlp_resource_data/nltk/corpus/reader/categorized_sents.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Categorized Sentences Corpus Reader # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Pierpaolo Pantone <24alsecondo@gmail.com> # URL: # For license information, see LICENSE.TXT @@ -34,6 +34,7 @@ Related papers: sentiment categorization with respect to rating scales". Proceedings of the ACL, 2005. """ +from six import string_types from nltk.corpus.reader.api import * from nltk.tokenize import * @@ -78,7 +79,7 @@ class CategorizedSentencesCorpusReader(CategorizedCorpusReader, CorpusReader): fileids, word_tokenizer=WhitespaceTokenizer(), sent_tokenizer=None, - encoding="utf8", + encoding='utf8', **kwargs ): """ @@ -98,7 +99,7 @@ class CategorizedSentencesCorpusReader(CategorizedCorpusReader, CorpusReader): def _resolve(self, fileids, categories): if fileids is not None and categories is not None: - raise ValueError("Specify fileids or categories, not both") + raise ValueError('Specify fileids or categories, not both') if categories is not None: return self.fileids(categories) else: @@ -116,7 +117,7 @@ class CategorizedSentencesCorpusReader(CategorizedCorpusReader, CorpusReader): fileids = self._resolve(fileids, categories) if fileids is None: fileids = self._fileids - elif isinstance(fileids, str): + elif isinstance(fileids, string_types): fileids = [fileids] return concat([self.open(f).read() for f in fileids]) @@ -141,7 +142,7 @@ class CategorizedSentencesCorpusReader(CategorizedCorpusReader, CorpusReader): fileids = self._resolve(fileids, categories) if fileids is None: fileids = self._fileids - elif isinstance(fileids, str): + elif isinstance(fileids, string_types): fileids = [fileids] return concat( [ @@ -165,7 +166,7 @@ class CategorizedSentencesCorpusReader(CategorizedCorpusReader, CorpusReader): fileids = self._resolve(fileids, categories) if fileids is None: fileids = self._fileids - elif isinstance(fileids, str): + elif isinstance(fileids, string_types): fileids = [fileids] return concat( [ diff --git a/nlp_resource_data/nltk/corpus/reader/chasen.py b/nlp_resource_data/nltk/corpus/reader/chasen.py index 0d0cc5e..ef60b0d 100644 --- a/nlp_resource_data/nltk/corpus/reader/chasen.py +++ b/nlp_resource_data/nltk/corpus/reader/chasen.py @@ -1,13 +1,16 @@ # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Masato Hagiwara # URL: # For license information, see LICENSE.TXT # For more information, see http://lilyx.net/pages/nltkjapanesecorpus.html +from __future__ import print_function import sys +from six import string_types + from nltk.corpus.reader import util from nltk.corpus.reader.util import * @@ -15,14 +18,14 @@ from nltk.corpus.reader.api import * class ChasenCorpusReader(CorpusReader): - def __init__(self, root, fileids, encoding="utf8", sent_splitter=None): + def __init__(self, root, fileids, encoding='utf8', sent_splitter=None): self._sent_splitter = sent_splitter 
CorpusReader.__init__(self, root, fileids, encoding) def raw(self, fileids=None): if fileids is None: fileids = self._fileids - elif isinstance(fileids, str): + elif isinstance(fileids, string_types): fileids = [fileids] return concat([self.open(f).read() for f in fileids]) @@ -106,9 +109,9 @@ class ChasenCorpusView(StreamBackedCorpusView): sent = [] for line in para_str.splitlines(): - _eos = line.strip() == "EOS" - _cells = line.split("\t") - w = (_cells[0], "\t".join(_cells[1:])) + _eos = line.strip() == 'EOS' + _cells = line.split('\t') + w = (_cells[0], '\t'.join(_cells[1:])) if not _eos: sent.append(w) @@ -143,12 +146,12 @@ def demo(): import nltk from nltk.corpus.util import LazyCorpusLoader - jeita = LazyCorpusLoader("jeita", ChasenCorpusReader, r".*chasen", encoding="utf-8") - print("/".join(jeita.words()[22100:22140])) + jeita = LazyCorpusLoader('jeita', ChasenCorpusReader, r'.*chasen', encoding='utf-8') + print('/'.join(jeita.words()[22100:22140])) print( - "\nEOS\n".join( - "\n".join("%s/%s" % (w[0], w[1].split("\t")[2]) for w in sent) + '\nEOS\n'.join( + '\n'.join("%s/%s" % (w[0], w[1].split('\t')[2]) for w in sent) for sent in jeita.tagged_sents()[2170:2173] ) ) @@ -158,11 +161,11 @@ def test(): from nltk.corpus.util import LazyCorpusLoader - jeita = LazyCorpusLoader("jeita", ChasenCorpusReader, r".*chasen", encoding="utf-8") + jeita = LazyCorpusLoader('jeita', ChasenCorpusReader, r'.*chasen', encoding='utf-8') - assert isinstance(jeita.tagged_words()[0][1], str) + assert isinstance(jeita.tagged_words()[0][1], string_types) -if __name__ == "__main__": +if __name__ == '__main__': demo() test() diff --git a/nlp_resource_data/nltk/corpus/reader/childes.py b/nlp_resource_data/nltk/corpus/reader/childes.py index 1d163c6..7c5faa9 100644 --- a/nlp_resource_data/nltk/corpus/reader/childes.py +++ b/nlp_resource_data/nltk/corpus/reader/childes.py @@ -1,6 +1,6 @@ # CHILDES XML Corpus Reader -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Tomonori Nagano # Alexis Dimitriadis # URL: @@ -9,11 +9,13 @@ """ Corpus reader for the XML version of the CHILDES corpus. """ +from __future__ import print_function, division -__docformat__ = "epytext en" +__docformat__ = 'epytext en' import re from collections import defaultdict +from six import string_types from nltk.util import flatten, LazyMap, LazyConcatenation @@ -21,14 +23,14 @@ from nltk.corpus.reader.util import concat from nltk.corpus.reader.xmldocs import XMLCorpusReader, ElementTree # to resolve the namespace issue -NS = "http://www.talkbank.org/ns/talkbank" +NS = 'http://www.talkbank.org/ns/talkbank' class CHILDESCorpusReader(XMLCorpusReader): """ Corpus reader for the XML version of the CHILDES corpus. - The CHILDES corpus is available at ``https://childes.talkbank.org/``. The XML - version of CHILDES is located at ``https://childes.talkbank.org/data-xml/``. + The CHILDES corpus is available at ``http://childes.psy.cmu.edu/``. The XML + version of CHILDES is located at ``http://childes.psy.cmu.edu/data-xml/``. Copy the needed parts of the CHILDES XML corpus into the NLTK data directory (``nltk_data/corpora/CHILDES/``). 
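(For orientation, once the data is unpacked as described above the reader can be used roughly as in the sketch below; the corpus root and fileid pattern are illustrative values along the lines of the demo() function later in this file, not fixed paths.)

    from nltk.data import find
    from nltk.corpus.reader import CHILDESCorpusReader

    # Assumes the Eng-USA portion of the CHILDES XML data was unpacked under
    # nltk_data/corpora/childes/data-xml/ as described in the docstring above.
    corpus_root = find('corpora/childes/data-xml/Eng-USA/')
    childes = CHILDESCorpusReader(corpus_root, '.*.xml')

    fileid = childes.fileids()[0]
    print(childes.words(fileid)[:7])                 # plain word tokens
    print(childes.words(fileid, speaker='CHI')[:7])  # tokens from the target child only
    print(childes.age(fileid, month=True))           # target child's age in months
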
@@ -43,7 +45,7 @@ class CHILDESCorpusReader(XMLCorpusReader): def words( self, fileids=None, - speaker="ALL", + speaker='ALL', stem=False, relation=False, strip_space=True, @@ -83,7 +85,7 @@ class CHILDESCorpusReader(XMLCorpusReader): def tagged_words( self, fileids=None, - speaker="ALL", + speaker='ALL', stem=False, relation=False, strip_space=True, @@ -125,7 +127,7 @@ class CHILDESCorpusReader(XMLCorpusReader): def sents( self, fileids=None, - speaker="ALL", + speaker='ALL', stem=False, relation=None, strip_space=True, @@ -167,7 +169,7 @@ class CHILDESCorpusReader(XMLCorpusReader): def tagged_sents( self, fileids=None, - speaker="ALL", + speaker='ALL', stem=False, relation=None, strip_space=True, @@ -241,13 +243,13 @@ class CHILDESCorpusReader(XMLCorpusReader): # getting participants' data pat = dictOfDicts() for participant in xmldoc.findall( - ".//{%s}Participants/{%s}participant" % (NS, NS) + './/{%s}Participants/{%s}participant' % (NS, NS) ): for (key, value) in participant.items(): - pat[participant.get("id")][key] = value + pat[participant.get('id')][key] = value return pat - def age(self, fileids=None, speaker="CHI", month=False): + def age(self, fileids=None, speaker='CHI', month=False): """ :return: the given file(s) as string or int :rtype: list or int @@ -264,10 +266,10 @@ class CHILDESCorpusReader(XMLCorpusReader): def _get_age(self, fileid, speaker, month): xmldoc = ElementTree.parse(fileid).getroot() - for pat in xmldoc.findall(".//{%s}Participants/{%s}participant" % (NS, NS)): + for pat in xmldoc.findall('.//{%s}Participants/{%s}participant' % (NS, NS)): try: - if pat.get("id") == speaker: - age = pat.get("age") + if pat.get('id') == speaker: + age = pat.get('age') if month: age = self.convert_age(age) return age @@ -287,7 +289,7 @@ class CHILDESCorpusReader(XMLCorpusReader): pass return age_month - def MLU(self, fileids=None, speaker="CHI"): + def MLU(self, fileids=None, speaker='CHI'): """ :return: the given file(s) as a floating number :rtype: list(float) @@ -318,7 +320,7 @@ class CHILDESCorpusReader(XMLCorpusReader): for sent in sents: posList = [pos for (word, pos) in sent] # if any part of the sentence is intelligible - if any(pos == "unk" for pos in posList): + if any(pos == 'unk' for pos in posList): continue # if the sentence is null elif sent == []: @@ -329,8 +331,8 @@ class CHILDESCorpusReader(XMLCorpusReader): else: results.append([word for (word, pos) in sent]) # count number of fillers - if len(set(["co", None]).intersection(posList)) > 0: - numFillers += posList.count("co") + if len(set(['co', None]).intersection(posList)) > 0: + numFillers += posList.count('co') numFillers += posList.count(None) sentDiscount += 1 lastSent = sent @@ -339,7 +341,7 @@ class CHILDESCorpusReader(XMLCorpusReader): # count number of morphemes # (e.g., 'read' = 1 morpheme but 'read-PAST' is 2 morphemes) numWords = ( - len(flatten([word.split("-") for word in thisWordList])) - numFillers + len(flatten([word.split('-') for word in thisWordList])) - numFillers ) numSents = len(results) - sentDiscount mlu = numWords / numSents @@ -352,54 +354,54 @@ class CHILDESCorpusReader(XMLCorpusReader): self, fileid, speaker, sent, stem, relation, pos, strip_space, replace ): if ( - isinstance(speaker, str) and speaker != "ALL" + isinstance(speaker, string_types) and speaker != 'ALL' ): # ensure we have a list of speakers speaker = [speaker] xmldoc = ElementTree.parse(fileid).getroot() # processing each xml doc results = [] - for xmlsent in xmldoc.findall(".//{%s}u" % NS): + for xmlsent in 
xmldoc.findall('.//{%s}u' % NS): sents = [] # select speakers - if speaker == "ALL" or xmlsent.get("who") in speaker: - for xmlword in xmlsent.findall(".//{%s}w" % NS): + if speaker == 'ALL' or xmlsent.get('who') in speaker: + for xmlword in xmlsent.findall('.//{%s}w' % NS): infl = None suffixStem = None suffixTag = None # getting replaced words - if replace and xmlsent.find(".//{%s}w/{%s}replacement" % (NS, NS)): + if replace and xmlsent.find('.//{%s}w/{%s}replacement' % (NS, NS)): xmlword = xmlsent.find( - ".//{%s}w/{%s}replacement/{%s}w" % (NS, NS, NS) + './/{%s}w/{%s}replacement/{%s}w' % (NS, NS, NS) ) - elif replace and xmlsent.find(".//{%s}w/{%s}wk" % (NS, NS)): - xmlword = xmlsent.find(".//{%s}w/{%s}wk" % (NS, NS)) + elif replace and xmlsent.find('.//{%s}w/{%s}wk' % (NS, NS)): + xmlword = xmlsent.find('.//{%s}w/{%s}wk' % (NS, NS)) # get text if xmlword.text: word = xmlword.text else: - word = "" + word = '' # strip tailing space if strip_space: word = word.strip() # stem if relation or stem: try: - xmlstem = xmlword.find(".//{%s}stem" % NS) + xmlstem = xmlword.find('.//{%s}stem' % NS) word = xmlstem.text except AttributeError as e: pass # if there is an inflection try: xmlinfl = xmlword.find( - ".//{%s}mor/{%s}mw/{%s}mk" % (NS, NS, NS) + './/{%s}mor/{%s}mw/{%s}mk' % (NS, NS, NS) ) - word += "-" + xmlinfl.text + word += '-' + xmlinfl.text except: pass # if there is a suffix try: xmlsuffix = xmlword.find( - ".//{%s}mor/{%s}mor-post/{%s}mw/{%s}stem" + './/{%s}mor/{%s}mor-post/{%s}mw/{%s}stem' % (NS, NS, NS, NS) ) suffixStem = xmlsuffix.text @@ -420,11 +422,11 @@ class CHILDESCorpusReader(XMLCorpusReader): tag = "" try: xmlsuffixpos = xmlword.findall( - ".//{%s}mor/{%s}mor-post/{%s}mw/{%s}pos/{%s}c" + './/{%s}mor/{%s}mor-post/{%s}mw/{%s}pos/{%s}c' % (NS, NS, NS, NS, NS) ) xmlsuffixpos2 = xmlword.findall( - ".//{%s}mor/{%s}mor-post/{%s}mw/{%s}pos/{%s}s" + './/{%s}mor/{%s}mor-post/{%s}mw/{%s}pos/{%s}s' % (NS, NS, NS, NS, NS) ) if xmlsuffixpos2: @@ -443,17 +445,17 @@ class CHILDESCorpusReader(XMLCorpusReader): # if relation == True: for xmlstem_rel in xmlword.findall( - ".//{%s}mor/{%s}gra" % (NS, NS) + './/{%s}mor/{%s}gra' % (NS, NS) ): - if not xmlstem_rel.get("type") == "grt": + if not xmlstem_rel.get('type') == 'grt': word = ( word[0], word[1], - xmlstem_rel.get("index") + xmlstem_rel.get('index') + "|" - + xmlstem_rel.get("head") + + xmlstem_rel.get('head') + "|" - + xmlstem_rel.get("relation"), + + xmlstem_rel.get('relation'), ) else: word = ( @@ -462,25 +464,25 @@ class CHILDESCorpusReader(XMLCorpusReader): word[2], word[0], word[1], - xmlstem_rel.get("index") + xmlstem_rel.get('index') + "|" - + xmlstem_rel.get("head") + + xmlstem_rel.get('head') + "|" - + xmlstem_rel.get("relation"), + + xmlstem_rel.get('relation'), ) try: for xmlpost_rel in xmlword.findall( - ".//{%s}mor/{%s}mor-post/{%s}gra" % (NS, NS, NS) + './/{%s}mor/{%s}mor-post/{%s}gra' % (NS, NS, NS) ): - if not xmlpost_rel.get("type") == "grt": + if not xmlpost_rel.get('type') == 'grt': suffixStem = ( suffixStem[0], suffixStem[1], - xmlpost_rel.get("index") + xmlpost_rel.get('index') + "|" - + xmlpost_rel.get("head") + + xmlpost_rel.get('head') + "|" - + xmlpost_rel.get("relation"), + + xmlpost_rel.get('relation'), ) else: suffixStem = ( @@ -489,11 +491,11 @@ class CHILDESCorpusReader(XMLCorpusReader): suffixStem[2], suffixStem[0], suffixStem[1], - xmlpost_rel.get("index") + xmlpost_rel.get('index') + "|" - + xmlpost_rel.get("head") + + xmlpost_rel.get('head') + "|" - + xmlpost_rel.get("relation"), + + 
xmlpost_rel.get('relation'), ) except: pass @@ -511,7 +513,7 @@ class CHILDESCorpusReader(XMLCorpusReader): shouldn't need to be changed, unless CHILDES changes the configuration of their server or unless the user sets up their own corpus webserver. """ - childes_url_base = r"https://childes.talkbank.org/browser/index.php?url=" + childes_url_base = r'http://childes.psy.cmu.edu/browser/index.php?url=' def webview_file(self, fileid, urlbase=None): """Map a corpus file to its web version on the CHILDES website, @@ -534,27 +536,27 @@ class CHILDESCorpusReader(XMLCorpusReader): corpus root points to the Cornell folder, urlbase='Eng-USA/Cornell'. """ - import webbrowser + import webbrowser, re if urlbase: path = urlbase + "/" + fileid else: full = self.root + "/" + fileid - full = re.sub(r"\\", "/", full) - if "/childes/" in full.lower(): + full = re.sub(r'\\', '/', full) + if '/childes/' in full.lower(): # Discard /data-xml/ if present - path = re.findall(r"(?i)/childes(?:/data-xml)?/(.*)\.xml", full)[0] - elif "eng-usa" in full.lower(): - path = "Eng-USA/" + re.findall(r"/(?i)Eng-USA/(.*)\.xml", full)[0] + path = re.findall(r'(?i)/childes(?:/data-xml)?/(.*)\.xml', full)[0] + elif 'eng-usa' in full.lower(): + path = 'Eng-USA/' + re.findall(r'/(?i)Eng-USA/(.*)\.xml', full)[0] else: path = fileid # Strip ".xml" and add ".cha", as necessary: - if path.endswith(".xml"): + if path.endswith('.xml'): path = path[:-4] - if not path.endswith(".cha"): - path = path + ".cha" + if not path.endswith('.cha'): + path = path + '.cha' url = self.childes_url_base + path @@ -572,20 +574,20 @@ def demo(corpus_root=None): if not corpus_root: from nltk.data import find - corpus_root = find("corpora/childes/data-xml/Eng-USA/") + corpus_root = find('corpora/childes/data-xml/Eng-USA/') try: - childes = CHILDESCorpusReader(corpus_root, ".*.xml") + childes = CHILDESCorpusReader(corpus_root, '.*.xml') # describe all corpus for file in childes.fileids()[:5]: - corpus = "" - corpus_id = "" + corpus = '' + corpus_id = '' for (key, value) in childes.corpus(file)[0].items(): if key == "Corpus": corpus = value if key == "Id": corpus_id = value - print("Reading", corpus, corpus_id, " .....") + print('Reading', corpus, corpus_id, ' .....') print("words:", childes.words(file)[:7], "...") print( "words with replaced words:", @@ -593,8 +595,8 @@ def demo(corpus_root=None): " ...", ) print("words with pos tags:", childes.tagged_words(file)[:7], " ...") - print("words (only MOT):", childes.words(file, speaker="MOT")[:7], "...") - print("words (only CHI):", childes.words(file, speaker="CHI")[:7], "...") + print("words (only MOT):", childes.words(file, speaker='MOT')[:7], "...") + print("words (only CHI):", childes.words(file, speaker='CHI')[:7], "...") print("stemmed words:", childes.words(file, stem=True)[:7], " ...") print( "words with relations and pos-tag:", @@ -615,13 +617,13 @@ def demo(corpus_root=None): except LookupError as e: print( """The CHILDES corpus, or the parts you need, should be manually - downloaded from https://childes.talkbank.org/data-xml/ and saved at + downloaded from http://childes.psy.cmu.edu/data-xml/ and saved at [NLTK_Data_Dir]/corpora/childes/ Alternately, you can call the demo with the path to a portion of the CHILDES corpus, e.g.: demo('/path/to/childes/data-xml/Eng-USA/") """ ) - # corpus_root_http = urllib2.urlopen('https://childes.talkbank.org/data-xml/Eng-USA/Bates.zip') + # corpus_root_http = urllib2.urlopen('http://childes.psy.cmu.edu/data-xml/Eng-USA/Bates.zip') # corpus_root_http_bates = 
zipfile.ZipFile(cStringIO.StringIO(corpus_root_http.read())) ##this fails # childes = CHILDESCorpusReader(corpus_root_http_bates,corpus_root_http_bates.namelist()) diff --git a/nlp_resource_data/nltk/corpus/reader/chunked.py b/nlp_resource_data/nltk/corpus/reader/chunked.py index bb32832..0edd0ea 100644 --- a/nlp_resource_data/nltk/corpus/reader/chunked.py +++ b/nlp_resource_data/nltk/corpus/reader/chunked.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Chunked Corpus Reader # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Steven Bird # Edward Loper # URL: @@ -13,6 +13,8 @@ documents. import os.path, codecs +from six import string_types + import nltk from nltk.corpus.reader.bracket_parse import BracketParseCorpusReader from nltk.tree import Tree @@ -38,11 +40,11 @@ class ChunkedCorpusReader(CorpusReader): self, root, fileids, - extension="", + extension='', str2chunktree=tagstr2tree, - sent_tokenizer=RegexpTokenizer("\n", gaps=True), + sent_tokenizer=RegexpTokenizer('\n', gaps=True), para_block_reader=read_blankline_block, - encoding="utf8", + encoding='utf8', tagset=None, ): """ @@ -61,7 +63,7 @@ class ChunkedCorpusReader(CorpusReader): """ if fileids is None: fileids = self._fileids - elif isinstance(fileids, str): + elif isinstance(fileids, string_types): fileids = [fileids] return concat([self.open(f).read() for f in fileids]) @@ -279,5 +281,5 @@ class ChunkedCorpusView(StreamBackedCorpusView): elif isinstance(child, tuple): tree[i] = child[0] else: - raise ValueError("expected child to be Tree or tuple") + raise ValueError('expected child to be Tree or tuple') return tree diff --git a/nlp_resource_data/nltk/corpus/reader/cmudict.py b/nlp_resource_data/nltk/corpus/reader/cmudict.py index ba1cdf9..a4aef7d 100644 --- a/nlp_resource_data/nltk/corpus/reader/cmudict.py +++ b/nlp_resource_data/nltk/corpus/reader/cmudict.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Carnegie Mellon Pronouncing Dictionary Corpus Reader # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Steven Bird # URL: # For license information, see LICENSE.TXT @@ -45,6 +45,7 @@ Y yield Y IY L D Z zee Z IY ZH seizure S IY ZH ER """ +from nltk import compat from nltk.util import Index from nltk.corpus.reader.util import * @@ -69,7 +70,7 @@ class CMUDictCorpusReader(CorpusReader): :return: the cmudict lexicon as a raw string. """ fileids = self._fileids - if isinstance(fileids, str): + if isinstance(fileids, string_types): fileids = [fileids] return concat([self.open(f).read() for f in fileids]) @@ -91,7 +92,7 @@ def read_cmudict_block(stream): entries = [] while len(entries) < 100: # Read 100 at a time. line = stream.readline() - if line == "": + if line == '': return entries # end of file. 
pieces = line.split() entries.append((pieces[0].lower(), pieces[2:])) diff --git a/nlp_resource_data/nltk/corpus/reader/comparative_sents.py b/nlp_resource_data/nltk/corpus/reader/comparative_sents.py index 9d6fcdb..30d00cc 100644 --- a/nlp_resource_data/nltk/corpus/reader/comparative_sents.py +++ b/nlp_resource_data/nltk/corpus/reader/comparative_sents.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Comparative Sentence Corpus Reader # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Pierpaolo Pantone <24alsecondo@gmail.com> # URL: # For license information, see LICENSE.TXT @@ -35,17 +35,19 @@ Related papers: """ import re +from six import string_types + from nltk.corpus.reader.api import * from nltk.tokenize import * # Regular expressions for dataset components -STARS = re.compile(r"^\*+$") -COMPARISON = re.compile(r"") -CLOSE_COMPARISON = re.compile(r"") -GRAD_COMPARISON = re.compile(r"") -NON_GRAD_COMPARISON = re.compile(r"") +STARS = re.compile(r'^\*+$') +COMPARISON = re.compile(r'') +CLOSE_COMPARISON = re.compile(r'') +GRAD_COMPARISON = re.compile(r'') +NON_GRAD_COMPARISON = re.compile(r'') ENTITIES_FEATS = re.compile(r"(\d)_((?:[\.\w\s/-](?!\d_))+)") -KEYWORD = re.compile(r"\((?!.*\()(.*)\)$") +KEYWORD = re.compile(r'\((?!.*\()(.*)\)$') class Comparison(object): @@ -81,8 +83,8 @@ class Comparison(object): def __repr__(self): return ( - 'Comparison(text="{}", comp_type={}, entity_1="{}", entity_2="{}", ' - 'feature="{}", keyword="{}")' + "Comparison(text=\"{}\", comp_type={}, entity_1=\"{}\", entity_2=\"{}\", " + "feature=\"{}\", keyword=\"{}\")" ).format( self.text, self.comp_type, @@ -119,7 +121,7 @@ class ComparativeSentencesCorpusReader(CorpusReader): fileids, word_tokenizer=WhitespaceTokenizer(), sent_tokenizer=None, - encoding="utf8", + encoding='utf8', ): """ :param root: The root directory for this corpus. @@ -145,7 +147,7 @@ class ComparativeSentencesCorpusReader(CorpusReader): """ if fileids is None: fileids = self._fileids - elif isinstance(fileids, str): + elif isinstance(fileids, string_types): fileids = [fileids] return concat( [ @@ -195,7 +197,7 @@ class ComparativeSentencesCorpusReader(CorpusReader): """ if fileids is None: fileids = self._fileids - elif isinstance(fileids, str): + elif isinstance(fileids, string_types): fileids = [fileids] return concat([self.open(f).read() for f in fileids]) @@ -259,7 +261,7 @@ class ComparativeSentencesCorpusReader(CorpusReader): if grad_comparisons: # Each comparison tag has its own relations on a separate line for comp in grad_comparisons: - comp_type = int(re.match(r"", comp).group(1)) + comp_type = int(re.match(r'', comp).group(1)) comparison = Comparison( text=comparison_text, comp_type=comp_type ) @@ -267,11 +269,11 @@ class ComparativeSentencesCorpusReader(CorpusReader): entities_feats = ENTITIES_FEATS.findall(line) if entities_feats: for (code, entity_feat) in entities_feats: - if code == "1": + if code == '1': comparison.entity_1 = entity_feat.strip() - elif code == "2": + elif code == '2': comparison.entity_2 = entity_feat.strip() - elif code == "3": + elif code == '3': comparison.feature = entity_feat.strip() keyword = KEYWORD.findall(line) if keyword: @@ -282,7 +284,7 @@ class ComparativeSentencesCorpusReader(CorpusReader): if non_grad_comparisons: for comp in non_grad_comparisons: # comp_type in this case should always be 4. 
- comp_type = int(re.match(r"", comp).group(1)) + comp_type = int(re.match(r'', comp).group(1)) comparison = Comparison( text=comparison_text, comp_type=comp_type ) diff --git a/nlp_resource_data/nltk/corpus/reader/conll.py b/nlp_resource_data/nltk/corpus/reader/conll.py index e138a1b..26849be 100644 --- a/nlp_resource_data/nltk/corpus/reader/conll.py +++ b/nlp_resource_data/nltk/corpus/reader/conll.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: CONLL Corpus Reader # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Steven Bird # Edward Loper # URL: @@ -10,8 +10,11 @@ Read CoNLL-style chunk fileids. """ +from __future__ import unicode_literals + import textwrap +from nltk import compat from nltk.tree import Tree from nltk.util import LazyMap, LazyConcatenation from nltk.tag import map_tag @@ -50,13 +53,13 @@ class ConllCorpusReader(CorpusReader): # Column Types # ///////////////////////////////////////////////////////////////// - WORDS = "words" #: column type for words - POS = "pos" #: column type for part-of-speech tags - TREE = "tree" #: column type for parse trees - CHUNK = "chunk" #: column type for chunk structures - NE = "ne" #: column type for named entities - SRL = "srl" #: column type for semantic role labels - IGNORE = "ignore" #: column type for column that should be ignored + WORDS = 'words' #: column type for words + POS = 'pos' #: column type for part-of-speech tags + TREE = 'tree' #: column type for parse trees + CHUNK = 'chunk' #: column type for chunk structures + NE = 'ne' #: column type for named entities + SRL = 'srl' #: column type for semantic role labels + IGNORE = 'ignore' #: column type for column that should be ignored #: A list of all column types supported by the conll corpus reader. COLUMN_TYPES = (WORDS, POS, TREE, CHUNK, NE, SRL, IGNORE) @@ -71,18 +74,18 @@ class ConllCorpusReader(CorpusReader): fileids, columntypes, chunk_types=None, - root_label="S", + root_label='S', pos_in_tree=False, srl_includes_roleset=True, - encoding="utf8", + encoding='utf8', tree_class=Tree, tagset=None, separator=None, ): for columntype in columntypes: if columntype not in self.COLUMN_TYPES: - raise ValueError("Bad column type %r" % columntype) - if isinstance(chunk_types, str): + raise ValueError('Bad column type %r' % columntype) + if isinstance(chunk_types, string_types): chunk_types = [chunk_types] self._chunk_types = chunk_types self._colmap = dict((c, i) for (i, c) in enumerate(columntypes)) @@ -101,7 +104,7 @@ class ConllCorpusReader(CorpusReader): def raw(self, fileids=None): if fileids is None: fileids = self._fileids - elif isinstance(fileids, str): + elif isinstance(fileids, string_types): fileids = [fileids] return concat([self.open(f).read() for f in fileids]) @@ -226,17 +229,17 @@ class ConllCorpusReader(CorpusReader): if not block: continue - grid = [line.split(self.sep) for line in block.split("\n")] + grid = [line.split(self.sep) for line in block.split('\n')] # If there's a docstart row, then discard. ([xx] eventually it # would be good to actually use it) - if grid[0][self._colmap.get("words", 0)] == "-DOCSTART-": + if grid[0][self._colmap.get('words', 0)] == '-DOCSTART-': del grid[0] # Check that the grid is consistent. for row in grid: if len(row) != len(grid[0]): - raise ValueError("Inconsistent number of columns:\n%s" % block) + raise ValueError('Inconsistent number of columns:\n%s' % block) grids.append(grid) return grids @@ -247,52 +250,52 @@ class ConllCorpusReader(CorpusReader): # a list of words or a parse tree). 
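    # For orientation (illustrative example values, not taken from a corpus):
    # with columntypes=('words', 'pos', 'chunk'), the grid for a two-token
    # sentence is roughly [['He', 'PRP', 'B-NP'], ['ran', 'VBD', 'B-VP']];
    # each row is one token, self._colmap maps a column type to its index,
    # and _get_column(grid, self._colmap['pos']) would return ['PRP', 'VBD'].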
def _get_words(self, grid): - return self._get_column(grid, self._colmap["words"]) + return self._get_column(grid, self._colmap['words']) def _get_tagged_words(self, grid, tagset=None): - pos_tags = self._get_column(grid, self._colmap["pos"]) + pos_tags = self._get_column(grid, self._colmap['pos']) if tagset and tagset != self._tagset: pos_tags = [map_tag(self._tagset, tagset, t) for t in pos_tags] - return list(zip(self._get_column(grid, self._colmap["words"]), pos_tags)) + return list(zip(self._get_column(grid, self._colmap['words']), pos_tags)) def _get_iob_words(self, grid, tagset=None): - pos_tags = self._get_column(grid, self._colmap["pos"]) + pos_tags = self._get_column(grid, self._colmap['pos']) if tagset and tagset != self._tagset: pos_tags = [map_tag(self._tagset, tagset, t) for t in pos_tags] return list( zip( - self._get_column(grid, self._colmap["words"]), + self._get_column(grid, self._colmap['words']), pos_tags, - self._get_column(grid, self._colmap["chunk"]), + self._get_column(grid, self._colmap['chunk']), ) ) def _get_chunked_words(self, grid, chunk_types, tagset=None): # n.b.: this method is very similar to conllstr2tree. - words = self._get_column(grid, self._colmap["words"]) - pos_tags = self._get_column(grid, self._colmap["pos"]) + words = self._get_column(grid, self._colmap['words']) + pos_tags = self._get_column(grid, self._colmap['pos']) if tagset and tagset != self._tagset: pos_tags = [map_tag(self._tagset, tagset, t) for t in pos_tags] - chunk_tags = self._get_column(grid, self._colmap["chunk"]) + chunk_tags = self._get_column(grid, self._colmap['chunk']) stack = [Tree(self._root_label, [])] for (word, pos_tag, chunk_tag) in zip(words, pos_tags, chunk_tags): - if chunk_tag == "O": - state, chunk_type = "O", "" + if chunk_tag == 'O': + state, chunk_type = 'O', '' else: - (state, chunk_type) = chunk_tag.split("-") + (state, chunk_type) = chunk_tag.split('-') # If it's a chunk we don't care about, treat it as O. if chunk_types is not None and chunk_type not in chunk_types: - state = "O" + state = 'O' # Treat a mismatching I like a B. - if state == "I" and chunk_type != stack[-1].label(): - state = "B" + if state == 'I' and chunk_type != stack[-1].label(): + state = 'B' # For B or I: close any open chunks - if state in "BO" and len(stack) == 2: + if state in 'BO' and len(stack) == 2: stack.pop() # For B: start a new chunk. - if state == "B": + if state == 'B': new_chunk = Tree(chunk_type, []) stack[-1].append(new_chunk) stack.append(new_chunk) @@ -302,29 +305,29 @@ class ConllCorpusReader(CorpusReader): return stack[0] def _get_parsed_sent(self, grid, pos_in_tree, tagset=None): - words = self._get_column(grid, self._colmap["words"]) - pos_tags = self._get_column(grid, self._colmap["pos"]) + words = self._get_column(grid, self._colmap['words']) + pos_tags = self._get_column(grid, self._colmap['pos']) if tagset and tagset != self._tagset: pos_tags = [map_tag(self._tagset, tagset, t) for t in pos_tags] - parse_tags = self._get_column(grid, self._colmap["tree"]) + parse_tags = self._get_column(grid, self._colmap['tree']) - treestr = "" + treestr = '' for (word, pos_tag, parse_tag) in zip(words, pos_tags, parse_tags): - if word == "(": - word = "-LRB-" - if word == ")": - word = "-RRB-" - if pos_tag == "(": - pos_tag = "-LRB-" - if pos_tag == ")": - pos_tag = "-RRB-" - (left, right) = parse_tag.split("*") - right = right.count(")") * ")" # only keep ')'. 
- treestr += "%s (%s %s) %s" % (left, pos_tag, word, right) + if word == '(': + word = '-LRB-' + if word == ')': + word = '-RRB-' + if pos_tag == '(': + pos_tag = '-LRB-' + if pos_tag == ')': + pos_tag = '-RRB-' + (left, right) = parse_tag.split('*') + right = right.count(')') * ')' # only keep ')'. + treestr += '%s (%s %s) %s' % (left, pos_tag, word, right) try: tree = self._tree_class.fromstring(treestr) except (ValueError, IndexError): - tree = self._tree_class.fromstring("(%s %s)" % (self._root_label, treestr)) + tree = self._tree_class.fromstring('(%s %s)' % (self._root_label, treestr)) if not pos_in_tree: for subtree in tree.subtrees(): @@ -332,7 +335,7 @@ class ConllCorpusReader(CorpusReader): if ( isinstance(child, Tree) and len(child) == 1 - and isinstance(child[0], str) + and isinstance(child[0], string_types) ): subtree[i] = (child[0], child.label()) @@ -343,15 +346,15 @@ class ConllCorpusReader(CorpusReader): list of list of (start, end), tag) tuples """ if self._srl_includes_roleset: - predicates = self._get_column(grid, self._colmap["srl"] + 1) - start_col = self._colmap["srl"] + 2 + predicates = self._get_column(grid, self._colmap['srl'] + 1) + start_col = self._colmap['srl'] + 2 else: - predicates = self._get_column(grid, self._colmap["srl"]) - start_col = self._colmap["srl"] + 1 + predicates = self._get_column(grid, self._colmap['srl']) + start_col = self._colmap['srl'] + 1 # Count how many predicates there are. This tells us how many # columns to expect for SRL data. - num_preds = len([p for p in predicates if p != "-"]) + num_preds = len([p for p in predicates if p != '-']) spanlists = [] for i in range(num_preds): @@ -359,11 +362,11 @@ class ConllCorpusReader(CorpusReader): spanlist = [] stack = [] for wordnum, srl_tag in enumerate(col): - (left, right) = srl_tag.split("*") - for tag in left.split("("): + (left, right) = srl_tag.split('*') + for tag in left.split('('): if tag: stack.append((tag, wordnum)) - for i in range(right.count(")")): + for i in range(right.count(')')): (tag, start) = stack.pop() spanlist.append(((start, wordnum + 1), tag)) spanlists.append(spanlist) @@ -374,28 +377,28 @@ class ConllCorpusReader(CorpusReader): tree = self._get_parsed_sent(grid, pos_in_tree) spanlists = self._get_srl_spans(grid) if self._srl_includes_roleset: - predicates = self._get_column(grid, self._colmap["srl"] + 1) - rolesets = self._get_column(grid, self._colmap["srl"]) + predicates = self._get_column(grid, self._colmap['srl'] + 1) + rolesets = self._get_column(grid, self._colmap['srl']) else: - predicates = self._get_column(grid, self._colmap["srl"]) + predicates = self._get_column(grid, self._colmap['srl']) rolesets = [None] * len(predicates) instances = ConllSRLInstanceList(tree) for wordnum, predicate in enumerate(predicates): - if predicate == "-": + if predicate == '-': continue # Decide which spanlist to use. Don't assume that they're # sorted in the same order as the predicates (even though # they usually are). 
for spanlist in spanlists: for (start, end), tag in spanlist: - if wordnum in range(start, end) and tag in ("V", "C-V"): + if wordnum in range(start, end) and tag in ('V', 'C-V'): break else: continue break else: - raise ValueError("No srl column found for %r" % predicate) + raise ValueError('No srl column found for %r' % predicate) instances.append( ConllSRLInstance(tree, wordnum, predicate, rolesets[wordnum], spanlist) ) @@ -410,7 +413,7 @@ class ConllCorpusReader(CorpusReader): for columntype in columntypes: if columntype not in self._colmap: raise ValueError( - "This corpus does not contain a %s " "column." % columntype + 'This corpus does not contain a %s ' 'column.' % columntype ) @staticmethod @@ -418,6 +421,7 @@ class ConllCorpusReader(CorpusReader): return [grid[i][column_index] for i in range(len(grid))] +@compat.python_2_unicode_compatible class ConllSRLInstance(object): """ An SRL instance from a CoNLL corpus, which identifies and @@ -463,7 +467,7 @@ class ConllSRLInstance(object): # Fill in the self.verb and self.arguments values. for (start, end), tag in tagged_spans: - if tag in ("V", "C-V"): + if tag in ('V', 'C-V'): self.verb += list(range(start, end)) else: self.arguments.append(((start, end), tag)) @@ -471,31 +475,32 @@ class ConllSRLInstance(object): def __repr__(self): # Originally, its: ##plural = 's' if len(self.arguments) != 1 else '' - plural = "s" if len(self.arguments) != 1 else "" - return "" % ( + plural = 's' if len(self.arguments) != 1 else '' + return '' % ( (self.verb_stem, len(self.arguments), plural) ) def pprint(self): - verbstr = " ".join(self.words[i][0] for i in self.verb) - hdr = "SRL for %r (stem=%r):\n" % (verbstr, self.verb_stem) - s = "" + verbstr = ' '.join(self.words[i][0] for i in self.verb) + hdr = 'SRL for %r (stem=%r):\n' % (verbstr, self.verb_stem) + s = '' for i, word in enumerate(self.words): if isinstance(word, tuple): word = word[0] for (start, end), argid in self.arguments: if i == start: - s += "[%s " % argid + s += '[%s ' % argid if i == end: - s += "] " + s += '] ' if i in self.verb: - word = "<<%s>>" % word - s += word + " " + word = '<<%s>>' % word + s += word + ' ' return hdr + textwrap.fill( - s.replace(" ]", "]"), initial_indent=" ", subsequent_indent=" " + s.replace(' ]', ']'), initial_indent=' ', subsequent_indent=' ' ) +@compat.python_2_unicode_compatible class ConllSRLInstanceList(list): """ Set of instances for a single sentence @@ -512,45 +517,45 @@ class ConllSRLInstanceList(list): # Sanity check: trees should be the same for inst in self: if inst.tree != self.tree: - raise ValueError("Tree mismatch!") + raise ValueError('Tree mismatch!') # If desired, add trees: if include_tree: words = self.tree.leaves() pos = [None] * len(words) - synt = ["*"] * len(words) + synt = ['*'] * len(words) self._tree2conll(self.tree, 0, words, pos, synt) - s = "" + s = '' for i in range(len(words)): # optional tree columns if include_tree: - s += "%-20s " % words[i] - s += "%-8s " % pos[i] - s += "%15s*%-8s " % tuple(synt[i].split("*")) + s += '%-20s ' % words[i] + s += '%-8s ' % pos[i] + s += '%15s*%-8s ' % tuple(synt[i].split('*')) # verb head column for inst in self: if i == inst.verb_head: - s += "%-20s " % inst.verb_stem + s += '%-20s ' % inst.verb_stem break else: - s += "%-20s " % "-" + s += '%-20s ' % '-' # Remaining columns: self for inst in self: - argstr = "*" + argstr = '*' for (start, end), argid in inst.tagged_spans: if i == start: - argstr = "(%s%s" % (argid, argstr) + argstr = '(%s%s' % (argid, argstr) if i == (end - 1): - 
argstr += ")" - s += "%-12s " % argstr - s += "\n" + argstr += ')' + s += '%-12s ' % argstr + s += '\n' return s def _tree2conll(self, tree, wordnum, words, pos, synt): assert isinstance(tree, Tree) - if len(tree) == 1 and isinstance(tree[0], str): + if len(tree) == 1 and isinstance(tree[0], string_types): pos[wordnum] = tree.label() assert words[wordnum] == tree[0] return wordnum + 1 @@ -559,10 +564,10 @@ class ConllSRLInstanceList(list): pos[wordnum], pos[wordnum] = tree[0] return wordnum + 1 else: - synt[wordnum] = "(%s%s" % (tree.label(), synt[wordnum]) + synt[wordnum] = '(%s%s' % (tree.label(), synt[wordnum]) for child in tree: wordnum = self._tree2conll(child, wordnum, words, pos, synt) - synt[wordnum - 1] += ")" + synt[wordnum - 1] += ')' return wordnum @@ -573,13 +578,13 @@ class ConllChunkCorpusReader(ConllCorpusReader): """ def __init__( - self, root, fileids, chunk_types, encoding="utf8", tagset=None, separator=None + self, root, fileids, chunk_types, encoding='utf8', tagset=None, separator=None ): ConllCorpusReader.__init__( self, root, fileids, - ("words", "pos", "chunk"), + ('words', 'pos', 'chunk'), chunk_types=chunk_types, encoding=encoding, tagset=tagset, diff --git a/nlp_resource_data/nltk/corpus/reader/crubadan.py b/nlp_resource_data/nltk/corpus/reader/crubadan.py index 1831236..8470b06 100644 --- a/nlp_resource_data/nltk/corpus/reader/crubadan.py +++ b/nlp_resource_data/nltk/corpus/reader/crubadan.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: An Crubadan N-grams Reader # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Avital Pekker # # URL: @@ -19,9 +19,12 @@ For details about An Crubadan, this data, and its potential uses, see: http://borel.slu.edu/crubadan/index.html """ +from __future__ import print_function, unicode_literals + import re from os import path +from nltk.compat import PY3 from nltk.corpus.reader import CorpusReader from nltk.probability import FreqDist from nltk.data import ZipFilePathPointer @@ -32,17 +35,17 @@ class CrubadanCorpusReader(CorpusReader): A corpus reader used to access language An Crubadan n-gram files. 
""" - _LANG_MAPPER_FILE = "table.txt" + _LANG_MAPPER_FILE = 'table.txt' _all_lang_freq = {} - def __init__(self, root, fileids, encoding="utf8", tagset=None): - super(CrubadanCorpusReader, self).__init__(root, fileids, encoding="utf8") + def __init__(self, root, fileids, encoding='utf8', tagset=None): + super(CrubadanCorpusReader, self).__init__(root, fileids, encoding='utf8') self._lang_mapping_data = [] self._load_lang_mapping_data() def lang_freq(self, lang): - """ Return n-gram FreqDist for a specific language - given ISO 639-3 language code """ + ''' Return n-gram FreqDist for a specific language + given ISO 639-3 language code ''' if lang not in self._all_lang_freq: self._all_lang_freq[lang] = self._load_lang_ngrams(lang) @@ -50,23 +53,23 @@ class CrubadanCorpusReader(CorpusReader): return self._all_lang_freq[lang] def langs(self): - """ Return a list of supported languages as ISO 639-3 codes """ + ''' Return a list of supported languages as ISO 639-3 codes ''' return [row[1] for row in self._lang_mapping_data] def iso_to_crubadan(self, lang): - """ Return internal Crubadan code based on ISO 639-3 code """ + ''' Return internal Crubadan code based on ISO 639-3 code ''' for i in self._lang_mapping_data: if i[1].lower() == lang.lower(): return i[0] def crubadan_to_iso(self, lang): - """ Return ISO 639-3 code given internal Crubadan code """ + ''' Return ISO 639-3 code given internal Crubadan code ''' for i in self._lang_mapping_data: if i[0].lower() == lang.lower(): return i[1] def _load_lang_mapping_data(self): - """ Load language mappings between codes and description from table.txt """ + ''' Load language mappings between codes and description from table.txt ''' if isinstance(self.root, ZipFilePathPointer): raise RuntimeError( "Please install the 'crubadan' corpus first, use nltk.download()" @@ -76,30 +79,39 @@ class CrubadanCorpusReader(CorpusReader): if self._LANG_MAPPER_FILE not in self.fileids(): raise RuntimeError("Could not find language mapper file: " + mapper_file) - raw = open(mapper_file, "r", encoding="utf-8").read().strip() + if PY3: + raw = open(mapper_file, 'r', encoding='utf-8').read().strip() + else: + raw = open(mapper_file, 'rU').read().decode('utf-8').strip() - self._lang_mapping_data = [row.split("\t") for row in raw.split("\n")] + self._lang_mapping_data = [row.split('\t') for row in raw.split('\n')] def _load_lang_ngrams(self, lang): - """ Load single n-gram language file given the ISO 639-3 language code - and return its FreqDist """ + ''' Load single n-gram language file given the ISO 639-3 language code + and return its FreqDist ''' if lang not in self.langs(): raise RuntimeError("Unsupported language.") crubadan_code = self.iso_to_crubadan(lang) - ngram_file = path.join(self.root, crubadan_code + "-3grams.txt") + ngram_file = path.join(self.root, crubadan_code + '-3grams.txt') if not path.isfile(ngram_file): raise RuntimeError("No N-gram file found for requested language.") counts = FreqDist() - f = open(ngram_file, "r", encoding="utf-8") + if PY3: + f = open(ngram_file, 'r', encoding='utf-8') + else: + f = open(ngram_file, 'rU') for line in f: - data = line.split(" ") + if PY3: + data = line.split(' ') + else: + data = line.decode('utf8').split(' ') - ngram = data[1].strip("\n") + ngram = data[1].strip('\n') freq = int(data[0]) counts[ngram] = freq diff --git a/nlp_resource_data/nltk/corpus/reader/dependency.py b/nlp_resource_data/nltk/corpus/reader/dependency.py index 4314fbd..49e7423 100644 --- a/nlp_resource_data/nltk/corpus/reader/dependency.py +++ 
b/nlp_resource_data/nltk/corpus/reader/dependency.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Dependency Corpus Reader # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Kepa Sarasola # Iker Manterola # @@ -21,9 +21,9 @@ class DependencyCorpusReader(SyntaxCorpusReader): self, root, fileids, - encoding="utf8", + encoding='utf8', word_tokenizer=TabTokenizer(), - sent_tokenizer=RegexpTokenizer("\n", gaps=True), + sent_tokenizer=RegexpTokenizer('\n', gaps=True), para_block_reader=read_blankline_block, ): # FIXME: Why is it inheritting from SyntaxCorpusReader but initializing @@ -89,7 +89,7 @@ class DependencyCorpusReader(SyntaxCorpusReader): class DependencyCorpusView(StreamBackedCorpusView): - _DOCSTART = "-DOCSTART- -DOCSTART- O\n" # dokumentu hasiera definitzen da + _DOCSTART = '-DOCSTART- -DOCSTART- O\n' # dokumentu hasiera definitzen da def __init__( self, @@ -98,7 +98,7 @@ class DependencyCorpusView(StreamBackedCorpusView): group_by_sent, dependencies, chunk_types=None, - encoding="utf8", + encoding='utf8', ): self._tagged = tagged self._dependencies = dependencies @@ -115,13 +115,13 @@ class DependencyCorpusView(StreamBackedCorpusView): # extract word and tag from any of the formats if not self._dependencies: - lines = [line.split("\t") for line in sent.split("\n")] + lines = [line.split('\t') for line in sent.split('\n')] if len(lines[0]) == 3 or len(lines[0]) == 4: sent = [(line[0], line[1]) for line in lines] elif len(lines[0]) == 10: sent = [(line[1], line[4]) for line in lines] else: - raise ValueError("Unexpected number of fields in dependency tree file") + raise ValueError('Unexpected number of fields in dependency tree file') # discard tags if they weren't requested if not self._tagged: diff --git a/nlp_resource_data/nltk/corpus/reader/framenet.py b/nlp_resource_data/nltk/corpus/reader/framenet.py index 4eaa6d1..9705f4a 100644 --- a/nlp_resource_data/nltk/corpus/reader/framenet.py +++ b/nlp_resource_data/nltk/corpus/reader/framenet.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Framenet Corpus Reader # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Authors: Chuck Wooters , # Nathan Schneider # URL: @@ -10,6 +10,7 @@ """ Corpus reader for the FrameNet 1.7 lexicon and corpus. """ +from __future__ import print_function, unicode_literals import os import re @@ -19,15 +20,16 @@ import sys import types from collections import defaultdict, OrderedDict from operator import itemgetter -from itertools import zip_longest +from six import string_types, text_type +from six.moves import zip_longest from pprint import pprint from nltk.corpus.reader import XMLCorpusReader, XMLCorpusView - +from nltk.compat import python_2_unicode_compatible from nltk.util import LazyConcatenation, LazyMap, LazyIteratorList -__docformat__ = "epytext en" +__docformat__ = 'epytext en' def mimic_wrap(lines, wrap_at=65, **kwargs): @@ -35,7 +37,7 @@ def mimic_wrap(lines, wrap_at=65, **kwargs): Wrap the first of 'lines' with textwrap and the remaining lines at exactly the same positions as the first. """ - l0 = textwrap.fill(lines[0], wrap_at, drop_whitespace=False).split("\n") + l0 = textwrap.fill(lines[0], wrap_at, drop_whitespace=False).split('\n') yield l0 def _(line): @@ -46,14 +48,14 @@ def mimic_wrap(lines, wrap_at=65, **kwargs): il0 += 1 if line: # Remaining stuff on this line past the end of the mimicked line. # So just textwrap this line. 
- for ln in textwrap.fill(line, wrap_at, drop_whitespace=False).split("\n"): + for ln in textwrap.fill(line, wrap_at, drop_whitespace=False).split('\n'): yield ln for l in lines[1:]: yield list(_(l)) -def _pretty_longstring(defstr, prefix="", wrap_at=65): +def _pretty_longstring(defstr, prefix='', wrap_at=65): """ Helper function for pretty-printing a long string. @@ -65,8 +67,8 @@ def _pretty_longstring(defstr, prefix="", wrap_at=65): """ outstr = "" - for line in textwrap.fill(defstr, wrap_at).split("\n"): - outstr += prefix + line + "\n" + for line in textwrap.fill(defstr, wrap_at).split('\n'): + outstr += prefix + line + '\n' return outstr @@ -83,10 +85,10 @@ def _pretty_any(obj): outstr = "" for k in obj: - if isinstance(obj[k], str) and len(obj[k]) > 65: + if isinstance(obj[k], string_types) and len(obj[k]) > 65: outstr += "[{0}]\n".format(k) - outstr += "{0}".format(_pretty_longstring(obj[k], prefix=" ")) - outstr += "\n" + outstr += "{0}".format(_pretty_longstring(obj[k], prefix=' ')) + outstr += '\n' else: outstr += "[{0}] {1}\n".format(k, obj[k]) @@ -110,11 +112,11 @@ def _pretty_semtype(st): outstr = "" outstr += "semantic type ({0.ID}): {0.name}\n".format(st) - if "abbrev" in semkeys: + if 'abbrev' in semkeys: outstr += "[abbrev] {0}\n".format(st.abbrev) - if "definition" in semkeys: + if 'definition' in semkeys: outstr += "[definition]\n" - outstr += _pretty_longstring(st.definition, " ") + outstr += _pretty_longstring(st.definition, ' ') outstr += "[rootType] {0}({1})\n".format(st.rootType.name, st.rootType.ID) if st.superType is None: outstr += "[superType] \n" @@ -123,8 +125,8 @@ def _pretty_semtype(st): outstr += "[subTypes] {0} subtypes\n".format(len(st.subTypes)) outstr += ( " " - + ", ".join("{0}({1})".format(x.name, x.ID) for x in st.subTypes) - + "\n" * (len(st.subTypes) > 0) + + ", ".join('{0}({1})'.format(x.name, x.ID) for x in st.subTypes) + + '\n' * (len(st.subTypes) > 0) ) return outstr @@ -191,40 +193,40 @@ def _pretty_lu(lu): lukeys = lu.keys() outstr = "" outstr += "lexical unit ({0.ID}): {0.name}\n\n".format(lu) - if "definition" in lukeys: + if 'definition' in lukeys: outstr += "[definition]\n" - outstr += _pretty_longstring(lu.definition, " ") - if "frame" in lukeys: + outstr += _pretty_longstring(lu.definition, ' ') + if 'frame' in lukeys: outstr += "\n[frame] {0}({1})\n".format(lu.frame.name, lu.frame.ID) - if "incorporatedFE" in lukeys: + if 'incorporatedFE' in lukeys: outstr += "\n[incorporatedFE] {0}\n".format(lu.incorporatedFE) - if "POS" in lukeys: + if 'POS' in lukeys: outstr += "\n[POS] {0}\n".format(lu.POS) - if "status" in lukeys: + if 'status' in lukeys: outstr += "\n[status] {0}\n".format(lu.status) - if "totalAnnotated" in lukeys: + if 'totalAnnotated' in lukeys: outstr += "\n[totalAnnotated] {0} annotated examples\n".format( lu.totalAnnotated ) - if "lexemes" in lukeys: + if 'lexemes' in lukeys: outstr += "\n[lexemes] {0}\n".format( - " ".join("{0}/{1}".format(lex.name, lex.POS) for lex in lu.lexemes) + ' '.join('{0}/{1}'.format(lex.name, lex.POS) for lex in lu.lexemes) ) - if "semTypes" in lukeys: + if 'semTypes' in lukeys: outstr += "\n[semTypes] {0} semantic types\n".format(len(lu.semTypes)) outstr += ( " " * (len(lu.semTypes) > 0) - + ", ".join("{0}({1})".format(x.name, x.ID) for x in lu.semTypes) - + "\n" * (len(lu.semTypes) > 0) + + ", ".join('{0}({1})'.format(x.name, x.ID) for x in lu.semTypes) + + '\n' * (len(lu.semTypes) > 0) ) - if "URL" in lukeys: + if 'URL' in lukeys: outstr += "\n[URL] {0}\n".format(lu.URL) - if "subCorpus" in 
lukeys: + if 'subCorpus' in lukeys: subc = [x.name for x in lu.subCorpus] outstr += "\n[subCorpus] {0} subcorpora\n".format(len(lu.subCorpus)) - for line in textwrap.fill(", ".join(sorted(subc)), 60).split("\n"): + for line in textwrap.fill(", ".join(sorted(subc)), 60).split('\n'): outstr += " {0}\n".format(line) - if "exemplars" in lukeys: + if 'exemplars' in lukeys: outstr += "\n[exemplars] {0} sentences across all subcorpora\n".format( len(lu.exemplars) ) @@ -284,7 +286,7 @@ def _pretty_fulltext_sentence(sent): outstr = "" outstr += "full-text sentence ({0.ID}) in {1}:\n\n".format( - sent, sent.doc.get("name", sent.doc.description) + sent, sent.doc.get('name', sent.doc.description) ) outstr += "\n[POS] {0} tags\n".format(len(sent.POS)) outstr += "\n[POS_tagset] {0}\n\n".format(sent.POS_tagset) @@ -314,29 +316,29 @@ def _pretty_pos(aset): sent = aset.sent s0 = sent.text - s1 = "" - s2 = "" + s1 = '' + s2 = '' i = 0 adjust = 0 for j, k, lbl in overt: - assert j >= i, ("Overlapping targets?", (j, k, lbl)) - s1 += " " * (j - i) + "-" * (k - j) + assert j >= i, ('Overlapping targets?', (j, k, lbl)) + s1 += ' ' * (j - i) + '-' * (k - j) if len(lbl) > (k - j): # add space in the sentence to make room for the annotation index amt = len(lbl) - (k - j) s0 = ( - s0[: k + adjust] + "~" * amt + s0[k + adjust :] + s0[: k + adjust] + '~' * amt + s0[k + adjust :] ) # '~' to prevent line wrapping - s1 = s1[: k + adjust] + " " * amt + s1[k + adjust :] + s1 = s1[: k + adjust] + ' ' * amt + s1[k + adjust :] adjust += amt - s2 += " " * (j - i) + lbl.ljust(k - j) + s2 += ' ' * (j - i) + lbl.ljust(k - j) i = k long_lines = [s0, s1, s2] - outstr += "\n\n".join( - map("\n".join, zip_longest(*mimic_wrap(long_lines), fillvalue=" ")) - ).replace("~", " ") + outstr += '\n\n'.join( + map('\n'.join, zip_longest(*mimic_wrap(long_lines), fillvalue=' ')) + ).replace('~', ' ') outstr += "\n" return outstr @@ -358,13 +360,13 @@ def _pretty_annotation(sent, aset_level=False): outstr += " ({0.ID}):\n".format(sent) if aset_level: # TODO: any UNANN exemplars? outstr += "\n[status] {0}\n".format(sent.status) - for k in ("corpID", "docID", "paragNo", "sentNo", "aPos"): + for k in ('corpID', 'docID', 'paragNo', 'sentNo', 'aPos'): if k in sentkeys: outstr += "[{0}] {1}\n".format(k, sent[k]) outstr += ( "\n[LU] ({0.ID}) {0.name} in {0.frame.name}\n".format(sent.LU) if sent.LU - else "\n[LU] Not found!" + else '\n[LU] Not found!' ) outstr += "\n[frame] ({0.ID}) {0.name}\n".format( sent.frame @@ -420,7 +422,7 @@ def _pretty_annotation(sent, aset_level=False): - Scon: (none) - Art: (none) """ - for lyr in ("NER", "WSL", "Other", "Sent"): + for lyr in ('NER', 'WSL', 'Other', 'Sent'): if lyr in sent and sent[lyr]: outstr += "\n[{0}] {1} entr{2}\n".format( lyr, len(sent[lyr]), "ies" if len(sent[lyr]) != 1 else "y" @@ -428,12 +430,12 @@ def _pretty_annotation(sent, aset_level=False): outstr += "\n[text] + [Target] + [FE]" # POS-specific layers: syntactically important words that are neither the target # nor the FEs. Include these along with the first FE layer but with '^' underlining. 
- for lyr in ("Verb", "Noun", "Adj", "Adv", "Prep", "Scon", "Art"): + for lyr in ('Verb', 'Noun', 'Adj', 'Adv', 'Prep', 'Scon', 'Art'): if lyr in sent and sent[lyr]: outstr += " + [{0}]".format(lyr) - if "FE2" in sentkeys: + if 'FE2' in sentkeys: outstr += " + [FE2]" - if "FE3" in sentkeys: + if 'FE3' in sentkeys: outstr += " + [FE3]" outstr += "\n\n" outstr += sent._ascii() # -> _annotation_ascii() @@ -443,15 +445,15 @@ def _pretty_annotation(sent, aset_level=False): def _annotation_ascii(sent): - """ + ''' Given a sentence or FE annotation set, construct the width-limited string showing an ASCII visualization of the sentence's annotations, calling either _annotation_ascii_frames() or _annotation_ascii_FEs() as appropriate. This will be attached as a method to appropriate AttrDict instances and called in the full pretty-printing of the instance. - """ - if sent._type == "fulltext_sentence" or ( - "annotationSet" in sent and len(sent.annotationSet) > 2 + ''' + if sent._type == 'fulltext_sentence' or ( + 'annotationSet' in sent and len(sent.annotationSet) > 2 ): # a full-text sentence OR sentence with multiple targets. # (multiple targets = >2 annotation sets, because the first annotation set is POS.) @@ -461,24 +463,24 @@ def _annotation_ascii(sent): def _annotation_ascii_frames(sent): - """ + ''' ASCII string rendering of the sentence along with its targets and frame names. Called for all full-text sentences, as well as the few LU sentences with multiple targets (e.g., fn.lu(6412).exemplars[82] has two want.v targets). Line-wrapped to limit the display width. - """ + ''' # list the target spans and their associated aset index overt = [] for a, aset in enumerate(sent.annotationSet[1:]): for j, k in aset.Target: indexS = "[{0}]".format(a + 1) - if aset.status == "UNANN" or aset.LU.status == "Problem": + if aset.status == 'UNANN' or aset.LU.status == 'Problem': indexS += " " - if aset.status == "UNANN": + if aset.status == 'UNANN': indexS += ( "!" ) # warning indicator that there is a frame annotation but no FE annotation - if aset.LU.status == "Problem": + if aset.LU.status == 'Problem': indexS += ( "?" ) # warning indicator that there is a missing LU definition (because the LU has Problem status) @@ -497,37 +499,37 @@ def _annotation_ascii_frames(sent): combinedIndex = ( overt[o - 1][3] + asetIndex ) # e.g., '[1][2]', '[1]! [2]' - combinedIndex = combinedIndex.replace(" !", "! ").replace(" ?", "? ") + combinedIndex = combinedIndex.replace(' !', '! ').replace(' ?', '? ') overt[o - 1] = overt[o - 1][:3] + (combinedIndex,) duplicates.add(o) else: # different frames, same or overlapping targets s = sent.text for j, k, fname, asetIndex in overt: - s += "\n" + asetIndex + " " + sent.text[j:k] + " :: " + fname - s += "\n(Unable to display sentence with targets marked inline due to overlap)" + s += '\n' + asetIndex + ' ' + sent.text[j:k] + ' :: ' + fname + s += '\n(Unable to display sentence with targets marked inline due to overlap)' return s for o in reversed(sorted(duplicates)): del overt[o] s0 = sent.text - s1 = "" - s11 = "" - s2 = "" + s1 = '' + s11 = '' + s2 = '' i = 0 adjust = 0 fAbbrevs = OrderedDict() for j, k, fname, asetIndex in overt: if not j >= i: assert j >= i, ( - "Overlapping targets?" + 'Overlapping targets?' 
+ ( - " UNANN" - if any(aset.status == "UNANN" for aset in sent.annotationSet[1:]) - else "" + ' UNANN' + if any(aset.status == 'UNANN' for aset in sent.annotationSet[1:]) + else '' ), (j, k, asetIndex), ) - s1 += " " * (j - i) + "*" * (k - j) + s1 += ' ' * (j - i) + '*' * (k - j) short = fname[: k - j] if (k - j) < len(fname): r = 0 @@ -538,39 +540,39 @@ def _annotation_ascii_frames(sent): short = fname[: k - j - 1] + str(r) else: # short not in fAbbrevs fAbbrevs[short] = fname - s11 += " " * (j - i) + short.ljust(k - j) + s11 += ' ' * (j - i) + short.ljust(k - j) if len(asetIndex) > (k - j): # add space in the sentence to make room for the annotation index amt = len(asetIndex) - (k - j) s0 = ( - s0[: k + adjust] + "~" * amt + s0[k + adjust :] + s0[: k + adjust] + '~' * amt + s0[k + adjust :] ) # '~' to prevent line wrapping - s1 = s1[: k + adjust] + " " * amt + s1[k + adjust :] - s11 = s11[: k + adjust] + " " * amt + s11[k + adjust :] + s1 = s1[: k + adjust] + ' ' * amt + s1[k + adjust :] + s11 = s11[: k + adjust] + ' ' * amt + s11[k + adjust :] adjust += amt - s2 += " " * (j - i) + asetIndex.ljust(k - j) + s2 += ' ' * (j - i) + asetIndex.ljust(k - j) i = k long_lines = [s0, s1, s11, s2] - outstr = "\n\n".join( - map("\n".join, zip_longest(*mimic_wrap(long_lines), fillvalue=" ")) - ).replace("~", " ") - outstr += "\n" + outstr = '\n\n'.join( + map('\n'.join, zip_longest(*mimic_wrap(long_lines), fillvalue=' ')) + ).replace('~', ' ') + outstr += '\n' if fAbbrevs: - outstr += " (" + ", ".join("=".join(pair) for pair in fAbbrevs.items()) + ")" - assert len(fAbbrevs) == len(dict(fAbbrevs)), "Abbreviation clash" + outstr += ' (' + ', '.join('='.join(pair) for pair in fAbbrevs.items()) + ')' + assert len(fAbbrevs) == len(dict(fAbbrevs)), 'Abbreviation clash' return outstr def _annotation_ascii_FE_layer(overt, ni, feAbbrevs): - """Helper for _annotation_ascii_FEs().""" - s1 = "" - s2 = "" + '''Helper for _annotation_ascii_FEs().''' + s1 = '' + s2 = '' i = 0 for j, k, fename in overt: - s1 += " " * (j - i) + ("^" if fename.islower() else "-") * (k - j) + s1 += ' ' * (j - i) + ('^' if fename.islower() else '-') * (k - j) short = fename[: k - j] if len(fename) > len(short): r = 0 @@ -581,30 +583,30 @@ def _annotation_ascii_FE_layer(overt, ni, feAbbrevs): short = fename[: k - j - 1] + str(r) else: # short not in feAbbrevs feAbbrevs[short] = fename - s2 += " " * (j - i) + short.ljust(k - j) + s2 += ' ' * (j - i) + short.ljust(k - j) i = k - sNI = "" + sNI = '' if ni: - sNI += " [" + ", ".join(":".join(x) for x in sorted(ni.items())) + "]" + sNI += ' [' + ', '.join(':'.join(x) for x in sorted(ni.items())) + ']' return [s1, s2, sNI] def _annotation_ascii_FEs(sent): - """ + ''' ASCII string rendering of the sentence along with a single target and its FEs. Secondary and tertiary FE layers are included if present. 'sent' can be an FE annotation set or an LU sentence with a single target. Line-wrapped to limit the display width. 
- """ + ''' feAbbrevs = OrderedDict() posspec = [] # POS-specific layer spans (e.g., Supp[ort], Cop[ula]) posspec_separate = False - for lyr in ("Verb", "Noun", "Adj", "Adv", "Prep", "Scon", "Art"): + for lyr in ('Verb', 'Noun', 'Adj', 'Adv', 'Prep', 'Scon', 'Art'): if lyr in sent and sent[lyr]: for a, b, lbl in sent[lyr]: if ( - lbl == "X" + lbl == 'X' ): # skip this, which covers an entire phrase typically containing the target and all its FEs # (but do display the Gov) continue @@ -614,7 +616,7 @@ def _annotation_ascii_FEs(sent): True ) # show POS-specific layers on a separate line posspec.append( - (a, b, lbl.lower().replace("-", "")) + (a, b, lbl.lower().replace('-', '')) ) # lowercase Cop=>cop, Non-Asp=>nonasp, etc. to distinguish from FE names if posspec_separate: POSSPEC = _annotation_ascii_FE_layer(posspec, {}, feAbbrevs) @@ -624,20 +626,20 @@ def _annotation_ascii_FEs(sent): feAbbrevs, ) FE2 = FE3 = None - if "FE2" in sent: + if 'FE2' in sent: FE2 = _annotation_ascii_FE_layer(sent.FE2[0], sent.FE2[1], feAbbrevs) - if "FE3" in sent: + if 'FE3' in sent: FE3 = _annotation_ascii_FE_layer(sent.FE3[0], sent.FE3[1], feAbbrevs) for i, j in sent.Target: FE1span, FE1name, FE1exp = FE1 if len(FE1span) < j: - FE1span += " " * (j - len(FE1span)) + FE1span += ' ' * (j - len(FE1span)) if len(FE1name) < j: - FE1name += " " * (j - len(FE1name)) + FE1name += ' ' * (j - len(FE1name)) FE1[1] = FE1name FE1[0] = ( - FE1span[:i] + FE1span[i:j].replace(" ", "*").replace("-", "=") + FE1span[j:] + FE1span[:i] + FE1span[i:j].replace(' ', '*').replace('-', '=') + FE1span[j:] ) long_lines = [sent.text] if posspec_separate: @@ -647,13 +649,13 @@ def _annotation_ascii_FEs(sent): long_lines.extend([FE2[0], FE2[1] + FE2[2]]) if FE3: long_lines.extend([FE3[0], FE3[1] + FE3[2]]) - long_lines.append("") - outstr = "\n".join( - map("\n".join, zip_longest(*mimic_wrap(long_lines), fillvalue=" ")) + long_lines.append('') + outstr = '\n'.join( + map('\n'.join, zip_longest(*mimic_wrap(long_lines), fillvalue=' ')) ) if feAbbrevs: - outstr += "(" + ", ".join("=".join(pair) for pair in feAbbrevs.items()) + ")" - assert len(feAbbrevs) == len(dict(feAbbrevs)), "Abbreviation clash" + outstr += '(' + ', '.join('='.join(pair) for pair in feAbbrevs.items()) + ')' + assert len(feAbbrevs) == len(dict(feAbbrevs)), 'Abbreviation clash' outstr += "\n" return outstr @@ -674,31 +676,31 @@ def _pretty_fe(fe): outstr += "frame element ({0.ID}): {0.name}\n of {1.name}({1.ID})\n".format( fe, fe.frame ) - if "definition" in fekeys: + if 'definition' in fekeys: outstr += "[definition]\n" - outstr += _pretty_longstring(fe.definition, " ") - if "abbrev" in fekeys: + outstr += _pretty_longstring(fe.definition, ' ') + if 'abbrev' in fekeys: outstr += "[abbrev] {0}\n".format(fe.abbrev) - if "coreType" in fekeys: + if 'coreType' in fekeys: outstr += "[coreType] {0}\n".format(fe.coreType) - if "requiresFE" in fekeys: + if 'requiresFE' in fekeys: outstr += "[requiresFE] " if fe.requiresFE is None: outstr += "\n" else: outstr += "{0}({1})\n".format(fe.requiresFE.name, fe.requiresFE.ID) - if "excludesFE" in fekeys: + if 'excludesFE' in fekeys: outstr += "[excludesFE] " if fe.excludesFE is None: outstr += "\n" else: outstr += "{0}({1})\n".format(fe.excludesFE.name, fe.excludesFE.ID) - if "semType" in fekeys: + if 'semType' in fekeys: outstr += "[semType] " if fe.semType is None: outstr += "\n" else: - outstr += "\n " + "{0}({1})".format(fe.semType.name, fe.semType.ID) + "\n" + outstr += "\n " + "{0}({1})".format(fe.semType.name, fe.semType.ID) + '\n' 
return outstr @@ -718,26 +720,26 @@ def _pretty_frame(frame): outstr += "frame ({0.ID}): {0.name}\n\n".format(frame) outstr += "[URL] {0}\n\n".format(frame.URL) outstr += "[definition]\n" - outstr += _pretty_longstring(frame.definition, " ") + "\n" + outstr += _pretty_longstring(frame.definition, ' ') + '\n' outstr += "[semTypes] {0} semantic types\n".format(len(frame.semTypes)) outstr += ( " " * (len(frame.semTypes) > 0) + ", ".join("{0}({1})".format(x.name, x.ID) for x in frame.semTypes) - + "\n" * (len(frame.semTypes) > 0) + + '\n' * (len(frame.semTypes) > 0) ) outstr += "\n[frameRelations] {0} frame relations\n".format( len(frame.frameRelations) ) - outstr += " " + "\n ".join(repr(frel) for frel in frame.frameRelations) + "\n" + outstr += ' ' + '\n '.join(repr(frel) for frel in frame.frameRelations) + '\n' outstr += "\n[lexUnit] {0} lexical units\n".format(len(frame.lexUnit)) lustrs = [] for luName, lu in sorted(frame.lexUnit.items()): - tmpstr = "{0} ({1})".format(luName, lu.ID) + tmpstr = '{0} ({1})'.format(luName, lu.ID) lustrs.append(tmpstr) - outstr += "{0}\n".format(_pretty_longstring(", ".join(lustrs), prefix=" ")) + outstr += "{0}\n".format(_pretty_longstring(', '.join(lustrs), prefix=' ')) outstr += "\n[FE] {0} frame elements\n".format(len(frame.FE)) fes = {} @@ -750,23 +752,23 @@ def _pretty_frame(frame): for ct in sorted( fes.keys(), key=lambda ct2: [ - "Core", - "Core-Unexpressed", - "Peripheral", - "Extra-Thematic", + 'Core', + 'Core-Unexpressed', + 'Peripheral', + 'Extra-Thematic', ].index(ct2), ): - outstr += "{0:>16}: {1}\n".format(ct, ", ".join(sorted(fes[ct]))) + outstr += "{0:>16}: {1}\n".format(ct, ', '.join(sorted(fes[ct]))) outstr += "\n[FEcoreSets] {0} frame element core sets\n".format( len(frame.FEcoreSets) ) outstr += ( " " - + "\n ".join( + + '\n '.join( ", ".join([x.name for x in coreSet]) for coreSet in frame.FEcoreSets ) - + "\n" + + '\n' ) return outstr @@ -777,6 +779,7 @@ class FramenetError(Exception): """An exception class for framenet-related errors.""" +@python_2_unicode_compatible class AttrDict(dict): """A class that wraps a dict and allows accessing the keys of the @@ -802,7 +805,7 @@ class AttrDict(dict): self[name] = value def __getattr__(self, name): - if name == "_short_repr": + if name == '_short_repr': return self._short_repr return self[name] @@ -813,59 +816,59 @@ class AttrDict(dict): return v def _short_repr(self): - if "_type" in self: - if self["_type"].endswith("relation"): + if '_type' in self: + if self['_type'].endswith('relation'): return self.__repr__() try: return "<{0} ID={1} name={2}>".format( - self["_type"], self["ID"], self["name"] + self['_type'], self['ID'], self['name'] ) except KeyError: try: # no ID--e.g., for _type=lusubcorpus - return "<{0} name={1}>".format(self["_type"], self["name"]) + return "<{0} name={1}>".format(self['_type'], self['name']) except KeyError: # no name--e.g., for _type=lusentence - return "<{0} ID={1}>".format(self["_type"], self["ID"]) + return "<{0} ID={1}>".format(self['_type'], self['ID']) else: return self.__repr__() def _str(self): outstr = "" - if "_type" not in self: + if '_type' not in self: outstr = _pretty_any(self) - elif self["_type"] == "frame": + elif self['_type'] == 'frame': outstr = _pretty_frame(self) - elif self["_type"] == "fe": + elif self['_type'] == 'fe': outstr = _pretty_fe(self) - elif self["_type"] == "lu": + elif self['_type'] == 'lu': outstr = _pretty_lu(self) - elif self["_type"] == "luexemplars": # list of ALL exemplars for LU + elif self['_type'] == 'luexemplars': # 
list of ALL exemplars for LU outstr = _pretty_exemplars(self, self[0].LU) elif ( - self["_type"] == "fulltext_annotation" + self['_type'] == 'fulltext_annotation' ): # list of all sentences for full-text doc outstr = _pretty_fulltext_sentences(self) - elif self["_type"] == "lusentence": + elif self['_type'] == 'lusentence': outstr = _pretty_annotation(self) - elif self["_type"] == "fulltext_sentence": + elif self['_type'] == 'fulltext_sentence': outstr = _pretty_fulltext_sentence(self) - elif self["_type"] in ("luannotationset", "fulltext_annotationset"): + elif self['_type'] in ('luannotationset', 'fulltext_annotationset'): outstr = _pretty_annotation(self, aset_level=True) - elif self["_type"] == "posannotationset": + elif self['_type'] == 'posannotationset': outstr = _pretty_pos(self) - elif self["_type"] == "semtype": + elif self['_type'] == 'semtype': outstr = _pretty_semtype(self) - elif self["_type"] == "framerelationtype": + elif self['_type'] == 'framerelationtype': outstr = _pretty_frame_relation_type(self) - elif self["_type"] == "framerelation": + elif self['_type'] == 'framerelation': outstr = _pretty_frame_relation(self) - elif self["_type"] == "ferelation": + elif self['_type'] == 'ferelation': outstr = _pretty_fe_relation(self) else: outstr = _pretty_any(self) # ensure result is unicode string prior to applying the - # decorator (because non-ASCII characters + # @python_2_unicode_compatible decorator (because non-ASCII characters # could in principle occur in the data and would trigger an encoding error when # passed as arguments to str.format()). # assert isinstance(outstr, unicode) # not in Python 3.2 @@ -878,6 +881,7 @@ class AttrDict(dict): return self.__str__() +@python_2_unicode_compatible class SpecialList(list): """ A list subclass which adds a '_type' attribute for special printing @@ -894,7 +898,7 @@ class SpecialList(list): assert self._type if len(self) == 0: outstr = "[]" - elif self._type == "luexemplars": # list of ALL exemplars for LU + elif self._type == 'luexemplars': # list of ALL exemplars for LU outstr = _pretty_exemplars(self, self[0].LU) else: assert False, self._type @@ -950,6 +954,7 @@ class Future(object): return self._data().__repr__() +@python_2_unicode_compatible class PrettyDict(AttrDict): """ Displays an abbreviated repr of values where possible. @@ -958,22 +963,23 @@ class PrettyDict(AttrDict): """ def __init__(self, *args, **kwargs): - _BREAK_LINES = kwargs.pop("breakLines", False) + _BREAK_LINES = kwargs.pop('breakLines', False) super(PrettyDict, self).__init__(*args, **kwargs) - dict.__setattr__(self, "_BREAK_LINES", _BREAK_LINES) + dict.__setattr__(self, '_BREAK_LINES', _BREAK_LINES) def __repr__(self): parts = [] for k, v in sorted(self.items()): - kv = repr(k) + ": " + kv = repr(k) + ': ' try: kv += v._short_repr() except AttributeError: kv += repr(v) parts.append(kv) - return "{" + (",\n " if self._BREAK_LINES else ", ").join(parts) + "}" + return '{' + (',\n ' if self._BREAK_LINES else ', ').join(parts) + '}' +@python_2_unicode_compatible class PrettyList(list): """ Displays an abbreviated repr of only the first several elements, not the whole list. 
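# ---------------------------------------------------------------------------
# Editor's note (not part of the original patch): the hunks above restore the
# Python 2/3 string handling used by NLTK 3.4 -- joining repr pieces with
# six.text_type instead of str, and re-applying @compat.python_2_unicode_compatible
# to the pretty-printing classes.  A minimal, hedged sketch of the text_type
# pattern follows; the sample data is invented purely for illustration.
from six import text_type

pieces = ["frame (7): Attention", "frame (12): Emotion_directed"]
# text_type is unicode on Python 2 and str on Python 3, so the joined
# summary stays a text string (never bytes) on both interpreter versions.
summary = "[%s]" % text_type(", ").join(pieces)
print(summary)
# ---------------------------------------------------------------------------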
@@ -981,8 +987,8 @@ class PrettyList(list): # from nltk.util def __init__(self, *args, **kwargs): - self._MAX_REPR_SIZE = kwargs.pop("maxReprSize", 60) - self._BREAK_LINES = kwargs.pop("breakLines", False) + self._MAX_REPR_SIZE = kwargs.pop('maxReprSize', 60) + self._BREAK_LINES = kwargs.pop('breakLines', False) super(PrettyList, self).__init__(*args, **kwargs) def __repr__(self): @@ -1000,12 +1006,13 @@ class PrettyList(list): ) # key difference from inherited version: call to _short_repr() length += len(pieces[-1]) + 2 if self._MAX_REPR_SIZE and length > self._MAX_REPR_SIZE and len(pieces) > 2: - return "[%s, ...]" % str( - ",\n " if self._BREAK_LINES else ", " + return "[%s, ...]" % text_type( + ',\n ' if self._BREAK_LINES else ', ' ).join(pieces[:-1]) - return "[%s]" % str(",\n " if self._BREAK_LINES else ", ").join(pieces) + return "[%s]" % text_type(',\n ' if self._BREAK_LINES else ', ').join(pieces) +@python_2_unicode_compatible class PrettyLazyMap(LazyMap): """ Displays an abbreviated repr of only the first several elements, not the whole list. @@ -1028,10 +1035,11 @@ class PrettyLazyMap(LazyMap): ) # key difference from inherited version: call to _short_repr() length += len(pieces[-1]) + 2 if length > self._MAX_REPR_SIZE and len(pieces) > 2: - return "[%s, ...]" % str(", ").join(pieces[:-1]) - return "[%s]" % str(", ").join(pieces) + return "[%s, ...]" % text_type(', ').join(pieces[:-1]) + return "[%s]" % text_type(', ').join(pieces) +@python_2_unicode_compatible class PrettyLazyIteratorList(LazyIteratorList): """ Displays an abbreviated repr of only the first several elements, not the whole list. @@ -1054,10 +1062,11 @@ class PrettyLazyIteratorList(LazyIteratorList): ) # key difference from inherited version: call to _short_repr() length += len(pieces[-1]) + 2 if length > self._MAX_REPR_SIZE and len(pieces) > 2: - return "[%s, ...]" % str(", ").join(pieces[:-1]) - return "[%s]" % str(", ").join(pieces) + return "[%s, ...]" % text_type(', ').join(pieces[:-1]) + return "[%s]" % text_type(', ').join(pieces) +@python_2_unicode_compatible class PrettyLazyConcatenation(LazyConcatenation): """ Displays an abbreviated repr of only the first several elements, not the whole list. @@ -1080,8 +1089,8 @@ class PrettyLazyConcatenation(LazyConcatenation): ) # key difference from inherited version: call to _short_repr() length += len(pieces[-1]) + 2 if length > self._MAX_REPR_SIZE and len(pieces) > 2: - return "[%s, ...]" % str(", ").join(pieces[:-1]) - return "[%s]" % str(", ").join(pieces) + return "[%s, ...]" % text_type(', ').join(pieces[:-1]) + return "[%s]" % text_type(', ').join(pieces) def __add__(self, other): """Return a list concatenating self with other.""" @@ -1104,7 +1113,7 @@ class FramenetCorpusReader(XMLCorpusReader): True """ - _bad_statuses = ["Problem"] + _bad_statuses = ['Problem'] """ When loading LUs for a frame, those whose status is in this list will be ignored. Due to caching, if user code modifies this, it should do so before loading any data. 
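# ---------------------------------------------------------------------------
# Editor's note (not part of the original patch): later hunks in this file
# replace isinstance(x, str) checks with six.string_types so that Python 2
# unicode values are also accepted as frame/LU names.  A small sketch of that
# dispatch pattern; the helper name lookup_kind is hypothetical and used only
# for illustration.
from six import string_types

def lookup_kind(fn_fid_or_fname):
    # Mirrors the pattern in FramenetCorpusReader.frame(): a string argument
    # is treated as a frame name, anything else as a numeric frame ID.
    if isinstance(fn_fid_or_fname, string_types):
        return "by-name"
    return "by-id"

print(lookup_kind("Attention"), lookup_kind(7))  # -> by-name by-id
# ---------------------------------------------------------------------------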
@@ -1240,9 +1249,9 @@ warnings(True) to display corpus consistency warnings when loading data # otherwise weird ordering effects might result in incomplete information self._frame_idx = {} for f in XMLCorpusView( - self.abspath("frameIndex.xml"), "frameIndex/frame", self._handle_elt + self.abspath("frameIndex.xml"), 'frameIndex/frame', self._handle_elt ): - self._frame_idx[f["ID"]] = f + self._frame_idx[f['ID']] = f def _buildcorpusindex(self): # The total number of fulltext annotated documents in Framenet @@ -1250,7 +1259,7 @@ warnings(True) to display corpus consistency warnings when loading data self._fulltext_idx = {} for doclist in XMLCorpusView( self.abspath("fulltextIndex.xml"), - "fulltextIndex/corpus", + 'fulltextIndex/corpus', self._handle_fulltextindex_elt, ): for doc in doclist: @@ -1261,10 +1270,10 @@ warnings(True) to display corpus consistency warnings when loading data # should not be very large self._lu_idx = {} for lu in XMLCorpusView( - self.abspath("luIndex.xml"), "luIndex/lu", self._handle_elt + self.abspath("luIndex.xml"), 'luIndex/lu', self._handle_elt ): self._lu_idx[ - lu["ID"] + lu['ID'] ] = lu # populate with LU index entries. if any of these # are looked up they will be replaced by full LU objects. @@ -1274,7 +1283,7 @@ warnings(True) to display corpus consistency warnings when loading data x for x in XMLCorpusView( self.abspath("frRelation.xml"), - "frameRelations/frameRelationType", + 'frameRelations/frameRelationType', self._handle_framerelationtype_elt, ) ) @@ -1309,7 +1318,7 @@ warnings(True) to display corpus consistency warnings when loading data def _warn(self, *message, **kwargs): if self._warnings: - kwargs.setdefault("file", sys.stderr) + kwargs.setdefault('file', sys.stderr) print(*message, **kwargs) def readme(self): @@ -1394,7 +1403,7 @@ warnings(True) to display corpus consistency warnings when loading data locpath = os.path.join("{0}".format(self._root), self._fulltext_dir, xmlfname) # Grab the top-level xml element containing the fulltext annotation - elt = XMLCorpusView(locpath, "fullTextAnnotation")[0] + elt = XMLCorpusView(locpath, 'fullTextAnnotation')[0] info = self._handle_fulltextannotation_elt(elt) # add metadata for k, v in self._fulltext_idx[fn_docid].items(): @@ -1432,14 +1441,14 @@ warnings(True) to display corpus consistency warnings when loading data # get the name of the frame with this id number try: fentry = self._frame_idx[fn_fid] - if "_type" in fentry: + if '_type' in fentry: return fentry # full frame object is cached - name = fentry["name"] + name = fentry['name'] except TypeError: self._buildframeindex() - name = self._frame_idx[fn_fid]["name"] + name = self._frame_idx[fn_fid]['name'] except KeyError: - raise FramenetError("Unknown frame id: {0}".format(fn_fid)) + raise FramenetError('Unknown frame id: {0}'.format(fn_fid)) return self.frame_by_name(name, ignorekeys, check_cache=False) @@ -1482,18 +1491,18 @@ warnings(True) to display corpus consistency warnings when loading data # print(locpath, file=sys.stderr) # Grab the xml for the frame try: - elt = XMLCorpusView(locpath, "frame")[0] + elt = XMLCorpusView(locpath, 'frame')[0] except IOError: - raise FramenetError("Unknown frame: {0}".format(fn_fname)) + raise FramenetError('Unknown frame: {0}'.format(fn_fname)) fentry = self._handle_frame_elt(elt, ignorekeys) assert fentry - fentry.URL = self._fnweb_url + "/" + self._frame_dir + "/" + fn_fname + ".xml" + fentry.URL = self._fnweb_url + '/' + self._frame_dir + '/' + fn_fname + '.xml' # INFERENCE RULE: propagate lexical 
semtypes from the frame to all its LUs for st in fentry.semTypes: - if st.rootType.name == "Lexical_type": + if st.rootType.name == 'Lexical_type': for lu in fentry.lexUnit.values(): if not any( x is st for x in lu.semTypes @@ -1502,12 +1511,12 @@ warnings(True) to display corpus consistency warnings when loading data self._frame_idx[fentry.ID] = fentry self._cached_frames[fentry.name] = fentry.ID - """ + ''' # now set up callables to resolve the LU pointers lazily. # (could also do this here--caching avoids infinite recursion.) for luName,luinfo in fentry.lexUnit.items(): fentry.lexUnit[luName] = (lambda luID: Future(lambda: self.lu(luID)))(luinfo.ID) - """ + ''' return fentry def frame(self, fn_fid_or_fname, ignorekeys=[]): @@ -1579,7 +1588,7 @@ warnings(True) to display corpus consistency warnings when loading data """ # get the frame info by name or id number - if isinstance(fn_fid_or_fname, str): + if isinstance(fn_fid_or_fname, string_types): f = self.frame_by_name(fn_fid_or_fname, ignorekeys) else: f = self.frame_by_id(fn_fid_or_fname, ignorekeys) @@ -1625,7 +1634,7 @@ warnings(True) to display corpus consistency warnings when loading data >>> lu # doctest: +ELLIPSIS {'ID': 256, 'POS': 'V', - 'URL': 'https://framenet2.icsi.berkeley.edu/fnReports/data/lu/lu256.xml', + 'URL': u'https://framenet2.icsi.berkeley.edu/fnReports/data/lu/lu256.xml', '_type': 'lu', 'cBy': ..., 'cDate': '02/08/2001 01:27:50 PST Thu', @@ -1644,7 +1653,7 @@ warnings(True) to display corpus consistency warnings when loading data :return: Basic information about the lexical unit :rtype: dict """ - return self.lu(fn_luid, ignorekeys=["subCorpus", "exemplars"]) + return self.lu(fn_luid, ignorekeys=['subCorpus', 'exemplars']) def lu(self, fn_luid, ignorekeys=[], luName=None, frameID=None, frameName=None): """ @@ -1784,24 +1793,24 @@ warnings(True) to display corpus consistency warnings when loading data # luName, frameID, and frameName. However, this will not be listed # among the LUs for its frame. self._warn( - "LU ID not found: {0} ({1}) in {2} ({3})".format( + 'LU ID not found: {0} ({1}) in {2} ({3})'.format( luName, fn_luid, frameName, frameID ) ) luinfo = AttrDict( { - "_type": "lu", - "ID": fn_luid, - "name": luName, - "frameID": frameID, - "status": "Problem", + '_type': 'lu', + 'ID': fn_luid, + 'name': luName, + 'frameID': frameID, + 'status': 'Problem', } ) f = self.frame_by_id(luinfo.frameID) assert f.name == frameName, (f.name, frameName) - luinfo["frame"] = f + luinfo['frame'] = f self._lu_idx[fn_luid] = luinfo - elif "_type" not in luinfo: + elif '_type' not in luinfo: # we only have an index entry for the LU. loading the frame will replace this. 
f = self.frame_by_id(luinfo.frameID) luinfo = self._lu_idx[fn_luid] @@ -1826,22 +1835,22 @@ warnings(True) to display corpus consistency warnings when loading data self._buildluindex() try: - elt = XMLCorpusView(locpath, "lexUnit")[0] + elt = XMLCorpusView(locpath, 'lexUnit')[0] except IOError: - raise FramenetError("Unknown LU id: {0}".format(fn_luid)) + raise FramenetError('Unknown LU id: {0}'.format(fn_luid)) lu2 = self._handle_lexunit_elt(elt, ignorekeys) - lu.URL = self._fnweb_url + "/" + self._lu_dir + "/" + fname + lu.URL = self._fnweb_url + '/' + self._lu_dir + '/' + fname lu.subCorpus = lu2.subCorpus lu.exemplars = SpecialList( - "luexemplars", [sent for subc in lu.subCorpus for sent in subc.sentence] + 'luexemplars', [sent for subc in lu.subCorpus for sent in subc.sentence] ) for sent in lu.exemplars: - sent["LU"] = lu - sent["frame"] = lu.frame + sent['LU'] = lu + sent['frame'] = lu.frame for aset in sent.annotationSet: - aset["LU"] = lu - aset["frame"] = lu.frame + aset['LU'] = lu + aset['frame'] = lu.frame return lu @@ -1852,14 +1861,14 @@ warnings(True) to display corpus consistency warnings when loading data x for x in XMLCorpusView( self.abspath("semTypes.xml"), - "semTypes/semType", + 'semTypes/semType', self._handle_semtype_elt, ) ] for st in semtypeXML: - n = st["name"] - a = st["abbrev"] - i = st["ID"] + n = st['name'] + a = st['abbrev'] + i = st['ID'] # Both name and abbrev should be able to retrieve the # ID. The ID will retrieve the semantic type dict itself. self._semtypes[n] = i @@ -1926,7 +1935,7 @@ warnings(True) to display corpus consistency warnings when loading data changed = True nPropagations += 1 if ( - ferel.type.name in ["Perspective_on", "Subframe", "Precedes"] + ferel.type.name in ['Perspective_on', 'Subframe', 'Precedes'] and subST and subST is not superST ): @@ -2073,7 +2082,7 @@ warnings(True) to display corpus consistency warnings when loading data ) def fes(self, name=None, frame=None): - """ + ''' Lists frame element objects. If 'name' is provided, this is treated as a case-insensitive regular expression to filter by frame name. (Case-insensitivity is because casing of frame element names is not always @@ -2105,12 +2114,12 @@ warnings(True) to display corpus consistency warnings when loading data :type name: str :return: A list of matching frame elements :rtype: list(AttrDict) - """ + ''' # what frames are we searching in? 
if frame is not None: if isinstance(frame, int): frames = [self.frame(frame)] - elif isinstance(frame, str): + elif isinstance(frame, string_types): frames = self.frames(frame) else: frames = [frame] @@ -2239,7 +2248,7 @@ warnings(True) to display corpus consistency warnings when loading data if frame is not None: if isinstance(frame, int): frameIDs = {frame} - elif isinstance(frame, str): + elif isinstance(frame, string_types): frameIDs = {f.ID for f in self.frames(frame)} else: frameIDs = {frame.ID} @@ -2247,7 +2256,7 @@ warnings(True) to display corpus consistency warnings when loading data elif frame is not None: # all LUs in matching frames if isinstance(frame, int): frames = [self.frame(frame)] - elif isinstance(frame, str): + elif isinstance(frame, string_types): frames = self.frames(frame) else: frames = [frame] @@ -2321,7 +2330,7 @@ warnings(True) to display corpus consistency warnings when loading data return ftlist else: return PrettyList( - x for x in ftlist if re.search(name, x["filename"]) is not None + x for x in ftlist if re.search(name, x['filename']) is not None ) def docs(self, name=None): @@ -2362,7 +2371,7 @@ warnings(True) to display corpus consistency warnings when loading data aset for sent in self.ft_sents() for aset in sent.annotationSet[1:] - if luNamePattern is None or aset.get("luID", "CXN_ASET") in matchedLUIDs + if luNamePattern is None or aset.get('luID', 'CXN_ASET') in matchedLUIDs ) else: ftpart = [] @@ -2383,17 +2392,17 @@ warnings(True) to display corpus consistency warnings when loading data be specified to retrieve sentences with both overt FEs (in either order). """ if fe is None and fe2 is not None: - raise FramenetError("exemplars(..., fe=None, fe2=) is not allowed") + raise FramenetError('exemplars(..., fe=None, fe2=) is not allowed') elif fe is not None and fe2 is not None: - if not isinstance(fe2, str): - if isinstance(fe, str): + if not isinstance(fe2, string_types): + if isinstance(fe, string_types): # fe2 is specific to a particular frame. swap fe and fe2 so fe is always used to determine the frame. 
fe, fe2 = fe2, fe elif fe.frame is not fe2.frame: # ensure frames match raise FramenetError( - "exemplars() call with inconsistent `fe` and `fe2` specification (frames must match)" + 'exemplars() call with inconsistent `fe` and `fe2` specification (frames must match)' ) - if frame is None and fe is not None and not isinstance(fe, str): + if frame is None and fe is not None and not isinstance(fe, string_types): frame = fe.frame # narrow down to frames matching criteria @@ -2402,7 +2411,7 @@ warnings(True) to display corpus consistency warnings when loading data list ) # frame name -> matching LUs, if luNamePattern is specified if frame is not None or luNamePattern is not None: - if frame is None or isinstance(frame, str): + if frame is None or isinstance(frame, string_types): if luNamePattern is not None: frames = set() for lu in self.lus(luNamePattern, frame=frame): @@ -2421,7 +2430,7 @@ warnings(True) to display corpus consistency warnings when loading data lusByFrame = {frame.name: self.lus(luNamePattern, frame=frame)} if fe is not None: # narrow to frames that define this FE - if isinstance(fe, str): + if isinstance(fe, string_types): frames = PrettyLazyIteratorList( f for f in frames @@ -2431,12 +2440,12 @@ warnings(True) to display corpus consistency warnings when loading data else: if fe.frame not in frames: raise FramenetError( - "exemplars() call with inconsistent `frame` and `fe` specification" + 'exemplars() call with inconsistent `frame` and `fe` specification' ) frames = [fe.frame] if fe2 is not None: # narrow to frames that ALSO define this FE - if isinstance(fe2, str): + if isinstance(fe2, string_types): frames = PrettyLazyIteratorList( f for f in frames @@ -2463,13 +2472,13 @@ warnings(True) to display corpus consistency warnings when loading data if fe is not None: fes = ( {ffe for ffe in f.FE.keys() if re.search(fe, ffe, re.I)} - if isinstance(fe, str) + if isinstance(fe, string_types) else {fe.name} ) if fe2 is not None: fes2 = ( {ffe for ffe in f.FE.keys() if re.search(fe2, ffe, re.I)} - if isinstance(fe2, str) + if isinstance(fe2, string_types) else {fe2.name} ) @@ -2494,9 +2503,9 @@ warnings(True) to display corpus consistency warnings when loading data If 'fes' is None, returns all overt FE names. """ overtNames = set(list(zip(*ex.FE[0]))[2]) if ex.FE[0] else set() - if "FE2" in ex: + if 'FE2' in ex: overtNames |= set(list(zip(*ex.FE2[0]))[2]) if ex.FE2[0] else set() - if "FE3" in ex: + if 'FE3' in ex: overtNames |= set(list(zip(*ex.FE3[0]))[2]) if ex.FE3[0] else set() return overtNames & fes if fes is not None else overtNames @@ -2581,7 +2590,7 @@ warnings(True) to display corpus consistency warnings when loading data # lookup by 'frame' if frame is not None: - if isinstance(frame, dict) and "frameRelations" in frame: + if isinstance(frame, dict) and 'frameRelations' in frame: rels = PrettyList(frame.frameRelations) else: if not isinstance(frame, int): @@ -2715,11 +2724,11 @@ warnings(True) to display corpus consistency warnings when loading data # Ignore these attributes when loading attributes from an xml node ignore_attrs = [ #'cBy', 'cDate', 'mDate', # <-- annotation metadata that could be of interest - "xsi", - "schemaLocation", - "xmlns", - "bgColor", - "fgColor", + 'xsi', + 'schemaLocation', + 'xmlns', + 'bgColor', + 'fgColor', ] for attr in attr_dict: @@ -2744,35 +2753,35 @@ warnings(True) to display corpus consistency warnings when loading data """ try: - """ + ''' # Look for boundary issues in markup. (Sometimes FEs are pluralized in definitions.) 
m = re.search(r'\w[<][^/]|[<][/][^>]+[>](s\w|[a-rt-z0-9])', data) if m: print('Markup boundary:', data[max(0,m.start(0)-10):m.end(0)+10].replace('\n',' '), file=sys.stderr) - """ - - data = data.replace("", "") - data = data.replace("", "") - data = re.sub('', "", data) - data = data.replace("", "") - data = data.replace("", "") - data = data.replace("", "") - data = data.replace("", "") - data = data.replace("", "") - data = data.replace("", "") - data = data.replace("", "") - data = data.replace("", "'") - data = data.replace("", "'") - data = data.replace("", "") - data = data.replace("", "") - data = data.replace("", "") - data = data.replace("", "") + ''' + + data = data.replace('', '') + data = data.replace('', '') + data = re.sub('', '', data) + data = data.replace('', '') + data = data.replace('', '') + data = data.replace('', '') + data = data.replace('', '') + data = data.replace('', '') + data = data.replace('', '') + data = data.replace('', '') + data = data.replace('', "'") + data = data.replace('', "'") + data = data.replace('', '') + data = data.replace('', '') + data = data.replace('', '') + data = data.replace('', '') # Get rid of and tags - data = data.replace("", "") - data = data.replace("", "") + data = data.replace('', '') + data = data.replace('', '') - data = data.replace("\n", " ") + data = data.replace('\n', ' ') except AttributeError: pass @@ -2799,15 +2808,15 @@ warnings(True) to display corpus consistency warnings when loading data corpid = ftinfo.ID retlist = [] for sub in elt: - if sub.tag.endswith("document"): + if sub.tag.endswith('document'): doc = self._load_xml_attributes(AttrDict(), sub) - if "name" in doc: + if 'name' in doc: docname = doc.name else: docname = doc.description doc.filename = "{0}__{1}.xml".format(corpname, docname) doc.URL = ( - self._fnweb_url + "/" + self._fulltext_dir + "/" + doc.filename + self._fnweb_url + '/' + self._fulltext_dir + '/' + doc.filename ) doc.corpname = corpname doc.corpid = corpid @@ -2819,59 +2828,59 @@ warnings(True) to display corpus consistency warnings when loading data """Load the info for a Frame from a frame xml file""" frinfo = self._load_xml_attributes(AttrDict(), elt) - frinfo["_type"] = "frame" - frinfo["definition"] = "" - frinfo["definitionMarkup"] = "" - frinfo["FE"] = PrettyDict() - frinfo["FEcoreSets"] = [] - frinfo["lexUnit"] = PrettyDict() - frinfo["semTypes"] = [] + frinfo['_type'] = 'frame' + frinfo['definition'] = "" + frinfo['definitionMarkup'] = "" + frinfo['FE'] = PrettyDict() + frinfo['FEcoreSets'] = [] + frinfo['lexUnit'] = PrettyDict() + frinfo['semTypes'] = [] for k in ignorekeys: if k in frinfo: del frinfo[k] for sub in elt: - if sub.tag.endswith("definition") and "definition" not in ignorekeys: - frinfo["definitionMarkup"] = sub.text - frinfo["definition"] = self._strip_tags(sub.text) - elif sub.tag.endswith("FE") and "FE" not in ignorekeys: + if sub.tag.endswith('definition') and 'definition' not in ignorekeys: + frinfo['definitionMarkup'] = sub.text + frinfo['definition'] = self._strip_tags(sub.text) + elif sub.tag.endswith('FE') and 'FE' not in ignorekeys: feinfo = self._handle_fe_elt(sub) - frinfo["FE"][feinfo.name] = feinfo - feinfo["frame"] = frinfo # backpointer - elif sub.tag.endswith("FEcoreSet") and "FEcoreSet" not in ignorekeys: + frinfo['FE'][feinfo.name] = feinfo + feinfo['frame'] = frinfo # backpointer + elif sub.tag.endswith('FEcoreSet') and 'FEcoreSet' not in ignorekeys: coreset = self._handle_fecoreset_elt(sub) # assumes all FEs have been loaded before coresets - 
frinfo["FEcoreSets"].append( - PrettyList(frinfo["FE"][fe.name] for fe in coreset) + frinfo['FEcoreSets'].append( + PrettyList(frinfo['FE'][fe.name] for fe in coreset) ) - elif sub.tag.endswith("lexUnit") and "lexUnit" not in ignorekeys: + elif sub.tag.endswith('lexUnit') and 'lexUnit' not in ignorekeys: luentry = self._handle_framelexunit_elt(sub) - if luentry["status"] in self._bad_statuses: + if luentry['status'] in self._bad_statuses: # problematic LU entry; ignore it continue - luentry["frame"] = frinfo - luentry["URL"] = ( + luentry['frame'] = frinfo + luentry['URL'] = ( self._fnweb_url - + "/" + + '/' + self._lu_dir - + "/" - + "lu{0}.xml".format(luentry["ID"]) + + '/' + + "lu{0}.xml".format(luentry['ID']) ) - luentry["subCorpus"] = Future( + luentry['subCorpus'] = Future( (lambda lu: lambda: self._lu_file(lu).subCorpus)(luentry) ) - luentry["exemplars"] = Future( + luentry['exemplars'] = Future( (lambda lu: lambda: self._lu_file(lu).exemplars)(luentry) ) - frinfo["lexUnit"][luentry.name] = luentry + frinfo['lexUnit'][luentry.name] = luentry if not self._lu_idx: self._buildluindex() self._lu_idx[luentry.ID] = luentry - elif sub.tag.endswith("semType") and "semTypes" not in ignorekeys: + elif sub.tag.endswith('semType') and 'semTypes' not in ignorekeys: semtypeinfo = self._load_xml_attributes(AttrDict(), sub) - frinfo["semTypes"].append(self.semtype(semtypeinfo.ID)) + frinfo['semTypes'].append(self.semtype(semtypeinfo.ID)) - frinfo["frameRelations"] = self.frame_relations(frame=frinfo) + frinfo['frameRelations'] = self.frame_relations(frame=frinfo) # resolve 'requires' and 'excludes' links between FEs of this frame for fe in frinfo.FE.values(): @@ -2898,32 +2907,32 @@ warnings(True) to display corpus consistency warnings when loading data def _handle_framerelationtype_elt(self, elt, *args): """Load frame-relation element and its child fe-relation elements from frRelation.xml.""" info = self._load_xml_attributes(AttrDict(), elt) - info["_type"] = "framerelationtype" - info["frameRelations"] = PrettyList() + info['_type'] = 'framerelationtype' + info['frameRelations'] = PrettyList() for sub in elt: - if sub.tag.endswith("frameRelation"): + if sub.tag.endswith('frameRelation'): frel = self._handle_framerelation_elt(sub) - frel["type"] = info # backpointer + frel['type'] = info # backpointer for ferel in frel.feRelations: - ferel["type"] = info - info["frameRelations"].append(frel) + ferel['type'] = info + info['frameRelations'].append(frel) return info def _handle_framerelation_elt(self, elt): """Load frame-relation element and its child fe-relation elements from frRelation.xml.""" info = self._load_xml_attributes(AttrDict(), elt) - assert info["superFrameName"] != info["subFrameName"], (elt, info) - info["_type"] = "framerelation" - info["feRelations"] = PrettyList() + assert info['superFrameName'] != info['subFrameName'], (elt, info) + info['_type'] = 'framerelation' + info['feRelations'] = PrettyList() for sub in elt: - if sub.tag.endswith("FERelation"): + if sub.tag.endswith('FERelation'): ferel = self._handle_elt(sub) - ferel["_type"] = "ferelation" - ferel["frameRelation"] = info # backpointer - info["feRelations"].append(ferel) + ferel['_type'] = 'ferelation' + ferel['frameRelation'] = info # backpointer + info['feRelations'].append(ferel) return info @@ -2933,16 +2942,16 @@ warnings(True) to display corpus consistency warnings when loading data element (which we ignore here) and a bunch of 'sentence' elements.""" info = AttrDict() - info["_type"] = "fulltext_annotation" - 
info["sentence"] = [] + info['_type'] = 'fulltext_annotation' + info['sentence'] = [] for sub in elt: - if sub.tag.endswith("header"): + if sub.tag.endswith('header'): continue # not used - elif sub.tag.endswith("sentence"): + elif sub.tag.endswith('sentence'): s = self._handle_fulltext_sentence_elt(sub) s.doc = info - info["sentence"].append(s) + info['sentence'].append(s) return info @@ -2951,28 +2960,28 @@ warnings(True) to display corpus consistency warnings when loading data 'sentence' element contains a "text" and "annotationSet" sub elements.""" info = self._load_xml_attributes(AttrDict(), elt) - info["_type"] = "fulltext_sentence" - info["annotationSet"] = [] - info["targets"] = [] + info['_type'] = "fulltext_sentence" + info['annotationSet'] = [] + info['targets'] = [] target_spans = set() - info["_ascii"] = types.MethodType( + info['_ascii'] = types.MethodType( _annotation_ascii, info ) # attach a method for this instance - info["text"] = "" + info['text'] = "" for sub in elt: - if sub.tag.endswith("text"): - info["text"] = self._strip_tags(sub.text) - elif sub.tag.endswith("annotationSet"): + if sub.tag.endswith('text'): + info['text'] = self._strip_tags(sub.text) + elif sub.tag.endswith('annotationSet'): a = self._handle_fulltextannotationset_elt( - sub, is_pos=(len(info["annotationSet"]) == 0) + sub, is_pos=(len(info['annotationSet']) == 0) ) - if "cxnID" in a: # ignoring construction annotations for now + if 'cxnID' in a: # ignoring construction annotations for now continue a.sent = info a.text = info.text - info["annotationSet"].append(a) - if "Target" in a: + info['annotationSet'].append(a) + if 'Target' in a: for tspan in a.Target: if tspan in target_spans: self._warn( @@ -2980,19 +2989,19 @@ warnings(True) to display corpus consistency warnings when loading data info.text[slice(*tspan)] ), tspan, - "in sentence", - info["ID"], + 'in sentence', + info['ID'], info.text, ) # this can happen in cases like "chemical and biological weapons" # being annotated as "chemical weapons" and "biological weapons" else: target_spans.add(tspan) - info["targets"].append((a.Target, a.luName, a.frameName)) + info['targets'].append((a.Target, a.luName, a.frameName)) - assert info["annotationSet"][0].status == "UNANN" - info["POS"] = info["annotationSet"][0].POS - info["POS_tagset"] = info["annotationSet"][0].POS_tagset + assert info['annotationSet'][0].status == 'UNANN' + info['POS'] = info['annotationSet'][0].POS + info['POS_tagset'] = info['annotationSet'][0].POS_tagset return info def _handle_fulltextannotationset_elt(self, elt, is_pos=False): @@ -3001,62 +3010,62 @@ warnings(True) to display corpus consistency warnings when loading data info = self._handle_luannotationset_elt(elt, is_pos=is_pos) if not is_pos: - info["_type"] = "fulltext_annotationset" - if "cxnID" not in info: # ignoring construction annotations for now - info["LU"] = self.lu( + info['_type'] = 'fulltext_annotationset' + if 'cxnID' not in info: # ignoring construction annotations for now + info['LU'] = self.lu( info.luID, luName=info.luName, frameID=info.frameID, frameName=info.frameName, ) - info["frame"] = info.LU.frame + info['frame'] = info.LU.frame return info def _handle_fulltextlayer_elt(self, elt): """Load information from the given 'layer' element. 
Each 'layer' contains several "label" elements.""" info = self._load_xml_attributes(AttrDict(), elt) - info["_type"] = "layer" - info["label"] = [] + info['_type'] = 'layer' + info['label'] = [] for sub in elt: - if sub.tag.endswith("label"): + if sub.tag.endswith('label'): l = self._load_xml_attributes(AttrDict(), sub) - info["label"].append(l) + info['label'].append(l) return info def _handle_framelexunit_elt(self, elt): """Load the lexical unit info from an xml element in a frame's xml file.""" luinfo = AttrDict() - luinfo["_type"] = "lu" + luinfo['_type'] = 'lu' luinfo = self._load_xml_attributes(luinfo, elt) luinfo["definition"] = "" luinfo["definitionMarkup"] = "" luinfo["sentenceCount"] = PrettyDict() - luinfo["lexemes"] = PrettyList() # multiword LUs have multiple lexemes - luinfo["semTypes"] = PrettyList() # an LU can have multiple semtypes + luinfo['lexemes'] = PrettyList() # multiword LUs have multiple lexemes + luinfo['semTypes'] = PrettyList() # an LU can have multiple semtypes for sub in elt: - if sub.tag.endswith("definition"): - luinfo["definitionMarkup"] = sub.text - luinfo["definition"] = self._strip_tags(sub.text) - elif sub.tag.endswith("sentenceCount"): - luinfo["sentenceCount"] = self._load_xml_attributes(PrettyDict(), sub) - elif sub.tag.endswith("lexeme"): + if sub.tag.endswith('definition'): + luinfo['definitionMarkup'] = sub.text + luinfo['definition'] = self._strip_tags(sub.text) + elif sub.tag.endswith('sentenceCount'): + luinfo['sentenceCount'] = self._load_xml_attributes(PrettyDict(), sub) + elif sub.tag.endswith('lexeme'): lexemeinfo = self._load_xml_attributes(PrettyDict(), sub) - if not isinstance(lexemeinfo.name, str): + if not isinstance(lexemeinfo.name, string_types): # some lexeme names are ints by default: e.g., # thousand.num has lexeme with name="1000" lexemeinfo.name = str(lexemeinfo.name) - luinfo["lexemes"].append(lexemeinfo) - elif sub.tag.endswith("semType"): + luinfo['lexemes'].append(lexemeinfo) + elif sub.tag.endswith('semType'): semtypeinfo = self._load_xml_attributes(PrettyDict(), sub) - luinfo["semTypes"].append(self.semtype(semtypeinfo.ID)) + luinfo['semTypes'].append(self.semtype(semtypeinfo.ID)) # sort lexemes by 'order' attribute # otherwise, e.g., 'write down.v' may have lexemes in wrong order - luinfo["lexemes"].sort(key=lambda x: x.order) + luinfo['lexemes'].sort(key=lambda x: x.order) return luinfo @@ -3067,33 +3076,33 @@ warnings(True) to display corpus consistency warnings when loading data (which are not included in frame files). 
""" luinfo = self._load_xml_attributes(AttrDict(), elt) - luinfo["_type"] = "lu" - luinfo["definition"] = "" - luinfo["definitionMarkup"] = "" - luinfo["subCorpus"] = PrettyList() - luinfo["lexemes"] = PrettyList() # multiword LUs have multiple lexemes - luinfo["semTypes"] = PrettyList() # an LU can have multiple semtypes + luinfo['_type'] = 'lu' + luinfo['definition'] = "" + luinfo['definitionMarkup'] = "" + luinfo['subCorpus'] = PrettyList() + luinfo['lexemes'] = PrettyList() # multiword LUs have multiple lexemes + luinfo['semTypes'] = PrettyList() # an LU can have multiple semtypes for k in ignorekeys: if k in luinfo: del luinfo[k] for sub in elt: - if sub.tag.endswith("header"): + if sub.tag.endswith('header'): continue # not used - elif sub.tag.endswith("valences"): + elif sub.tag.endswith('valences'): continue # not used - elif sub.tag.endswith("definition") and "definition" not in ignorekeys: - luinfo["definitionMarkup"] = sub.text - luinfo["definition"] = self._strip_tags(sub.text) - elif sub.tag.endswith("subCorpus") and "subCorpus" not in ignorekeys: + elif sub.tag.endswith('definition') and 'definition' not in ignorekeys: + luinfo['definitionMarkup'] = sub.text + luinfo['definition'] = self._strip_tags(sub.text) + elif sub.tag.endswith('subCorpus') and 'subCorpus' not in ignorekeys: sc = self._handle_lusubcorpus_elt(sub) if sc is not None: - luinfo["subCorpus"].append(sc) - elif sub.tag.endswith("lexeme") and "lexeme" not in ignorekeys: - luinfo["lexemes"].append(self._load_xml_attributes(PrettyDict(), sub)) - elif sub.tag.endswith("semType") and "semType" not in ignorekeys: + luinfo['subCorpus'].append(sc) + elif sub.tag.endswith('lexeme') and 'lexeme' not in ignorekeys: + luinfo['lexemes'].append(self._load_xml_attributes(PrettyDict(), sub)) + elif sub.tag.endswith('semType') and 'semType' not in ignorekeys: semtypeinfo = self._load_xml_attributes(AttrDict(), sub) - luinfo["semTypes"].append(self.semtype(semtypeinfo.ID)) + luinfo['semTypes'].append(self.semtype(semtypeinfo.ID)) return luinfo @@ -3101,99 +3110,99 @@ warnings(True) to display corpus consistency warnings when loading data """Load a subcorpus of a lexical unit from the given xml.""" sc = AttrDict() try: - sc["name"] = elt.get("name") + sc['name'] = elt.get('name') except AttributeError: return None - sc["_type"] = "lusubcorpus" - sc["sentence"] = [] + sc['_type'] = "lusubcorpus" + sc['sentence'] = [] for sub in elt: - if sub.tag.endswith("sentence"): + if sub.tag.endswith('sentence'): s = self._handle_lusentence_elt(sub) if s is not None: - sc["sentence"].append(s) + sc['sentence'].append(s) return sc def _handle_lusentence_elt(self, elt): """Load a sentence from a subcorpus of an LU from xml.""" info = self._load_xml_attributes(AttrDict(), elt) - info["_type"] = "lusentence" - info["annotationSet"] = [] - info["_ascii"] = types.MethodType( + info['_type'] = 'lusentence' + info['annotationSet'] = [] + info['_ascii'] = types.MethodType( _annotation_ascii, info ) # attach a method for this instance for sub in elt: - if sub.tag.endswith("text"): - info["text"] = self._strip_tags(sub.text) - elif sub.tag.endswith("annotationSet"): + if sub.tag.endswith('text'): + info['text'] = self._strip_tags(sub.text) + elif sub.tag.endswith('annotationSet'): annset = self._handle_luannotationset_elt( - sub, is_pos=(len(info["annotationSet"]) == 0) + sub, is_pos=(len(info['annotationSet']) == 0) ) if annset is not None: - assert annset.status == "UNANN" or "FE" in annset, annset - if annset.status != "UNANN": - 
info["frameAnnotation"] = annset + assert annset.status == 'UNANN' or 'FE' in annset, annset + if annset.status != 'UNANN': + info['frameAnnotation'] = annset # copy layer info up to current level for k in ( - "Target", - "FE", - "FE2", - "FE3", - "GF", - "PT", - "POS", - "POS_tagset", - "Other", - "Sent", - "Verb", - "Noun", - "Adj", - "Adv", - "Prep", - "Scon", - "Art", + 'Target', + 'FE', + 'FE2', + 'FE3', + 'GF', + 'PT', + 'POS', + 'POS_tagset', + 'Other', + 'Sent', + 'Verb', + 'Noun', + 'Adj', + 'Adv', + 'Prep', + 'Scon', + 'Art', ): if k in annset: info[k] = annset[k] - info["annotationSet"].append(annset) - annset["sent"] = info - annset["text"] = info.text + info['annotationSet'].append(annset) + annset['sent'] = info + annset['text'] = info.text return info def _handle_luannotationset_elt(self, elt, is_pos=False): """Load an annotation set from a sentence in an subcorpus of an LU""" info = self._load_xml_attributes(AttrDict(), elt) - info["_type"] = "posannotationset" if is_pos else "luannotationset" - info["layer"] = [] - info["_ascii"] = types.MethodType( + info['_type'] = 'posannotationset' if is_pos else 'luannotationset' + info['layer'] = [] + info['_ascii'] = types.MethodType( _annotation_ascii, info ) # attach a method for this instance - if "cxnID" in info: # ignoring construction annotations for now. + if 'cxnID' in info: # ignoring construction annotations for now. return info for sub in elt: - if sub.tag.endswith("layer"): + if sub.tag.endswith('layer'): l = self._handle_lulayer_elt(sub) if l is not None: overt = [] ni = {} # null instantiations - info["layer"].append(l) + info['layer'].append(l) for lbl in l.label: - if "start" in lbl: + if 'start' in lbl: thespan = (lbl.start, lbl.end + 1, lbl.name) if l.name not in ( - "Sent", - "Other", + 'Sent', + 'Other', ): # 'Sent' and 'Other' layers sometimes contain accidental duplicate spans assert thespan not in overt, (info.ID, l.name, thespan) overt.append(thespan) else: # null instantiation if lbl.name in ni: self._warn( - "FE with multiple NI entries:", + 'FE with multiple NI entries:', lbl.name, ni[lbl.name], lbl.itype, @@ -3202,120 +3211,120 @@ warnings(True) to display corpus consistency warnings when loading data ni[lbl.name] = lbl.itype overt = sorted(overt) - if l.name == "Target": + if l.name == 'Target': if not overt: self._warn( - "Skipping empty Target layer in annotation set ID={0}".format( + 'Skipping empty Target layer in annotation set ID={0}'.format( info.ID ) ) continue - assert all(lblname == "Target" for i, j, lblname in overt) - if "Target" in info: + assert all(lblname == 'Target' for i, j, lblname in overt) + if 'Target' in info: self._warn( - "Annotation set {0} has multiple Target layers".format( + 'Annotation set {0} has multiple Target layers'.format( info.ID ) ) else: - info["Target"] = [(i, j) for (i, j, _) in overt] - elif l.name == "FE": + info['Target'] = [(i, j) for (i, j, _) in overt] + elif l.name == 'FE': if l.rank == 1: - assert "FE" not in info - info["FE"] = (overt, ni) + assert 'FE' not in info + info['FE'] = (overt, ni) # assert False,info else: # sometimes there are 3 FE layers! e.g. 
Change_position_on_a_scale.fall.v assert 2 <= l.rank <= 3, l.rank - k = "FE" + str(l.rank) + k = 'FE' + str(l.rank) assert k not in info info[k] = (overt, ni) - elif l.name in ("GF", "PT"): + elif l.name in ('GF', 'PT'): assert l.rank == 1 info[l.name] = overt - elif l.name in ("BNC", "PENN"): + elif l.name in ('BNC', 'PENN'): assert l.rank == 1 - info["POS"] = overt - info["POS_tagset"] = l.name + info['POS'] = overt + info['POS_tagset'] = l.name else: if is_pos: - if l.name not in ("NER", "WSL"): + if l.name not in ('NER', 'WSL'): self._warn( - "Unexpected layer in sentence annotationset:", + 'Unexpected layer in sentence annotationset:', l.name, ) else: if l.name not in ( - "Sent", - "Verb", - "Noun", - "Adj", - "Adv", - "Prep", - "Scon", - "Art", - "Other", + 'Sent', + 'Verb', + 'Noun', + 'Adj', + 'Adv', + 'Prep', + 'Scon', + 'Art', + 'Other', ): self._warn( - "Unexpected layer in frame annotationset:", l.name + 'Unexpected layer in frame annotationset:', l.name ) info[l.name] = overt - if not is_pos and "cxnID" not in info: - if "Target" not in info: - self._warn("Missing target in annotation set ID={0}".format(info.ID)) - assert "FE" in info - if "FE3" in info: - assert "FE2" in info + if not is_pos and 'cxnID' not in info: + if 'Target' not in info: + self._warn('Missing target in annotation set ID={0}'.format(info.ID)) + assert 'FE' in info + if 'FE3' in info: + assert 'FE2' in info return info def _handle_lulayer_elt(self, elt): """Load a layer from an annotation set""" layer = self._load_xml_attributes(AttrDict(), elt) - layer["_type"] = "lulayer" - layer["label"] = [] + layer['_type'] = 'lulayer' + layer['label'] = [] for sub in elt: - if sub.tag.endswith("label"): + if sub.tag.endswith('label'): l = self._load_xml_attributes(AttrDict(), sub) if l is not None: - layer["label"].append(l) + layer['label'].append(l) return layer def _handle_fe_elt(self, elt): feinfo = self._load_xml_attributes(AttrDict(), elt) - feinfo["_type"] = "fe" - feinfo["definition"] = "" - feinfo["definitionMarkup"] = "" - feinfo["semType"] = None - feinfo["requiresFE"] = None - feinfo["excludesFE"] = None + feinfo['_type'] = 'fe' + feinfo['definition'] = "" + feinfo['definitionMarkup'] = "" + feinfo['semType'] = None + feinfo['requiresFE'] = None + feinfo['excludesFE'] = None for sub in elt: - if sub.tag.endswith("definition"): - feinfo["definitionMarkup"] = sub.text - feinfo["definition"] = self._strip_tags(sub.text) - elif sub.tag.endswith("semType"): + if sub.tag.endswith('definition'): + feinfo['definitionMarkup'] = sub.text + feinfo['definition'] = self._strip_tags(sub.text) + elif sub.tag.endswith('semType'): stinfo = self._load_xml_attributes(AttrDict(), sub) - feinfo["semType"] = self.semtype(stinfo.ID) - elif sub.tag.endswith("requiresFE"): - feinfo["requiresFE"] = self._load_xml_attributes(AttrDict(), sub) - elif sub.tag.endswith("excludesFE"): - feinfo["excludesFE"] = self._load_xml_attributes(AttrDict(), sub) + feinfo['semType'] = self.semtype(stinfo.ID) + elif sub.tag.endswith('requiresFE'): + feinfo['requiresFE'] = self._load_xml_attributes(AttrDict(), sub) + elif sub.tag.endswith('excludesFE'): + feinfo['excludesFE'] = self._load_xml_attributes(AttrDict(), sub) return feinfo def _handle_semtype_elt(self, elt, tagspec=None): semt = self._load_xml_attributes(AttrDict(), elt) - semt["_type"] = "semtype" - semt["superType"] = None - semt["subTypes"] = PrettyList() + semt['_type'] = 'semtype' + semt['superType'] = None + semt['subTypes'] = PrettyList() for sub in elt: if sub.text is not None: - 
semt["definitionMarkup"] = sub.text - semt["definition"] = self._strip_tags(sub.text) + semt['definitionMarkup'] = sub.text + semt['definition'] = self._strip_tags(sub.text) else: supertypeinfo = self._load_xml_attributes(AttrDict(), sub) - semt["superType"] = supertypeinfo + semt['superType'] = supertypeinfo # the supertype may not have been loaded yet return semt @@ -3332,15 +3341,15 @@ def demo(): # buildindexes(). We do this here just for demo purposes. If the # indexes are not built explicitely, they will be built as needed. # - print("Building the indexes...") + print('Building the indexes...') fn.buildindexes() # # Get some statistics about the corpus # - print("Number of Frames:", len(fn.frames())) - print("Number of Lexical Units:", len(fn.lus())) - print("Number of annotated documents:", len(fn.docs())) + print('Number of Frames:', len(fn.frames())) + print('Number of Lexical Units:', len(fn.lus())) + print('Number of annotated documents:', len(fn.docs())) print() # @@ -3349,7 +3358,7 @@ def demo(): print( 'getting frames whose name matches the (case insensitive) regex: "(?i)medical"' ) - medframes = fn.frames(r"(?i)medical") + medframes = fn.frames(r'(?i)medical') print('Found {0} Frames whose name matches "(?i)medical":'.format(len(medframes))) print([(f.name, f.ID) for f in medframes]) @@ -3369,7 +3378,7 @@ def demo(): len(m_frame.frameRelations), ) for fr in m_frame.frameRelations: - print(" ", fr) + print(' ', fr) # # get the names of the Frame Elements @@ -3378,13 +3387,13 @@ def demo(): '\nNumber of Frame Elements in the "{0}" frame:'.format(m_frame.name), len(m_frame.FE), ) - print(" ", [x for x in m_frame.FE]) + print(' ', [x for x in m_frame.FE]) # # get the names of the "Core" Frame Elements # print('\nThe "core" Frame Elements in the "{0}" frame:'.format(m_frame.name)) - print(" ", [x.name for x in m_frame.FE.values() if x.coreType == "Core"]) + print(' ', [x.name for x in m_frame.FE.values() if x.coreType == "Core"]) # # get all of the Lexical Units that are incorporated in the @@ -3395,9 +3404,9 @@ def demo(): ailment_lus = [ x for x in m_frame.lexUnit.values() - if "incorporatedFE" in x and x.incorporatedFE == "Ailment" + if 'incorporatedFE' in x and x.incorporatedFE == 'Ailment' ] - print(" ", [x.name for x in ailment_lus]) + print(' ', [x.name for x in ailment_lus]) # # get all of the Lexical Units for the frame @@ -3406,20 +3415,20 @@ def demo(): '\nNumber of Lexical Units in the "{0}" frame:'.format(m_frame.name), len(m_frame.lexUnit), ) - print(" ", [x.name for x in m_frame.lexUnit.values()][:5], "...") + print(' ', [x.name for x in m_frame.lexUnit.values()][:5], '...') # # get basic info on the second LU in the frame # - tmp_id = m_frame.lexUnit["ailment.n"].ID # grab the id of the specified LU + tmp_id = m_frame.lexUnit['ailment.n'].ID # grab the id of the specified LU luinfo = fn.lu_basic(tmp_id) # get basic info on the LU - print("\nInformation on the LU: {0}".format(luinfo.name)) + print('\nInformation on the LU: {0}'.format(luinfo.name)) pprint(luinfo) # # Get a list of all of the corpora used for fulltext annotation # - print("\nNames of all of the corpora used for fulltext annotation:") + print('\nNames of all of the corpora used for fulltext annotation:') allcorpora = set(x.corpname for x in fn.docs_metadata()) pprint(list(allcorpora)) @@ -3443,8 +3452,8 @@ def demo(): print( '\nSearching for all Frames that have a lemma that matches the regexp: "^run.v$":' ) - pprint(fn.frames_by_lemma(r"^run.v$")) + pprint(fn.frames_by_lemma(r'^run.v$')) -if 
__name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/corpus/reader/ieer.py b/nlp_resource_data/nltk/corpus/reader/ieer.py index 6f80742..1628e9c 100644 --- a/nlp_resource_data/nltk/corpus/reader/ieer.py +++ b/nlp_resource_data/nltk/corpus/reader/ieer.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: IEER Corpus Reader # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Steven Bird # Edward Loper # URL: @@ -20,28 +20,32 @@ and filenames were shortened. The corpus contains the following files: APW_19980314, APW_19980424, APW_19980429, NYT_19980315, NYT_19980403, and NYT_19980407. """ +from __future__ import unicode_literals + +from six import string_types import nltk +from nltk import compat from nltk.corpus.reader.api import * #: A dictionary whose keys are the names of documents in this corpus; #: and whose values are descriptions of those documents' contents. titles = { - "APW_19980314": "Associated Press Weekly, 14 March 1998", - "APW_19980424": "Associated Press Weekly, 24 April 1998", - "APW_19980429": "Associated Press Weekly, 29 April 1998", - "NYT_19980315": "New York Times, 15 March 1998", - "NYT_19980403": "New York Times, 3 April 1998", - "NYT_19980407": "New York Times, 7 April 1998", + 'APW_19980314': 'Associated Press Weekly, 14 March 1998', + 'APW_19980424': 'Associated Press Weekly, 24 April 1998', + 'APW_19980429': 'Associated Press Weekly, 29 April 1998', + 'NYT_19980315': 'New York Times, 15 March 1998', + 'NYT_19980403': 'New York Times, 3 April 1998', + 'NYT_19980407': 'New York Times, 7 April 1998', } #: A list of all documents in this corpus. documents = sorted(titles) - +@compat.python_2_unicode_compatible class IEERDocument(object): - def __init__(self, text, docno=None, doctype=None, date_time=None, headline=""): + def __init__(self, text, docno=None, doctype=None, date_time=None, headline=''): self.text = text self.docno = docno self.doctype = doctype @@ -50,15 +54,15 @@ class IEERDocument(object): def __repr__(self): if self.headline: - headline = " ".join(self.headline.leaves()) + headline = ' '.join(self.headline.leaves()) else: headline = ( - " ".join([w for w in self.text.leaves() if w[:1] != "<"][:12]) + "..." + ' '.join([w for w in self.text.leaves() if w[:1] != '<'][:12]) + '...' 
) if self.docno is not None: - return "" % (self.docno, headline) + return '' % (self.docno, headline) else: - return "" % headline + return '' % headline class IEERCorpusReader(CorpusReader): @@ -68,7 +72,7 @@ class IEERCorpusReader(CorpusReader): def raw(self, fileids=None): if fileids is None: fileids = self._fileids - elif isinstance(fileids, str): + elif isinstance(fileids, string_types): fileids = [fileids] return concat([self.open(f).read() for f in fileids]) @@ -110,7 +114,7 @@ class IEERCorpusReader(CorpusReader): line = stream.readline() if not line: break - if line.strip() == "": + if line.strip() == '': break out.append(line) # Read the document @@ -119,7 +123,7 @@ class IEERCorpusReader(CorpusReader): if not line: break out.append(line) - if line.strip() == "": + if line.strip() == '': break # Return the document - return ["\n".join(out)] + return ['\n'.join(out)] diff --git a/nlp_resource_data/nltk/corpus/reader/indian.py b/nlp_resource_data/nltk/corpus/reader/indian.py index 0788b54..6f39754 100644 --- a/nlp_resource_data/nltk/corpus/reader/indian.py +++ b/nlp_resource_data/nltk/corpus/reader/indian.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Indian Language POS-Tagged Corpus Reader # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Steven Bird # Edward Loper # URL: @@ -18,6 +18,8 @@ Contents: - Telugu: IIIT Hyderabad """ +from six import string_types + from nltk.tag import str2tuple, map_tag from nltk.corpus.reader.util import * @@ -72,7 +74,7 @@ class IndianCorpusReader(CorpusReader): def raw(self, fileids=None): if fileids is None: fileids = self._fileids - elif isinstance(fileids, str): + elif isinstance(fileids, string_types): fileids = [fileids] return concat([self.open(f).read() for f in fileids]) @@ -88,9 +90,9 @@ class IndianCorpusView(StreamBackedCorpusView): def read_block(self, stream): line = stream.readline() - if line.startswith("<"): + if line.startswith('<'): return [] - sent = [str2tuple(word, sep="_") for word in line.split()] + sent = [str2tuple(word, sep='_') for word in line.split()] if self._tag_mapping_function: sent = [(w, self._tag_mapping_function(t)) for (w, t) in sent] if not self._tagged: diff --git a/nlp_resource_data/nltk/corpus/reader/ipipan.py b/nlp_resource_data/nltk/corpus/reader/ipipan.py index de983dd..47c509d 100644 --- a/nlp_resource_data/nltk/corpus/reader/ipipan.py +++ b/nlp_resource_data/nltk/corpus/reader/ipipan.py @@ -1,12 +1,14 @@ # Natural Language Toolkit: IPI PAN Corpus Reader # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Konrad Goluchowski # URL: # For license information, see LICENSE.TXT import functools +from six import string_types + from nltk.corpus.reader.util import StreamBackedCorpusView, concat from nltk.corpus.reader.api import CorpusReader @@ -14,7 +16,7 @@ from nltk.corpus.reader.api import CorpusReader def _parse_args(fun): @functools.wraps(fun) def decorator(self, fileids=None, **kwargs): - kwargs.pop("tags", None) + kwargs.pop('tags', None) if not fileids: fileids = self.fileids() return fun(self, fileids, **kwargs) @@ -67,48 +69,48 @@ class IPIPANCorpusReader(CorpusReader): filecontents = [] for fileid in self._list_morph_files(fileids): - with open(fileid, "r") as infile: + with open(fileid, 'r') as infile: filecontents.append(infile.read()) - return "".join(filecontents) + return ''.join(filecontents) def channels(self, fileids=None): if not fileids: fileids = self.fileids() - return self._parse_header(fileids, 
"channel") + return self._parse_header(fileids, 'channel') def domains(self, fileids=None): if not fileids: fileids = self.fileids() - return self._parse_header(fileids, "domain") + return self._parse_header(fileids, 'domain') def categories(self, fileids=None): if not fileids: fileids = self.fileids() return [ - self._map_category(cat) for cat in self._parse_header(fileids, "keyTerm") + self._map_category(cat) for cat in self._parse_header(fileids, 'keyTerm') ] def fileids(self, channels=None, domains=None, categories=None): if channels is not None and domains is not None and categories is not None: raise ValueError( - "You can specify only one of channels, domains " - "and categories parameter at once" + 'You can specify only one of channels, domains ' + 'and categories parameter at once' ) if channels is None and domains is None and categories is None: return CorpusReader.fileids(self) - if isinstance(channels, str): + if isinstance(channels, string_types): channels = [channels] - if isinstance(domains, str): + if isinstance(domains, string_types): domains = [domains] - if isinstance(categories, str): + if isinstance(categories, string_types): categories = [categories] if channels: - return self._list_morph_files_by("channel", channels) + return self._list_morph_files_by('channel', channels) elif domains: - return self._list_morph_files_by("domain", domains) + return self._list_morph_files_by('domain', domains) else: return self._list_morph_files_by( - "keyTerm", categories, map=self._map_category + 'keyTerm', categories, map=self._map_category ) @_parse_args @@ -171,7 +173,7 @@ class IPIPANCorpusReader(CorpusReader): def _list_header_files(self, fileids): return [ - f.replace("morph.xml", "header.xml") + f.replace('morph.xml', 'header.xml') for f in self._list_morph_files(fileids) ] @@ -187,7 +189,7 @@ class IPIPANCorpusReader(CorpusReader): fileids = self.fileids() ret_fileids = set() for f in fileids: - fp = self.abspath(f).replace("morph.xml", "header.xml") + fp = self.abspath(f).replace('morph.xml', 'header.xml') values_list = self._get_tag(fp, tag) for value in values_list: if map is not None: @@ -198,43 +200,43 @@ class IPIPANCorpusReader(CorpusReader): def _get_tag(self, f, tag): tags = [] - with open(f, "r") as infile: + with open(f, 'r') as infile: header = infile.read() tag_end = 0 while True: - tag_pos = header.find("<" + tag, tag_end) + tag_pos = header.find('<' + tag, tag_end) if tag_pos < 0: return tags - tag_end = header.find("", tag_pos) + tag_end = header.find('', tag_pos) tags.append(header[tag_pos + len(tag) + 2 : tag_end]) def _map_category(self, cat): - pos = cat.find(">") + pos = cat.find('>') if pos == -1: return cat else: return cat[pos + 1 :] def _view(self, filename, **kwargs): - tags = kwargs.pop("tags", True) - mode = kwargs.pop("mode", 0) - simplify_tags = kwargs.pop("simplify_tags", False) - one_tag = kwargs.pop("one_tag", True) - disamb_only = kwargs.pop("disamb_only", True) - append_no_space = kwargs.pop("append_no_space", False) - append_space = kwargs.pop("append_space", False) - replace_xmlentities = kwargs.pop("replace_xmlentities", True) + tags = kwargs.pop('tags', True) + mode = kwargs.pop('mode', 0) + simplify_tags = kwargs.pop('simplify_tags', False) + one_tag = kwargs.pop('one_tag', True) + disamb_only = kwargs.pop('disamb_only', True) + append_no_space = kwargs.pop('append_no_space', False) + append_space = kwargs.pop('append_space', False) + replace_xmlentities = kwargs.pop('replace_xmlentities', True) if len(kwargs) > 0: - raise 
ValueError("Unexpected arguments: %s" % kwargs.keys()) + raise ValueError('Unexpected arguments: %s' % kwargs.keys()) if not one_tag and not disamb_only: raise ValueError( - "You cannot specify both one_tag=False and " "disamb_only=False" + 'You cannot specify both one_tag=False and ' 'disamb_only=False' ) if not tags and (simplify_tags or not one_tag or not disamb_only): raise ValueError( - "You cannot specify simplify_tags, one_tag or " - "disamb_only with functions other than tagged_*" + 'You cannot specify simplify_tags, one_tag or ' + 'disamb_only with functions other than tagged_*' ) return IPIPANCorpusView( @@ -261,14 +263,14 @@ class IPIPANCorpusView(StreamBackedCorpusView): self.in_sentence = False self.position = 0 - self.show_tags = kwargs.pop("tags", True) - self.disamb_only = kwargs.pop("disamb_only", True) - self.mode = kwargs.pop("mode", IPIPANCorpusView.WORDS_MODE) - self.simplify_tags = kwargs.pop("simplify_tags", False) - self.one_tag = kwargs.pop("one_tag", True) - self.append_no_space = kwargs.pop("append_no_space", False) - self.append_space = kwargs.pop("append_space", False) - self.replace_xmlentities = kwargs.pop("replace_xmlentities", True) + self.show_tags = kwargs.pop('tags', True) + self.disamb_only = kwargs.pop('disamb_only', True) + self.mode = kwargs.pop('mode', IPIPANCorpusView.WORDS_MODE) + self.simplify_tags = kwargs.pop('simplify_tags', False) + self.one_tag = kwargs.pop('one_tag', True) + self.append_no_space = kwargs.pop('append_no_space', False) + self.append_space = kwargs.pop('append_space', False) + self.replace_xmlentities = kwargs.pop('replace_xmlentities', True) def read_block(self, stream): sentence = [] @@ -287,7 +289,7 @@ class IPIPANCorpusView(StreamBackedCorpusView): self._seek(stream) lines = self._read_data(stream) - if lines == [""]: + if lines == ['']: assert not sentences return [] @@ -298,14 +300,14 @@ class IPIPANCorpusView(StreamBackedCorpusView): self.in_sentence = True elif line.startswith('"): + elif line.startswith(''): if self.append_space: no_space = True if self.append_no_space: if self.show_tags: - sentence.append(("", "no-space")) + sentence.append(('', 'no-space')) else: - sentence.append("") - elif line.startswith(" # URL: # For license information, see LICENSE.TXT # For more information, see http://lilyx.net/pages/nltkjapanesecorpus.html +from __future__ import print_function import re +from six import string_types from nltk.parse import DependencyGraph @@ -19,7 +21,7 @@ from nltk.corpus.reader.util import ( from nltk.corpus.reader.api import SyntaxCorpusReader, CorpusReader # default function to convert morphlist to str for tree representation -_morphs2str_default = lambda morphs: "/".join(m[0] for m in morphs if m[0] != "EOS") +_morphs2str_default = lambda morphs: '/'.join(m[0] for m in morphs if m[0] != 'EOS') class KNBCorpusReader(SyntaxCorpusReader): @@ -54,7 +56,7 @@ class KNBCorpusReader(SyntaxCorpusReader): """ - def __init__(self, root, fileids, encoding="utf8", morphs2str=_morphs2str_default): + def __init__(self, root, fileids, encoding='utf8', morphs2str=_morphs2str_default): """ Initialize KNBCorpusReader morphs2str is a function to convert morphlist to str for tree representation @@ -87,7 +89,7 @@ class KNBCorpusReader(SyntaxCorpusReader): if not re.match(r"EOS|\*|\#|\+", line): cells = line.strip().split(" ") # convert cells to morph tuples - res.append((cells[0], " ".join(cells[1:]))) + res.append((cells[0], ' '.join(cells[1:]))) return res @@ -95,7 +97,7 @@ class KNBCorpusReader(SyntaxCorpusReader): dg = 
DependencyGraph() i = 0 for line in t.splitlines(): - if line[0] in "*+": + if line[0] in '*+': # start of bunsetsu or tag cells = line.strip().split(" ", 3) @@ -104,26 +106,26 @@ class KNBCorpusReader(SyntaxCorpusReader): assert m is not None node = dg.nodes[i] - node.update({"address": i, "rel": m.group(2), "word": []}) + node.update({'address': i, 'rel': m.group(2), 'word': []}) dep_parent = int(m.group(1)) if dep_parent == -1: dg.root = node else: - dg.nodes[dep_parent]["deps"].append(i) + dg.nodes[dep_parent]['deps'].append(i) i += 1 - elif line[0] != "#": + elif line[0] != '#': # normal morph cells = line.strip().split(" ") # convert cells to morph tuples - morph = cells[0], " ".join(cells[1:]) - dg.nodes[i - 1]["word"].append(morph) + morph = cells[0], ' '.join(cells[1:]) + dg.nodes[i - 1]['word'].append(morph) if self.morphs2str: for node in dg.nodes.values(): - node["word"] = self.morphs2str(node["word"]) + node['word'] = self.morphs2str(node['word']) return dg.tree() @@ -138,7 +140,7 @@ def demo(): import nltk from nltk.corpus.util import LazyCorpusLoader - root = nltk.data.find("corpora/knbc/corpus1") + root = nltk.data.find('corpora/knbc/corpus1') fileids = [ f for f in find_corpus_fileids(FileSystemPathPointer(root), ".*") @@ -146,30 +148,30 @@ def demo(): ] def _knbc_fileids_sort(x): - cells = x.split("-") + cells = x.split('-') return (cells[0], int(cells[1]), int(cells[2]), int(cells[3])) knbc = LazyCorpusLoader( - "knbc/corpus1", + 'knbc/corpus1', KNBCorpusReader, sorted(fileids, key=_knbc_fileids_sort), - encoding="euc-jp", + encoding='euc-jp', ) print(knbc.fileids()[:10]) - print("".join(knbc.words()[:100])) + print(''.join(knbc.words()[:100])) - print("\n\n".join(str(tree) for tree in knbc.parsed_sents()[:2])) + print('\n\n'.join(str(tree) for tree in knbc.parsed_sents()[:2])) - knbc.morphs2str = lambda morphs: "/".join( - "%s(%s)" % (m[0], m[1].split(" ")[2]) for m in morphs if m[0] != "EOS" - ).encode("utf-8") + knbc.morphs2str = lambda morphs: '/'.join( + "%s(%s)" % (m[0], m[1].split(' ')[2]) for m in morphs if m[0] != 'EOS' + ).encode('utf-8') - print("\n\n".join("%s" % tree for tree in knbc.parsed_sents()[:2])) + print('\n\n'.join('%s' % tree for tree in knbc.parsed_sents()[:2])) print( - "\n".join( - " ".join("%s/%s" % (w[0], w[1].split(" ")[2]) for w in sent) + '\n'.join( + ' '.join("%s/%s" % (w[0], w[1].split(' ')[2]) for w in sent) for sent in knbc.tagged_sents()[0:2] ) ) @@ -180,13 +182,13 @@ def test(): from nltk.corpus.util import LazyCorpusLoader knbc = LazyCorpusLoader( - "knbc/corpus1", KNBCorpusReader, r".*/KN.*", encoding="euc-jp" + 'knbc/corpus1', KNBCorpusReader, r'.*/KN.*', encoding='euc-jp' ) - assert isinstance(knbc.words()[0], str) - assert isinstance(knbc.sents()[0][0], str) + assert isinstance(knbc.words()[0], string_types) + assert isinstance(knbc.sents()[0][0], string_types) assert isinstance(knbc.tagged_words()[0], tuple) assert isinstance(knbc.tagged_sents()[0][0], tuple) -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/corpus/reader/lin.py b/nlp_resource_data/nltk/corpus/reader/lin.py index 613a275..493b1b0 100644 --- a/nlp_resource_data/nltk/corpus/reader/lin.py +++ b/nlp_resource_data/nltk/corpus/reader/lin.py @@ -1,9 +1,10 @@ # Natural Language Toolkit: Lin's Thesaurus # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Dan Blanchard # URL: # For license information, see LICENSE.txt +from __future__ import print_function import re from collections 
import defaultdict @@ -21,20 +22,20 @@ class LinThesaurusCorpusReader(CorpusReader): @staticmethod def __defaultdict_factory(): - """ Factory for creating defaultdict of defaultdict(dict)s """ + ''' Factory for creating defaultdict of defaultdict(dict)s ''' return defaultdict(dict) def __init__(self, root, badscore=0.0): - """ + ''' Initialize the thesaurus. :param root: root directory containing thesaurus LISP files :type root: C{string} :param badscore: the score to give to words which do not appear in each other's sets of synonyms :type badscore: C{float} - """ + ''' - super(LinThesaurusCorpusReader, self).__init__(root, r"sim[A-Z]\.lsp") + super(LinThesaurusCorpusReader, self).__init__(root, r'sim[A-Z]\.lsp') self._thesaurus = defaultdict(LinThesaurusCorpusReader.__defaultdict_factory) self._badscore = badscore for path, encoding, fileid in self.abspaths( @@ -46,14 +47,14 @@ class LinThesaurusCorpusReader(CorpusReader): line = line.strip() # Start of entry if first: - key = LinThesaurusCorpusReader._key_re.sub(r"\1", line) + key = LinThesaurusCorpusReader._key_re.sub(r'\1', line) first = False # End of entry - elif line == "))": + elif line == '))': first = True # Lines with pairs of ngrams and scores else: - split_line = line.split("\t") + split_line = line.split('\t') if len(split_line) == 2: ngram, score = split_line self._thesaurus[fileid][key][ngram.strip('"')] = float( @@ -61,7 +62,7 @@ class LinThesaurusCorpusReader(CorpusReader): ) def similarity(self, ngram1, ngram2, fileid=None): - """ + ''' Returns the similarity score for two ngrams. :param ngram1: first ngram to compare @@ -72,7 +73,7 @@ class LinThesaurusCorpusReader(CorpusReader): :type fileid: C{string} :return: If fileid is specified, just the score for the two ngrams; otherwise, list of tuples of fileids and scores. - """ + ''' # Entries don't contain themselves, so make sure similarity between item and itself is 1.0 if ngram1 == ngram2: if fileid: @@ -100,7 +101,7 @@ class LinThesaurusCorpusReader(CorpusReader): ] def scored_synonyms(self, ngram, fileid=None): - """ + ''' Returns a list of scored synonyms (tuples of synonyms and scores) for the current ngram :param ngram: ngram to lookup @@ -110,7 +111,7 @@ class LinThesaurusCorpusReader(CorpusReader): :return: If fileid is specified, list of tuples of scores and synonyms; otherwise, list of tuples of fileids and lists, where inner lists consist of tuples of scores and synonyms. - """ + ''' if fileid: return self._thesaurus[fileid][ngram].items() else: @@ -120,7 +121,7 @@ class LinThesaurusCorpusReader(CorpusReader): ] def synonyms(self, ngram, fileid=None): - """ + ''' Returns a list of synonyms for the current ngram. :param ngram: ngram to lookup @@ -129,7 +130,7 @@ class LinThesaurusCorpusReader(CorpusReader): :type fileid: C{string} :return: If fileid is specified, list of synonyms; otherwise, list of tuples of fileids and lists, where inner lists contain synonyms. - """ + ''' if fileid: return self._thesaurus[fileid][ngram].keys() else: @@ -139,13 +140,13 @@ class LinThesaurusCorpusReader(CorpusReader): ] def __contains__(self, ngram): - """ + ''' Determines whether or not the given ngram is in the thesaurus. :param ngram: ngram to lookup :type ngram: C{string} :return: whether the given ngram is in the thesaurus. 
- """ + ''' return reduce( lambda accum, fileid: accum or (ngram in self._thesaurus[fileid]), self._fileids, @@ -179,5 +180,5 @@ def demo(): print(thes.similarity(word1, word2)) -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/corpus/reader/mte.py b/nlp_resource_data/nltk/corpus/reader/mte.py index 085f257..4198d3f 100644 --- a/nlp_resource_data/nltk/corpus/reader/mte.py +++ b/nlp_resource_data/nltk/corpus/reader/mte.py @@ -5,6 +5,8 @@ import os import re from functools import reduce +from six import string_types + from nltk.corpus.reader import concat, TaggedCorpusReader from nltk.corpus.reader.xmldocs import XMLCorpusView @@ -38,11 +40,11 @@ class MTEFileReader: """ ns = { - "tei": "http://www.tei-c.org/ns/1.0", - "xml": "http://www.w3.org/XML/1998/namespace", + 'tei': 'http://www.tei-c.org/ns/1.0', + 'xml': 'http://www.w3.org/XML/1998/namespace', } - tag_ns = "{http://www.tei-c.org/ns/1.0}" - xml_ns = "{http://www.w3.org/XML/1998/namespace}" + tag_ns = '{http://www.tei-c.org/ns/1.0}' + xml_ns = '{http://www.w3.org/XML/1998/namespace}' word_path = "TEI/text/body/div/div/p/s/(w|c)" sent_path = "TEI/text/body/div/div/p/s" para_path = "TEI/text/body/div/div/p" @@ -56,30 +58,30 @@ class MTEFileReader: @classmethod def _sent_elt(cls, elt, context): - return [cls._word_elt(w, None) for w in xpath(elt, "*", cls.ns)] + return [cls._word_elt(w, None) for w in xpath(elt, '*', cls.ns)] @classmethod def _para_elt(cls, elt, context): - return [cls._sent_elt(s, None) for s in xpath(elt, "*", cls.ns)] + return [cls._sent_elt(s, None) for s in xpath(elt, '*', cls.ns)] @classmethod def _tagged_word_elt(cls, elt, context): - if "ana" not in elt.attrib: - return (elt.text, "") + if 'ana' not in elt.attrib: + return (elt.text, '') if cls.__tags == "" and cls.__tagset == "msd": - return (elt.text, elt.attrib["ana"]) + return (elt.text, elt.attrib['ana']) elif cls.__tags == "" and cls.__tagset == "universal": - return (elt.text, MTETagConverter.msd_to_universal(elt.attrib["ana"])) + return (elt.text, MTETagConverter.msd_to_universal(elt.attrib['ana'])) else: - tags = re.compile("^" + re.sub("-", ".", cls.__tags) + ".*$") - if tags.match(elt.attrib["ana"]): + tags = re.compile('^' + re.sub("-", ".", cls.__tags) + '.*$') + if tags.match(elt.attrib['ana']): if cls.__tagset == "msd": - return (elt.text, elt.attrib["ana"]) + return (elt.text, elt.attrib['ana']) else: return ( elt.text, - MTETagConverter.msd_to_universal(elt.attrib["ana"]), + MTETagConverter.msd_to_universal(elt.attrib['ana']), ) else: return None @@ -89,7 +91,7 @@ class MTEFileReader: return list( filter( lambda x: x is not None, - [cls._tagged_word_elt(w, None) for w in xpath(elt, "*", cls.ns)], + [cls._tagged_word_elt(w, None) for w in xpath(elt, '*', cls.ns)], ) ) @@ -98,24 +100,24 @@ class MTEFileReader: return list( filter( lambda x: x is not None, - [cls._tagged_sent_elt(s, None) for s in xpath(elt, "*", cls.ns)], + [cls._tagged_sent_elt(s, None) for s in xpath(elt, '*', cls.ns)], ) ) @classmethod def _lemma_word_elt(cls, elt, context): - if "lemma" not in elt.attrib: - return (elt.text, "") + if 'lemma' not in elt.attrib: + return (elt.text, '') else: - return (elt.text, elt.attrib["lemma"]) + return (elt.text, elt.attrib['lemma']) @classmethod def _lemma_sent_elt(cls, elt, context): - return [cls._lemma_word_elt(w, None) for w in xpath(elt, "*", cls.ns)] + return [cls._lemma_word_elt(w, None) for w in xpath(elt, '*', cls.ns)] @classmethod def _lemma_para_elt(cls, elt, context): - return 
[cls._lemma_sent_elt(s, None) for s in xpath(elt, "*", cls.ns)] + return [cls._lemma_sent_elt(s, None) for s in xpath(elt, '*', cls.ns)] def words(self): return MTECorpusView( @@ -176,18 +178,18 @@ class MTETagConverter: """ mapping_msd_universal = { - "A": "ADJ", - "S": "ADP", - "R": "ADV", - "C": "CONJ", - "D": "DET", - "N": "NOUN", - "M": "NUM", - "Q": "PRT", - "P": "PRON", - "V": "VERB", - ".": ".", - "-": "X", + 'A': 'ADJ', + 'S': 'ADP', + 'R': 'ADV', + 'C': 'CONJ', + 'D': 'DET', + 'N': 'NOUN', + 'M': 'NUM', + 'Q': 'PRT', + 'P': 'PRON', + 'V': 'VERB', + '.': '.', + '-': 'X', } @staticmethod @@ -201,7 +203,7 @@ class MTETagConverter: indicator = tag[0] if not tag[0] == "#" else tag[1] if not indicator in MTETagConverter.mapping_msd_universal: - indicator = "-" + indicator = '-' return MTETagConverter.mapping_msd_universal[indicator] @@ -213,7 +215,7 @@ class MTECorpusReader(TaggedCorpusReader): scheme. These tags can be converted to the Universal tagset """ - def __init__(self, root=None, fileids=None, encoding="utf8"): + def __init__(self, root=None, fileids=None, encoding='utf8'): """ Construct a new MTECorpusreader for a set of documents located at the given root directory. Example usage: @@ -230,7 +232,7 @@ class MTECorpusReader(TaggedCorpusReader): def __fileids(self, fileids): if fileids is None: fileids = self._fileids - elif isinstance(fileids, str): + elif isinstance(fileids, string_types): fileids = [fileids] # filter wrong userinput fileids = filter(lambda x: x in self._fileids, fileids) diff --git a/nlp_resource_data/nltk/corpus/reader/nkjp.py b/nlp_resource_data/nltk/corpus/reader/nkjp.py index 23be4b6..aea84b0 100644 --- a/nlp_resource_data/nltk/corpus/reader/nkjp.py +++ b/nlp_resource_data/nltk/corpus/reader/nkjp.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: NKJP Corpus Reader # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Gabriela Kaczka # URL: # For license information, see LICENSE.TXT @@ -10,6 +10,8 @@ import os import re import tempfile +from six import string_types + from nltk.corpus.reader.util import concat from nltk.corpus.reader.xmldocs import XMLCorpusReader, XMLCorpusView @@ -35,7 +37,7 @@ class NKJPCorpusReader(XMLCorpusReader): HEADER_MODE = 2 RAW_MODE = 3 - def __init__(self, root, fileids=".*"): + def __init__(self, root, fileids='.*'): """ Corpus reader designed to work with National Corpus of Polish. See http://nkjp.pl/ for more details about NKJP. @@ -53,11 +55,11 @@ class NKJPCorpusReader(XMLCorpusReader): x.header(fileids=['WilkDom', '/home/USER/nltk_data/corpora/nkjp/WilkWilczy']) x.tagged_words(fileids=['WilkDom', '/home/USER/nltk_data/corpora/nkjp/WilkWilczy'], tags=['subst', 'comp']) """ - if isinstance(fileids, str): - XMLCorpusReader.__init__(self, root, fileids + ".*/header.xml") + if isinstance(fileids, string_types): + XMLCorpusReader.__init__(self, root, fileids + '.*/header.xml') else: XMLCorpusReader.__init__( - self, root, [fileid + "/header.xml" for fileid in fileids] + self, root, [fileid + '/header.xml' for fileid in fileids] ) self._paths = self.get_paths() @@ -78,7 +80,7 @@ class NKJPCorpusReader(XMLCorpusReader): """ Returns a view specialised for use with particular corpus file. 
""" - mode = kwargs.pop("mode", NKJPCorpusReader.WORDS_MODE) + mode = kwargs.pop('mode', NKJPCorpusReader.WORDS_MODE) if mode is NKJPCorpusReader.WORDS_MODE: return NKJPCorpus_Morph_View(filename, tags=tags) elif mode is NKJPCorpusReader.SENTS_MODE: @@ -91,7 +93,7 @@ class NKJPCorpusReader(XMLCorpusReader): ) else: - raise NameError("No such mode!") + raise NameError('No such mode!') def add_root(self, fileid): """ @@ -150,7 +152,7 @@ class NKJPCorpusReader(XMLCorpusReader): Call with specified tags as a list, e.g. tags=['subst', 'comp']. Returns tagged words in specified fileids. """ - tags = kwargs.pop("tags", []) + tags = kwargs.pop('tags', []) return concat( [ self._view( @@ -186,7 +188,7 @@ class NKJPCorpus_Header_View(XMLCorpusView): header.xml files in NKJP corpus. """ self.tagspec = ".*/sourceDesc$" - XMLCorpusView.__init__(self, filename + "header.xml", self.tagspec) + XMLCorpusView.__init__(self, filename + 'header.xml', self.tagspec) def handle_query(self): self._open() @@ -200,43 +202,43 @@ class NKJPCorpus_Header_View(XMLCorpusView): return header def handle_elt(self, elt, context): - titles = elt.findall("bibl/title") + titles = elt.findall('bibl/title') title = [] if titles: - title = "\n".join(title.text.strip() for title in titles) + title = '\n'.join(title.text.strip() for title in titles) - authors = elt.findall("bibl/author") + authors = elt.findall('bibl/author') author = [] if authors: - author = "\n".join(author.text.strip() for author in authors) + author = '\n'.join(author.text.strip() for author in authors) - dates = elt.findall("bibl/date") + dates = elt.findall('bibl/date') date = [] if dates: - date = "\n".join(date.text.strip() for date in dates) + date = '\n'.join(date.text.strip() for date in dates) - publishers = elt.findall("bibl/publisher") + publishers = elt.findall('bibl/publisher') publisher = [] if publishers: - publisher = "\n".join(publisher.text.strip() for publisher in publishers) + publisher = '\n'.join(publisher.text.strip() for publisher in publishers) - idnos = elt.findall("bibl/idno") + idnos = elt.findall('bibl/idno') idno = [] if idnos: - idno = "\n".join(idno.text.strip() for idno in idnos) + idno = '\n'.join(idno.text.strip() for idno in idnos) - notes = elt.findall("bibl/note") + notes = elt.findall('bibl/note') note = [] if notes: - note = "\n".join(note.text.strip() for note in notes) + note = '\n'.join(note.text.strip() for note in notes) return { - "title": title, - "author": author, - "date": date, - "publisher": publisher, - "idno": idno, - "note": note, + 'title': title, + 'author': author, + 'date': date, + 'publisher': publisher, + 'idno': idno, + 'note': note, } @@ -253,21 +255,21 @@ class XML_Tool: def build_preprocessed_file(self): try: - fr = open(self.read_file, "r") + fr = open(self.read_file, 'r') fw = self.write_file - line = " " + line = ' ' while len(line): line = fr.readline() - x = re.split(r"nkjp:[^ ]* ", line) # in all files - ret = " ".join(x) - x = re.split("", ret) # in ann_segmentation.xml - ret = " ".join(x) - x = re.split("", ret) # in ann_segmentation.xml - ret = " ".join(x) - x = re.split("", ret) # in ann_segmentation.xml - ret = " ".join(x) - x = re.split("", ret) # in ann_segmentation.xml - ret = " ".join(x) + x = re.split(r'nkjp:[^ ]* ', line) # in all files + ret = ' '.join(x) + x = re.split('', ret) # in ann_segmentation.xml + ret = ' '.join(x) + x = re.split('', ret) # in ann_segmentation.xml + ret = ' '.join(x) + x = re.split('', ret) # in ann_segmentation.xml + ret = ' '.join(x) + x = re.split('', 
ret) # in ann_segmentation.xml + ret = ' '.join(x) fw.write(ret) fr.close() fw.close() @@ -287,29 +289,29 @@ class NKJPCorpus_Segmentation_View(XMLCorpusView): """ def __init__(self, filename, **kwargs): - self.tagspec = ".*p/.*s" + self.tagspec = '.*p/.*s' # intersperse NKJPCorpus_Text_View self.text_view = NKJPCorpus_Text_View( filename, mode=NKJPCorpus_Text_View.SENTS_MODE ) self.text_view.handle_query() # xml preprocessing - self.xml_tool = XML_Tool(filename, "ann_segmentation.xml") + self.xml_tool = XML_Tool(filename, 'ann_segmentation.xml') # base class init XMLCorpusView.__init__( self, self.xml_tool.build_preprocessed_file(), self.tagspec ) def get_segm_id(self, example_word): - return example_word.split("(")[1].split(",")[0] + return example_word.split('(')[1].split(',')[0] def get_sent_beg(self, beg_word): # returns index of beginning letter in sentence - return int(beg_word.split(",")[1]) + return int(beg_word.split(',')[1]) def get_sent_end(self, end_word): # returns index of end letter in sentence - splitted = end_word.split(")")[0].split(",") + splitted = end_word.split(')')[0].split(',') return int(splitted[1]) + int(splitted[2]) def get_sentences(self, sent_segm): @@ -355,7 +357,7 @@ class NKJPCorpus_Segmentation_View(XMLCorpusView): def handle_elt(self, elt, context): ret = [] for seg in elt: - ret.append(seg.get("corresp")) + ret.append(seg.get('corresp')) return ret @@ -369,11 +371,11 @@ class NKJPCorpus_Text_View(XMLCorpusView): RAW_MODE = 1 def __init__(self, filename, **kwargs): - self.mode = kwargs.pop("mode", 0) - self.tagspec = ".*/div/ab" + self.mode = kwargs.pop('mode', 0) + self.tagspec = '.*/div/ab' self.segm_dict = dict() # xml preprocessing - self.xml_tool = XML_Tool(filename, "text.xml") + self.xml_tool = XML_Tool(filename, 'text.xml') # base class init XMLCorpusView.__init__( self, self.xml_tool.build_preprocessed_file(), self.tagspec @@ -402,11 +404,11 @@ class NKJPCorpus_Text_View(XMLCorpusView): for part in segm: txt.append(part) - return [" ".join([segm for segm in txt])] + return [' '.join([segm for segm in txt])] def get_segm_id(self, elt): for attr in elt.attrib: - if attr.endswith("id"): + if attr.endswith('id'): return elt.get(attr) def handle_elt(self, elt, context): @@ -423,9 +425,9 @@ class NKJPCorpus_Morph_View(XMLCorpusView): """ def __init__(self, filename, **kwargs): - self.tags = kwargs.pop("tags", None) - self.tagspec = ".*/seg/fs" - self.xml_tool = XML_Tool(filename, "ann_morphosyntax.xml") + self.tags = kwargs.pop('tags', None) + self.tagspec = '.*/seg/fs' + self.xml_tool = XML_Tool(filename, 'ann_morphosyntax.xml') XMLCorpusView.__init__( self, self.xml_tool.build_preprocessed_file(), self.tagspec ) @@ -449,7 +451,7 @@ class NKJPCorpus_Morph_View(XMLCorpusView): raise Exception def handle_elt(self, elt, context): - word = "" + word = '' flag = False is_not_interp = True # if tags not specified, then always return word @@ -459,28 +461,28 @@ class NKJPCorpus_Morph_View(XMLCorpusView): for child in elt: # get word - if "name" in child.keys() and child.attrib["name"] == "orth": + if 'name' in child.keys() and child.attrib['name'] == 'orth': for symbol in child: - if symbol.tag == "string": + if symbol.tag == 'string': word = symbol.text - elif "name" in child.keys() and child.attrib["name"] == "interps": + elif 'name' in child.keys() and child.attrib['name'] == 'interps': for symbol in child: - if "type" in symbol.keys() and symbol.attrib["type"] == "lex": + if 'type' in symbol.keys() and symbol.attrib['type'] == 'lex': for symbol2 in 
symbol: if ( - "name" in symbol2.keys() - and symbol2.attrib["name"] == "ctag" + 'name' in symbol2.keys() + and symbol2.attrib['name'] == 'ctag' ): for symbol3 in symbol2: if ( - "value" in symbol3.keys() + 'value' in symbol3.keys() and self.tags is not None - and symbol3.attrib["value"] in self.tags + and symbol3.attrib['value'] in self.tags ): flag = True elif ( - "value" in symbol3.keys() - and symbol3.attrib["value"] == "interp" + 'value' in symbol3.keys() + and symbol3.attrib['value'] == 'interp' ): is_not_interp = False if flag and is_not_interp: diff --git a/nlp_resource_data/nltk/corpus/reader/nombank.py b/nlp_resource_data/nltk/corpus/reader/nombank.py index 06740d0..603646a 100644 --- a/nlp_resource_data/nltk/corpus/reader/nombank.py +++ b/nlp_resource_data/nltk/corpus/reader/nombank.py @@ -1,16 +1,20 @@ # Natural Language Toolkit: NomBank Corpus Reader # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Authors: Paul Bedaride # Edward Loper # URL: # For license information, see LICENSE.TXT +from __future__ import unicode_literals from xml.etree import ElementTree from functools import total_ordering +from six import string_types + from nltk.tree import Tree from nltk.internals import raise_unorderable_types +from nltk.compat import python_2_unicode_compatible from nltk.corpus.reader.util import * from nltk.corpus.reader.api import * @@ -34,11 +38,11 @@ class NombankCorpusReader(CorpusReader): self, root, nomfile, - framefiles="", + framefiles='', nounsfile=None, parse_fileid_xform=None, parse_corpus=None, - encoding="utf8", + encoding='utf8', ): """ :param root: The root directory for this corpus. @@ -54,16 +58,16 @@ class NombankCorpusReader(CorpusReader): corresponding to this corpus. These parse trees are necessary to resolve the tree pointers used by nombank. """ - # If framefiles is specified as a regexp, expand it. - if isinstance(framefiles, str): - self._fileids = find_corpus_fileids(root, framefiles) - self._fileids = list(framefiles) + if isinstance(framefiles, string_types): + framefiles = find_corpus_fileids(root, framefiles) + framefiles = list(framefiles) # Initialze the corpus reader. - CorpusReader.__init__(self, root, framefiles, encoding) + CorpusReader.__init__(self, root, [nomfile, nounsfile] + framefiles, encoding) - # Record our nom file & nouns file. + # Record our frame fileids & nom file. self._nomfile = nomfile + self._framefiles = framefiles self._nounsfile = nounsfile self._parse_fileid_xform = parse_fileid_xform self._parse_corpus = parse_corpus @@ -74,7 +78,7 @@ class NombankCorpusReader(CorpusReader): """ if fileids is None: fileids = self._fileids - elif isinstance(fileids, str): + elif isinstance(fileids, string_types): fileids = [fileids] return concat([self.open(f).read() for f in fileids]) @@ -85,7 +89,7 @@ class NombankCorpusReader(CorpusReader): """ kwargs = {} if baseform is not None: - kwargs["instance_filter"] = lambda inst: inst.baseform == baseform + kwargs['instance_filter'] = lambda inst: inst.baseform == baseform return StreamBackedCorpusView( self.abspath(self._nomfile), lambda stream: self._read_instance_block(stream, **kwargs), @@ -107,41 +111,41 @@ class NombankCorpusReader(CorpusReader): """ :return: the xml description for the given roleset. 
""" - baseform = roleset_id.split(".")[0] - baseform = baseform.replace("perc-sign", "%") - baseform = baseform.replace("oneslashonezero", "1/10").replace( - "1/10", "1-slash-10" + baseform = roleset_id.split('.')[0] + baseform = baseform.replace('perc-sign', '%') + baseform = baseform.replace('oneslashonezero', '1/10').replace( + '1/10', '1-slash-10' ) - framefile = "frames/%s.xml" % baseform - if framefile not in self.fileids(): - raise ValueError("Frameset file for %s not found" % roleset_id) + framefile = 'frames/%s.xml' % baseform + if framefile not in self._framefiles: + raise ValueError('Frameset file for %s not found' % roleset_id) # n.b.: The encoding for XML fileids is specified by the file # itself; so we ignore self._encoding here. etree = ElementTree.parse(self.abspath(framefile).open()).getroot() - for roleset in etree.findall("predicate/roleset"): - if roleset.attrib["id"] == roleset_id: + for roleset in etree.findall('predicate/roleset'): + if roleset.attrib['id'] == roleset_id: return roleset - raise ValueError("Roleset %s not found in %s" % (roleset_id, framefile)) + raise ValueError('Roleset %s not found in %s' % (roleset_id, framefile)) def rolesets(self, baseform=None): """ :return: list of xml descriptions for rolesets. """ if baseform is not None: - framefile = "frames/%s.xml" % baseform - if framefile not in self.fileids(): - raise ValueError("Frameset file for %s not found" % baseform) + framefile = 'frames/%s.xml' % baseform + if framefile not in self._framefiles: + raise ValueError('Frameset file for %s not found' % baseform) framefiles = [framefile] else: - framefiles = self.fileids() + framefiles = self._framefiles rsets = [] for framefile in framefiles: # n.b.: The encoding for XML fileids is specified by the file # itself; so we ignore self._encoding here. etree = ElementTree.parse(self.abspath(framefile).open()).getroot() - rsets.append(etree.findall("predicate/roleset")) + rsets.append(etree.findall('predicate/roleset')) return LazyConcatenation(rsets) def nouns(self): @@ -176,6 +180,7 @@ class NombankCorpusReader(CorpusReader): ###################################################################### +@python_2_unicode_compatible class NombankInstance(object): def __init__( self, @@ -232,28 +237,28 @@ class NombankInstance(object): """The name of the roleset used by this instance's predicate. 
Use ``nombank.roleset() <roleset_id>`` to look up information about the roleset.""" - r = self.baseform.replace("%", "perc-sign") - r = r.replace("1/10", "1-slash-10").replace("1-slash-10", "oneslashonezero") - return "%s.%s" % (r, self.sensenumber) + r = self.baseform.replace('%', 'perc-sign') + r = r.replace('1/10', '1-slash-10').replace('1-slash-10', 'oneslashonezero') + return '%s.%s' % (r, self.sensenumber) def __repr__(self): - return "<NombankInstance: %s, sent %s, word %s>" % ( + return '<NombankInstance: %s, sent %s, word %s>' % ( self.fileid, self.sentnum, self.wordnum, ) def __str__(self): - s = "%s %s %s %s %s" % ( + s = '%s %s %s %s %s' % ( self.fileid, self.sentnum, self.wordnum, self.baseform, self.sensenumber, ) - items = self.arguments + ((self.predicate, "rel"),) + items = self.arguments + ((self.predicate, 'rel'),) for (argloc, argid) in sorted(items): - s += " %s-%s" % (argloc, argid) + s += ' %s-%s' % (argloc, argid) return s def _get_tree(self): @@ -274,15 +279,15 @@ class NombankInstance(object): def parse(s, parse_fileid_xform=None, parse_corpus=None): pieces = s.split() if len(pieces) < 6: - raise ValueError("Badly formatted nombank line: %r" % s) + raise ValueError('Badly formatted nombank line: %r' % s) # Divide the line into its basic pieces. (fileid, sentnum, wordnum, baseform, sensenumber) = pieces[:5] args = pieces[5:] - rel = [args.pop(i) for i, p in enumerate(args) if "-rel" in p] + rel = [args.pop(i) for i, p in enumerate(args) if '-rel' in p] if len(rel) != 1: - raise ValueError("Badly formatted nombank line: %r" % s) + raise ValueError('Badly formatted nombank line: %r' % s) # Apply the fileid selector, if any. if parse_fileid_xform is not None: @@ -294,13 +299,13 @@ class NombankInstance(object): # Parse the predicate location. - predloc, predid = rel[0].split("-", 1) + predloc, predid = rel[0].split('-', 1) predicate = NombankTreePointer.parse(predloc) # Parse the arguments. arguments = [] for arg in args: - argloc, argid = arg.split("-", 1) + argloc, argid = arg.split('-', 1) arguments.append((NombankTreePointer.parse(argloc), argid)) # Put it all together. 
@@ -337,6 +342,7 @@ class NombankPointer(object): raise NotImplementedError() +@python_2_unicode_compatible class NombankChainTreePointer(NombankPointer): def __init__(self, pieces): self.pieces = pieces @@ -345,17 +351,18 @@ class NombankChainTreePointer(NombankPointer): ``NombankTreePointer`` pointers.""" def __str__(self): - return "*".join("%s" % p for p in self.pieces) + return '*'.join('%s' % p for p in self.pieces) def __repr__(self): - return "" % self + return '' % self def select(self, tree): if tree is None: - raise ValueError("Parse tree not avaialable") - return Tree("*CHAIN*", [p.select(tree) for p in self.pieces]) + raise ValueError('Parse tree not avaialable') + return Tree('*CHAIN*', [p.select(tree) for p in self.pieces]) +@python_2_unicode_compatible class NombankSplitTreePointer(NombankPointer): def __init__(self, pieces): self.pieces = pieces @@ -363,18 +370,19 @@ class NombankSplitTreePointer(NombankPointer): all ``NombankTreePointer`` pointers.""" def __str__(self): - return ",".join("%s" % p for p in self.pieces) + return ','.join('%s' % p for p in self.pieces) def __repr__(self): - return "" % self + return '' % self def select(self, tree): if tree is None: - raise ValueError("Parse tree not avaialable") - return Tree("*SPLIT*", [p.select(tree) for p in self.pieces]) + raise ValueError('Parse tree not avaialable') + return Tree('*SPLIT*', [p.select(tree) for p in self.pieces]) @total_ordering +@python_2_unicode_compatible class NombankTreePointer(NombankPointer): """ wordnum:height*wordnum:height*... @@ -389,30 +397,30 @@ class NombankTreePointer(NombankPointer): @staticmethod def parse(s): # Deal with chains (xx*yy*zz) - pieces = s.split("*") + pieces = s.split('*') if len(pieces) > 1: return NombankChainTreePointer( [NombankTreePointer.parse(elt) for elt in pieces] ) # Deal with split args (xx,yy,zz) - pieces = s.split(",") + pieces = s.split(',') if len(pieces) > 1: return NombankSplitTreePointer( [NombankTreePointer.parse(elt) for elt in pieces] ) # Deal with normal pointers. - pieces = s.split(":") + pieces = s.split(':') if len(pieces) != 2: - raise ValueError("bad nombank pointer %r" % s) + raise ValueError('bad nombank pointer %r' % s) return NombankTreePointer(int(pieces[0]), int(pieces[1])) def __str__(self): - return "%s:%s" % (self.wordnum, self.height) + return '%s:%s' % (self.wordnum, self.height) def __repr__(self): - return "NombankTreePointer(%d, %d)" % (self.wordnum, self.height) + return 'NombankTreePointer(%d, %d)' % (self.wordnum, self.height) def __eq__(self, other): while isinstance(other, (NombankChainTreePointer, NombankSplitTreePointer)): @@ -437,7 +445,7 @@ class NombankTreePointer(NombankPointer): def select(self, tree): if tree is None: - raise ValueError("Parse tree not avaialable") + raise ValueError('Parse tree not avaialable') return tree[self.treepos(tree)] def treepos(self, tree): @@ -446,12 +454,14 @@ class NombankTreePointer(NombankPointer): given that it points to the given tree. """ if tree is None: - raise ValueError("Parse tree not avaialable") + raise ValueError('Parse tree not avaialable') stack = [tree] treepos = [] wordnum = 0 while True: + # print treepos + # print stack[-1] # tree node: if isinstance(stack[-1], Tree): # Select the next child. 
diff --git a/nlp_resource_data/nltk/corpus/reader/nps_chat.py b/nlp_resource_data/nltk/corpus/reader/nps_chat.py index 8dfd8a5..391f61d 100644 --- a/nlp_resource_data/nltk/corpus/reader/nps_chat.py +++ b/nlp_resource_data/nltk/corpus/reader/nps_chat.py @@ -1,9 +1,10 @@ # Natural Language Toolkit: NPS Chat Corpus Reader # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # URL: # For license information, see LICENSE.TXT +from __future__ import unicode_literals import re import textwrap @@ -26,14 +27,14 @@ class NPSChatCorpusReader(XMLCorpusReader): if self._wrap_etree: return concat( [ - XMLCorpusView(fileid, "Session/Posts/Post", self._wrap_elt) + XMLCorpusView(fileid, 'Session/Posts/Post', self._wrap_elt) for fileid in self.abspaths(fileids) ] ) else: return concat( [ - XMLCorpusView(fileid, "Session/Posts/Post") + XMLCorpusView(fileid, 'Session/Posts/Post') for fileid in self.abspaths(fileids) ] ) @@ -42,7 +43,7 @@ class NPSChatCorpusReader(XMLCorpusReader): return concat( [ XMLCorpusView( - fileid, "Session/Posts/Post/terminals", self._elt_to_words + fileid, 'Session/Posts/Post/terminals', self._elt_to_words ) for fileid in self.abspaths(fileids) ] @@ -54,7 +55,7 @@ class NPSChatCorpusReader(XMLCorpusReader): return concat( [ - XMLCorpusView(fileid, "Session/Posts/Post/terminals", reader) + XMLCorpusView(fileid, 'Session/Posts/Post/terminals', reader) for fileid in self.abspaths(fileids) ] ) @@ -69,12 +70,12 @@ class NPSChatCorpusReader(XMLCorpusReader): return ElementWrapper(elt) def _elt_to_words(self, elt, handler): - return [self._simplify_username(t.attrib["word"]) for t in elt.findall("t")] + return [self._simplify_username(t.attrib['word']) for t in elt.findall('t')] def _elt_to_tagged_words(self, elt, handler, tagset=None): tagged_post = [ - (self._simplify_username(t.attrib["word"]), t.attrib["pos"]) - for t in elt.findall("t") + (self._simplify_username(t.attrib['word']), t.attrib['pos']) + for t in elt.findall('t') ] if tagset and tagset != self._tagset: tagged_post = [ @@ -84,8 +85,8 @@ class NPSChatCorpusReader(XMLCorpusReader): @staticmethod def _simplify_username(word): - if "User" in word: - word = "U" + word.split("User", 1)[1] + if 'User' in word: + word = 'U' + word.split('User', 1)[1] elif isinstance(word, bytes): - word = word.decode("ascii") + word = word.decode('ascii') return word diff --git a/nlp_resource_data/nltk/corpus/reader/opinion_lexicon.py b/nlp_resource_data/nltk/corpus/reader/opinion_lexicon.py index 598db32..cfe7f6e 100644 --- a/nlp_resource_data/nltk/corpus/reader/opinion_lexicon.py +++ b/nlp_resource_data/nltk/corpus/reader/opinion_lexicon.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Opinion Lexicon Corpus Reader # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Pierpaolo Pantone <24alsecondo@gmail.com> # URL: # For license information, see LICENSE.TXT @@ -27,6 +27,7 @@ Related papers: Comparing Opinions on the Web". Proceedings of the 14th International World Wide Web conference (WWW-2005), May 10-14, 2005, Chiba, Japan. 
""" +from six import string_types from nltk.corpus.reader import WordListCorpusReader from nltk.corpus.reader.api import * @@ -85,7 +86,7 @@ class OpinionLexiconCorpusReader(WordListCorpusReader): """ if fileids is None: fileids = self._fileids - elif isinstance(fileids, str): + elif isinstance(fileids, string_types): fileids = [fileids] return concat( [ @@ -101,7 +102,7 @@ class OpinionLexiconCorpusReader(WordListCorpusReader): :return: a list of positive words. :rtype: list(str) """ - return self.words("positive-words.txt") + return self.words('positive-words.txt') def negative(self): """ @@ -110,7 +111,7 @@ class OpinionLexiconCorpusReader(WordListCorpusReader): :return: a list of negative words. :rtype: list(str) """ - return self.words("negative-words.txt") + return self.words('negative-words.txt') def _read_word_block(self, stream): words = [] diff --git a/nlp_resource_data/nltk/corpus/reader/panlex_lite.py b/nlp_resource_data/nltk/corpus/reader/panlex_lite.py index ab71dc7..44bfb96 100644 --- a/nlp_resource_data/nltk/corpus/reader/panlex_lite.py +++ b/nlp_resource_data/nltk/corpus/reader/panlex_lite.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: PanLex Corpus Reader # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: David Kamholz # URL: # For license information, see LICENSE.TXT @@ -43,12 +43,12 @@ class PanLexLiteCorpusReader(CorpusReader): """ def __init__(self, root): - self._c = sqlite3.connect(os.path.join(root, "db.sqlite")).cursor() + self._c = sqlite3.connect(os.path.join(root, 'db.sqlite')).cursor() self._uid_lv = {} self._lv_uid = {} - for row in self._c.execute("SELECT uid, lv FROM lv"): + for row in self._c.execute('SELECT uid, lv FROM lv'): self._uid_lv[row[0]] = row[1] self._lv_uid[row[1]] = row[0] @@ -65,10 +65,10 @@ class PanLexLiteCorpusReader(CorpusReader): """ if lc is None: - return self._c.execute("SELECT uid, tt FROM lv ORDER BY uid").fetchall() + return self._c.execute('SELECT uid, tt FROM lv ORDER BY uid').fetchall() else: return self._c.execute( - "SELECT uid, tt FROM lv WHERE lc = ? ORDER BY uid", (lc,) + 'SELECT uid, tt FROM lv WHERE lc = ? ORDER BY uid', (lc,) ).fetchall() def meanings(self, expr_uid, expr_tt): @@ -92,16 +92,16 @@ class PanLexLiteCorpusReader(CorpusReader): if not mn in mn_info: mn_info[mn] = { - "uq": i[1], - "ap": i[2], - "ui": i[3], - "ex": {expr_uid: [expr_tt]}, + 'uq': i[1], + 'ap': i[2], + 'ui': i[3], + 'ex': {expr_uid: [expr_tt]}, } - if not uid in mn_info[mn]["ex"]: - mn_info[mn]["ex"][uid] = [] + if not uid in mn_info[mn]['ex']: + mn_info[mn]['ex'][uid] = [] - mn_info[mn]["ex"][uid].append(i[4]) + mn_info[mn]['ex'][uid].append(i[4]) return [Meaning(mn, mn_info[mn]) for mn in mn_info] @@ -134,35 +134,35 @@ class Meaning(dict): def __init__(self, mn, attr): super(Meaning, self).__init__(**attr) - self["mn"] = mn + self['mn'] = mn def id(self): """ :return: the meaning's id. :rtype: int """ - return self["mn"] + return self['mn'] def quality(self): """ :return: the meaning's source's quality (0=worst, 9=best). :rtype: int """ - return self["uq"] + return self['uq'] def source(self): """ :return: the meaning's source id. :rtype: int """ - return self["ap"] + return self['ap'] def source_group(self): """ :return: the meaning's source group id. :rtype: int """ - return self["ui"] + return self['ui'] def expressions(self): """ @@ -171,4 +171,4 @@ class Meaning(dict): texts. 
:rtype: dict """ - return self["ex"] + return self['ex'] diff --git a/nlp_resource_data/nltk/corpus/reader/panlex_swadesh.py b/nlp_resource_data/nltk/corpus/reader/panlex_swadesh.py deleted file mode 100644 index ed46a4b..0000000 --- a/nlp_resource_data/nltk/corpus/reader/panlex_swadesh.py +++ /dev/null @@ -1,91 +0,0 @@ -# -*- coding: utf-8 -*- -# Natural Language Toolkit: Word List Corpus Reader -# -# Copyright (C) 2001-2020 NLTK Project -# Author: Steven Bird -# Edward Loper -# URL: -# For license information, see LICENSE.TXT - - -from collections import namedtuple, defaultdict -import re - -from nltk.tokenize import line_tokenize - -from nltk.corpus.reader.wordlist import WordListCorpusReader -from nltk.corpus.reader.util import * -from nltk.corpus.reader.api import * - -PanlexLanguage = namedtuple('PanlexLanguage', - ['panlex_uid', # (1) PanLex UID - 'iso639', # (2) ISO 639 language code - 'iso639_type', # (3) ISO 639 language type, see README - 'script', # (4) normal scripts of expressions - 'name', # (5) PanLex default name - 'langvar_uid' # (6) UID of the language variety in which the default name is an expression - ]) - -class PanlexSwadeshCorpusReader(WordListCorpusReader): - """ - This is a class to read the PanLex Swadesh list from - - David Kamholz, Jonathan Pool, and Susan M. Colowick (2014). - PanLex: Building a Resource for Panlingual Lexical Translation. - In LREC. http://www.lrec-conf.org/proceedings/lrec2014/pdf/1029_Paper.pdf - - License: CC0 1.0 Universal - https://creativecommons.org/publicdomain/zero/1.0/legalcode - """ - def __init__(self, *args, **kwargs): - super(PanlexSwadeshCorpusReader, self).__init__(*args, **kwargs) - # Find the swadesh size using the fileids' path. - self.swadesh_size = re.match(r'swadesh([0-9].*)\/', self.fileids()[0]).group(1) - self._languages = {lang.panlex_uid:lang for lang in self.get_languages()} - self._macro_langauges = self.get_macrolanguages() - - def license(self): - print('CC0 1.0 Universal') - - def readme(self): - print(self.raw('README')) - - def language_codes(self): - return self._languages.keys() - - def get_languages(self): - for line in self.raw('langs{}.txt'.format(self.swadesh_size)).split('\n'): - if not line.strip(): # Skip empty lines. - continue - yield PanlexLanguage(*line.strip().split('\t')) - - def get_macrolanguages(self): - macro_langauges = defaultdict(list) - for lang in self._languages.values(): - macro_langauges[lang.iso639].append(lang.panlex_uid) - return macro_langauges - - def words_by_lang(self, lang_code): - """ - :return: a list of list(str) - """ - fileid = 'swadesh{}/{}.txt'.format(self.swadesh_size, lang_code) - return [concept.split('\t') for concept in self.words(fileid)] - - def words_by_iso639(self, iso63_code): - """ - :return: a list of list(str) - """ - fileids = ['swadesh{}/{}.txt'.format(self.swadesh_size, lang_code) - for lang_code in self._macro_langauges[iso63_code]] - return [concept.split('\t') for fileid in fileids for concept in self.words(fileid)] - - def entries(self, fileids=None): - """ - :return: a tuple of words for the specified fileids. 
- """ - if not fileids: - fileids = self.fileids() - - wordlists = [self.words(f) for f in fileids] - return list(zip(*wordlists)) diff --git a/nlp_resource_data/nltk/corpus/reader/pl196x.py b/nlp_resource_data/nltk/corpus/reader/pl196x.py index aaf280d..a8a1f6f 100644 --- a/nlp_resource_data/nltk/corpus/reader/pl196x.py +++ b/nlp_resource_data/nltk/corpus/reader/pl196x.py @@ -1,19 +1,21 @@ # Natural Language Toolkit: # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Piotr Kasprzyk # URL: # For license information, see LICENSE.TXT +from six import string_types + from nltk.corpus.reader.api import * from nltk.corpus.reader.xmldocs import XMLCorpusReader -PARA = re.compile(r"]*){0,1}>(.*?)

    ") -SENT = re.compile(r"]*){0,1}>(.*?)
    ") +PARA = re.compile(r']*){0,1}>(.*?)

    ') +SENT = re.compile(r']*){0,1}>(.*?)
    ') -TAGGEDWORD = re.compile(r"<([wc](?: [^>]*){0,1}>)(.*?)") -WORD = re.compile(r"<[wc](?: [^>]*){0,1}>(.*?)") +TAGGEDWORD = re.compile(r'<([wc](?: [^>]*){0,1}>)(.*?)') +WORD = re.compile(r'<[wc](?: [^>]*){0,1}>(.*?)') TYPE = re.compile(r'type="(.*?)"') ANA = re.compile(r'ana="(.*?)"') @@ -46,22 +48,22 @@ class TEICorpusView(StreamBackedCorpusView): def read_block(self, stream): block = stream.readlines(self._pagesize) block = concat(block) - while (block.count(" block.count("")) or block.count( - " block.count('')) or block.count( + '") + len("") + end = block[beg:].find('') + len('') block = block[:beg] + block[beg + end :] output = [] @@ -84,7 +86,7 @@ class TEICorpusView(StreamBackedCorpusView): def _parse_tag(self, tag_word_tuple): (tag, word) = tag_word_tuple - if tag.startswith("w"): + if tag.startswith('w'): tag = ANA.search(tag).group(1) else: # tag.startswith('c') tag = TYPE.search(tag).group(1) @@ -95,8 +97,8 @@ class Pl196xCorpusReader(CategorizedCorpusReader, XMLCorpusReader): head_len = 2770 def __init__(self, *args, **kwargs): - if "textid_file" in kwargs: - self._textids = kwargs["textid_file"] + if 'textid_file' in kwargs: + self._textids = kwargs['textid_file'] else: self._textids = None @@ -112,10 +114,10 @@ class Pl196xCorpusReader(CategorizedCorpusReader, XMLCorpusReader): with open(self._textids) as fp: for line in fp: line = line.strip() - file_id, text_ids = line.split(" ", 1) + file_id, text_ids = line.split(' ', 1) if file_id not in self.fileids(): raise ValueError( - "In text_id mapping file %s: %s not found" + 'In text_id mapping file %s: %s not found' % (self._textids, file_id) ) for text_id in text_ids.split(self._delimiter): @@ -128,16 +130,16 @@ class Pl196xCorpusReader(CategorizedCorpusReader, XMLCorpusReader): def _resolve(self, fileids, categories, textids=None): tmp = None if ( - len(list( + len( filter( lambda accessor: accessor is None, (fileids, categories, textids) ) - )) + ) != 1 ): raise ValueError( - "Specify exactly one of: fileids, " "categories or textids" + 'Specify exactly one of: fileids, ' 'categories or textids' ) if fileids is not None: @@ -147,7 +149,7 @@ class Pl196xCorpusReader(CategorizedCorpusReader, XMLCorpusReader): return self.fileids(categories), None if textids is not None: - if isinstance(textids, str): + if isinstance(textids, string_types): textids = [textids] files = sum((self._t2f[t] for t in textids), []) tdict = dict() @@ -171,7 +173,7 @@ class Pl196xCorpusReader(CategorizedCorpusReader, XMLCorpusReader): if fileids is None: return sorted(self._t2f) - if isinstance(fileids, str): + if isinstance(fileids, string_types): fileids = [fileids] return sorted(sum((self._f2t[d] for d in fileids), [])) @@ -179,7 +181,7 @@ class Pl196xCorpusReader(CategorizedCorpusReader, XMLCorpusReader): fileids, textids = self._resolve(fileids, categories, textids) if fileids is None: fileids = self._fileids - elif isinstance(fileids, str): + elif isinstance(fileids, string_types): fileids = [fileids] if textids: @@ -214,7 +216,7 @@ class Pl196xCorpusReader(CategorizedCorpusReader, XMLCorpusReader): fileids, textids = self._resolve(fileids, categories, textids) if fileids is None: fileids = self._fileids - elif isinstance(fileids, str): + elif isinstance(fileids, string_types): fileids = [fileids] if textids: @@ -245,7 +247,7 @@ class Pl196xCorpusReader(CategorizedCorpusReader, XMLCorpusReader): fileids, textids = self._resolve(fileids, categories, textids) if fileids is None: fileids = self._fileids - elif isinstance(fileids, str): + elif 
isinstance(fileids, string_types): fileids = [fileids] if textids: @@ -276,7 +278,7 @@ class Pl196xCorpusReader(CategorizedCorpusReader, XMLCorpusReader): fileids, textids = self._resolve(fileids, categories, textids) if fileids is None: fileids = self._fileids - elif isinstance(fileids, str): + elif isinstance(fileids, string_types): fileids = [fileids] if textids: @@ -307,7 +309,7 @@ class Pl196xCorpusReader(CategorizedCorpusReader, XMLCorpusReader): fileids, textids = self._resolve(fileids, categories, textids) if fileids is None: fileids = self._fileids - elif isinstance(fileids, str): + elif isinstance(fileids, string_types): fileids = [fileids] if textids: @@ -338,7 +340,7 @@ class Pl196xCorpusReader(CategorizedCorpusReader, XMLCorpusReader): fileids, textids = self._resolve(fileids, categories, textids) if fileids is None: fileids = self._fileids - elif isinstance(fileids, str): + elif isinstance(fileids, string_types): fileids = [fileids] if textids: @@ -370,12 +372,12 @@ class Pl196xCorpusReader(CategorizedCorpusReader, XMLCorpusReader): if len(fileids) == 1: return XMLCorpusReader.xml(self, fileids[0]) else: - raise TypeError("Expected a single file") + raise TypeError('Expected a single file') def raw(self, fileids=None, categories=None): fileids, _ = self._resolve(fileids, categories) if fileids is None: fileids = self._fileids - elif isinstance(fileids, str): + elif isinstance(fileids, string_types): fileids = [fileids] return concat([self.open(f).read() for f in fileids]) diff --git a/nlp_resource_data/nltk/corpus/reader/plaintext.py b/nlp_resource_data/nltk/corpus/reader/plaintext.py index 17f484b..4de7787 100644 --- a/nlp_resource_data/nltk/corpus/reader/plaintext.py +++ b/nlp_resource_data/nltk/corpus/reader/plaintext.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Plaintext Corpus Reader # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Steven Bird # Edward Loper # Nitin Madnani @@ -40,9 +40,9 @@ class PlaintextCorpusReader(CorpusReader): root, fileids, word_tokenizer=WordPunctTokenizer(), - sent_tokenizer=nltk.data.LazyLoader("tokenizers/punkt/english.pickle"), + sent_tokenizer=nltk.data.LazyLoader('tokenizers/punkt/english.pickle'), para_block_reader=read_blankline_block, - encoding="utf8", + encoding='utf8', ): """ Construct a new plaintext corpus reader for a set of documents @@ -72,7 +72,7 @@ class PlaintextCorpusReader(CorpusReader): """ if fileids is None: fileids = self._fileids - elif isinstance(fileids, str): + elif isinstance(fileids, string_types): fileids = [fileids] raw_texts = [] for f in fileids: @@ -102,7 +102,7 @@ class PlaintextCorpusReader(CorpusReader): :rtype: list(list(str)) """ if self._sent_tokenizer is None: - raise ValueError("No sentence tokenizer for this corpus") + raise ValueError('No sentence tokenizer for this corpus') return concat( [ @@ -119,7 +119,7 @@ class PlaintextCorpusReader(CorpusReader): :rtype: list(list(list(str))) """ if self._sent_tokenizer is None: - raise ValueError("No sentence tokenizer for this corpus") + raise ValueError('No sentence tokenizer for this corpus') return concat( [ @@ -175,7 +175,7 @@ class CategorizedPlaintextCorpusReader(CategorizedCorpusReader, PlaintextCorpusR def _resolve(self, fileids, categories): if fileids is not None and categories is not None: - raise ValueError("Specify fileids or categories, not both") + raise ValueError('Specify fileids or categories, not both') if categories is not None: return self.fileids(categories) else: @@ -200,8 +200,8 @@ class 
CategorizedPlaintextCorpusReader(CategorizedCorpusReader, PlaintextCorpusR class PortugueseCategorizedPlaintextCorpusReader(CategorizedPlaintextCorpusReader): def __init__(self, *args, **kwargs): CategorizedCorpusReader.__init__(self, kwargs) - kwargs["sent_tokenizer"] = nltk.data.LazyLoader( - "tokenizers/punkt/portuguese.pickle" + kwargs['sent_tokenizer'] = nltk.data.LazyLoader( + 'tokenizers/punkt/portuguese.pickle' ) PlaintextCorpusReader.__init__(self, *args, **kwargs) @@ -259,5 +259,5 @@ class EuroparlCorpusReader(PlaintextCorpusReader): def paras(self, fileids=None): raise NotImplementedError( - "The Europarl corpus reader does not support paragraphs. Please use chapters() instead." + 'The Europarl corpus reader does not support paragraphs. Please use chapters() instead.' ) diff --git a/nlp_resource_data/nltk/corpus/reader/ppattach.py b/nlp_resource_data/nltk/corpus/reader/ppattach.py index 60c2b02..3bc06e4 100644 --- a/nlp_resource_data/nltk/corpus/reader/ppattach.py +++ b/nlp_resource_data/nltk/corpus/reader/ppattach.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: PP Attachment Corpus Reader # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Steven Bird # Edward Loper # URL: @@ -37,11 +37,16 @@ Conference. [http://www.cis.upenn.edu/~adwait/papers/hlt94.ps] The PP Attachment Corpus is distributed with NLTK with the permission of the author. """ +from __future__ import unicode_literals +from six import string_types + +from nltk import compat from nltk.corpus.reader.util import * from nltk.corpus.reader.api import * +@compat.python_2_unicode_compatible class PPAttachment(object): def __init__(self, sent, verb, noun1, prep, noun2, attachment): self.sent = sent @@ -53,8 +58,8 @@ class PPAttachment(object): def __repr__(self): return ( - "PPAttachment(sent=%r, verb=%r, noun1=%r, prep=%r, " - "noun2=%r, attachment=%r)" + 'PPAttachment(sent=%r, verb=%r, noun1=%r, prep=%r, ' + 'noun2=%r, attachment=%r)' % (self.sent, self.verb, self.noun1, self.prep, self.noun2, self.attachment) ) @@ -83,7 +88,7 @@ class PPAttachmentCorpusReader(CorpusReader): def raw(self, fileids=None): if fileids is None: fileids = self._fileids - elif isinstance(fileids, str): + elif isinstance(fileids, string_types): fileids = [fileids] return concat([self.open(f).read() for f in fileids]) diff --git a/nlp_resource_data/nltk/corpus/reader/propbank.py b/nlp_resource_data/nltk/corpus/reader/propbank.py index 7c49edc..5c9bdd9 100644 --- a/nlp_resource_data/nltk/corpus/reader/propbank.py +++ b/nlp_resource_data/nltk/corpus/reader/propbank.py @@ -1,14 +1,17 @@ # Natural Language Toolkit: PropBank Corpus Reader # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # URL: # For license information, see LICENSE.TXT +from __future__ import unicode_literals import re from functools import total_ordering from xml.etree import ElementTree +from six import string_types + from nltk.tree import Tree from nltk.internals import raise_unorderable_types @@ -34,11 +37,11 @@ class PropbankCorpusReader(CorpusReader): self, root, propfile, - framefiles="", + framefiles='', verbsfile=None, parse_fileid_xform=None, parse_corpus=None, - encoding="utf8", + encoding='utf8', ): """ :param root: The root directory for this corpus. @@ -55,7 +58,7 @@ class PropbankCorpusReader(CorpusReader): necessary to resolve the tree pointers used by propbank. """ # If framefiles is specified as a regexp, expand it. 
- if isinstance(framefiles, str): + if isinstance(framefiles, string_types): framefiles = find_corpus_fileids(root, framefiles) framefiles = list(framefiles) # Initialze the corpus reader. @@ -85,7 +88,7 @@ class PropbankCorpusReader(CorpusReader): """ kwargs = {} if baseform is not None: - kwargs["instance_filter"] = lambda inst: inst.baseform == baseform + kwargs['instance_filter'] = lambda inst: inst.baseform == baseform return StreamBackedCorpusView( self.abspath(self._propfile), lambda stream: self._read_instance_block(stream, **kwargs), @@ -107,27 +110,27 @@ class PropbankCorpusReader(CorpusReader): """ :return: the xml description for the given roleset. """ - baseform = roleset_id.split(".")[0] - framefile = "frames/%s.xml" % baseform + baseform = roleset_id.split('.')[0] + framefile = 'frames/%s.xml' % baseform if framefile not in self._framefiles: - raise ValueError("Frameset file for %s not found" % roleset_id) + raise ValueError('Frameset file for %s not found' % roleset_id) # n.b.: The encoding for XML fileids is specified by the file # itself; so we ignore self._encoding here. etree = ElementTree.parse(self.abspath(framefile).open()).getroot() - for roleset in etree.findall("predicate/roleset"): - if roleset.attrib["id"] == roleset_id: + for roleset in etree.findall('predicate/roleset'): + if roleset.attrib['id'] == roleset_id: return roleset - raise ValueError("Roleset %s not found in %s" % (roleset_id, framefile)) + raise ValueError('Roleset %s not found in %s' % (roleset_id, framefile)) def rolesets(self, baseform=None): """ :return: list of xml descriptions for rolesets. """ if baseform is not None: - framefile = "frames/%s.xml" % baseform + framefile = 'frames/%s.xml' % baseform if framefile not in self._framefiles: - raise ValueError("Frameset file for %s not found" % baseform) + raise ValueError('Frameset file for %s not found' % baseform) framefiles = [framefile] else: framefiles = self._framefiles @@ -137,7 +140,7 @@ class PropbankCorpusReader(CorpusReader): # n.b.: The encoding for XML fileids is specified by the file # itself; so we ignore self._encoding here. 
etree = ElementTree.parse(self.abspath(framefile).open()).getroot() - rsets.append(etree.findall("predicate/roleset")) + rsets.append(etree.findall('predicate/roleset')) return LazyConcatenation(rsets) def verbs(self): @@ -172,7 +175,7 @@ class PropbankCorpusReader(CorpusReader): ###################################################################### - +@compat.python_2_unicode_compatible class PropbankInstance(object): def __init__( self, @@ -231,27 +234,27 @@ class PropbankInstance(object): @property def baseform(self): """The baseform of the predicate.""" - return self.roleset.split(".")[0] + return self.roleset.split('.')[0] @property def sensenumber(self): """The sense number of the predicate.""" - return self.roleset.split(".")[1] + return self.roleset.split('.')[1] @property def predid(self): """Identifier of the predicate.""" - return "rel" + return 'rel' def __repr__(self): - return "" % ( + return '' % ( self.fileid, self.sentnum, self.wordnum, ) def __str__(self): - s = "%s %s %s %s %s %s" % ( + s = '%s %s %s %s %s %s' % ( self.fileid, self.sentnum, self.wordnum, @@ -259,9 +262,9 @@ class PropbankInstance(object): self.roleset, self.inflection, ) - items = self.arguments + ((self.predicate, "rel"),) + items = self.arguments + ((self.predicate, 'rel'),) for (argloc, argid) in sorted(items): - s += " %s-%s" % (argloc, argid) + s += ' %s-%s' % (argloc, argid) return s def _get_tree(self): @@ -282,14 +285,14 @@ class PropbankInstance(object): def parse(s, parse_fileid_xform=None, parse_corpus=None): pieces = s.split() if len(pieces) < 7: - raise ValueError("Badly formatted propbank line: %r" % s) + raise ValueError('Badly formatted propbank line: %r' % s) # Divide the line into its basic pieces. (fileid, sentnum, wordnum, tagger, roleset, inflection) = pieces[:6] - rel = [p for p in pieces[6:] if p.endswith("-rel")] - args = [p for p in pieces[6:] if not p.endswith("-rel")] + rel = [p for p in pieces[6:] if p.endswith('-rel')] + args = [p for p in pieces[6:] if not p.endswith('-rel')] if len(rel) != 1: - raise ValueError("Badly formatted propbank line: %r" % s) + raise ValueError('Badly formatted propbank line: %r' % s) # Apply the fileid selector, if any. if parse_fileid_xform is not None: @@ -308,7 +311,7 @@ class PropbankInstance(object): # Parse the arguments. arguments = [] for arg in args: - argloc, argid = arg.split("-", 1) + argloc, argid = arg.split('-', 1) arguments.append((PropbankTreePointer.parse(argloc), argid)) # Put it all together. 
@@ -345,7 +348,7 @@ class PropbankPointer(object): raise NotImplementedError() - +@compat.python_2_unicode_compatible class PropbankChainTreePointer(PropbankPointer): def __init__(self, pieces): self.pieces = pieces @@ -354,18 +357,18 @@ class PropbankChainTreePointer(PropbankPointer): ``PropbankTreePointer`` pointers.""" def __str__(self): - return "*".join("%s" % p for p in self.pieces) + return '*'.join('%s' % p for p in self.pieces) def __repr__(self): - return "" % self + return '' % self def select(self, tree): if tree is None: - raise ValueError("Parse tree not avaialable") - return Tree("*CHAIN*", [p.select(tree) for p in self.pieces]) - + raise ValueError('Parse tree not avaialable') + return Tree('*CHAIN*', [p.select(tree) for p in self.pieces]) +@compat.python_2_unicode_compatible class PropbankSplitTreePointer(PropbankPointer): def __init__(self, pieces): self.pieces = pieces @@ -373,19 +376,19 @@ class PropbankSplitTreePointer(PropbankPointer): all ``PropbankTreePointer`` pointers.""" def __str__(self): - return ",".join("%s" % p for p in self.pieces) + return ','.join('%s' % p for p in self.pieces) def __repr__(self): - return "" % self + return '' % self def select(self, tree): if tree is None: - raise ValueError("Parse tree not avaialable") - return Tree("*SPLIT*", [p.select(tree) for p in self.pieces]) + raise ValueError('Parse tree not avaialable') + return Tree('*SPLIT*', [p.select(tree) for p in self.pieces]) @total_ordering - +@compat.python_2_unicode_compatible class PropbankTreePointer(PropbankPointer): """ wordnum:height*wordnum:height*... @@ -400,30 +403,30 @@ class PropbankTreePointer(PropbankPointer): @staticmethod def parse(s): # Deal with chains (xx*yy*zz) - pieces = s.split("*") + pieces = s.split('*') if len(pieces) > 1: return PropbankChainTreePointer( [PropbankTreePointer.parse(elt) for elt in pieces] ) # Deal with split args (xx,yy,zz) - pieces = s.split(",") + pieces = s.split(',') if len(pieces) > 1: return PropbankSplitTreePointer( [PropbankTreePointer.parse(elt) for elt in pieces] ) # Deal with normal pointers. - pieces = s.split(":") + pieces = s.split(':') if len(pieces) != 2: - raise ValueError("bad propbank pointer %r" % s) + raise ValueError('bad propbank pointer %r' % s) return PropbankTreePointer(int(pieces[0]), int(pieces[1])) def __str__(self): - return "%s:%s" % (self.wordnum, self.height) + return '%s:%s' % (self.wordnum, self.height) def __repr__(self): - return "PropbankTreePointer(%d, %d)" % (self.wordnum, self.height) + return 'PropbankTreePointer(%d, %d)' % (self.wordnum, self.height) def __eq__(self, other): while isinstance(other, (PropbankChainTreePointer, PropbankSplitTreePointer)): @@ -448,7 +451,7 @@ class PropbankTreePointer(PropbankPointer): def select(self, tree): if tree is None: - raise ValueError("Parse tree not avaialable") + raise ValueError('Parse tree not avaialable') return tree[self.treepos(tree)] def treepos(self, tree): @@ -457,12 +460,14 @@ class PropbankTreePointer(PropbankPointer): given that it points to the given tree. """ if tree is None: - raise ValueError("Parse tree not avaialable") + raise ValueError('Parse tree not avaialable') stack = [tree] treepos = [] wordnum = 0 while True: + # print treepos + # print stack[-1] # tree node: if isinstance(stack[-1], Tree): # Select the next child. 
@@ -486,31 +491,31 @@ class PropbankTreePointer(PropbankPointer): stack.pop() - +@compat.python_2_unicode_compatible class PropbankInflection(object): # { Inflection Form - INFINITIVE = "i" - GERUND = "g" - PARTICIPLE = "p" - FINITE = "v" + INFINITIVE = 'i' + GERUND = 'g' + PARTICIPLE = 'p' + FINITE = 'v' # { Inflection Tense - FUTURE = "f" - PAST = "p" - PRESENT = "n" + FUTURE = 'f' + PAST = 'p' + PRESENT = 'n' # { Inflection Aspect - PERFECT = "p" - PROGRESSIVE = "o" - PERFECT_AND_PROGRESSIVE = "b" + PERFECT = 'p' + PROGRESSIVE = 'o' + PERFECT_AND_PROGRESSIVE = 'b' # { Inflection Person - THIRD_PERSON = "3" + THIRD_PERSON = '3' # { Inflection Voice - ACTIVE = "a" - PASSIVE = "p" + ACTIVE = 'a' + PASSIVE = 'p' # { Inflection - NONE = "-" + NONE = '-' # } - def __init__(self, form="-", tense="-", aspect="-", person="-", voice="-"): + def __init__(self, form='-', tense='-', aspect='-', person='-', voice='-'): self.form = form self.tense = tense self.aspect = aspect @@ -521,14 +526,14 @@ class PropbankInflection(object): return self.form + self.tense + self.aspect + self.person + self.voice def __repr__(self): - return "" % self + return '' % self - _VALIDATE = re.compile(r"[igpv\-][fpn\-][pob\-][3\-][ap\-]$") + _VALIDATE = re.compile(r'[igpv\-][fpn\-][pob\-][3\-][ap\-]$') @staticmethod def parse(s): - if not isinstance(s, str): - raise TypeError("expected a string") + if not isinstance(s, string_types): + raise TypeError('expected a string') if len(s) != 5 or not PropbankInflection._VALIDATE.match(s): - raise ValueError("Bad propbank inflection string %r" % s) + raise ValueError('Bad propbank inflection string %r' % s) return PropbankInflection(*s) diff --git a/nlp_resource_data/nltk/corpus/reader/pros_cons.py b/nlp_resource_data/nltk/corpus/reader/pros_cons.py index ca9e540..8117918 100644 --- a/nlp_resource_data/nltk/corpus/reader/pros_cons.py +++ b/nlp_resource_data/nltk/corpus/reader/pros_cons.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Pros and Cons Corpus Reader # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Pierpaolo Pantone <24alsecondo@gmail.com> # URL: # For license information, see LICENSE.TXT @@ -27,6 +27,8 @@ Related papers: """ import re +from six import string_types + from nltk.corpus.reader.api import * from nltk.tokenize import * @@ -51,7 +53,7 @@ class ProsConsCorpusReader(CategorizedCorpusReader, CorpusReader): root, fileids, word_tokenizer=WordPunctTokenizer(), - encoding="utf8", + encoding='utf8', **kwargs ): """ @@ -82,7 +84,7 @@ class ProsConsCorpusReader(CategorizedCorpusReader, CorpusReader): fileids = self._resolve(fileids, categories) if fileids is None: fileids = self._fileids - elif isinstance(fileids, str): + elif isinstance(fileids, string_types): fileids = [fileids] return concat( [ @@ -106,7 +108,7 @@ class ProsConsCorpusReader(CategorizedCorpusReader, CorpusReader): fileids = self._resolve(fileids, categories) if fileids is None: fileids = self._fileids - elif isinstance(fileids, str): + elif isinstance(fileids, string_types): fileids = [fileids] return concat( [ @@ -134,7 +136,7 @@ class ProsConsCorpusReader(CategorizedCorpusReader, CorpusReader): def _resolve(self, fileids, categories): if fileids is not None and categories is not None: - raise ValueError("Specify fileids or categories, not both") + raise ValueError('Specify fileids or categories, not both') if categories is not None: return self.fileids(categories) else: diff --git a/nlp_resource_data/nltk/corpus/reader/reviews.py 
b/nlp_resource_data/nltk/corpus/reader/reviews.py index fc0b61c..9a1f173 100644 --- a/nlp_resource_data/nltk/corpus/reader/reviews.py +++ b/nlp_resource_data/nltk/corpus/reader/reviews.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Product Reviews Corpus Reader # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Pierpaolo Pantone <24alsecondo@gmail.com> # URL: # For license information, see LICENSE.TXT @@ -59,19 +59,24 @@ Note: Some of the files (e.g. "ipod.txt", "Canon PowerShot SD500.txt") do not consideration. """ +from __future__ import division + import re +from six import string_types + from nltk.corpus.reader.api import * from nltk.tokenize import * -TITLE = re.compile(r"^\[t\](.*)$") # [t] Title +TITLE = re.compile(r'^\[t\](.*)$') # [t] Title FEATURES = re.compile( - r"((?:(?:\w+\s)+)?\w+)\[((?:\+|\-)\d)\]" + r'((?:(?:\w+\s)+)?\w+)\[((?:\+|\-)\d)\]' ) # find 'feature' in feature[+3] -NOTES = re.compile(r"\[(?!t)(p|u|s|cc|cs)\]") # find 'p' in camera[+2][p] -SENT = re.compile(r"##(.*)$") # find tokenized sentence +NOTES = re.compile(r'\[(?!t)(p|u|s|cc|cs)\]') # find 'p' in camera[+2][p] +SENT = re.compile(r'##(.*)$') # find tokenized sentence +@compat.python_2_unicode_compatible class Review(object): """ A Review is the main block of a ReviewsCorpusReader. @@ -120,11 +125,12 @@ class Review(object): return [review_line.sent for review_line in self.review_lines] def __repr__(self): - return 'Review(title="{}", review_lines={})'.format( + return 'Review(title=\"{}\", review_lines={})'.format( self.title, self.review_lines ) +@compat.python_2_unicode_compatible class ReviewLine(object): """ A ReviewLine represents a sentence of the review, together with (optional) @@ -144,7 +150,7 @@ class ReviewLine(object): self.notes = notes def __repr__(self): - return "ReviewLine(features={}, notes={}, sent={})".format( + return 'ReviewLine(features={}, notes={}, sent={})'.format( self.features, self.notes, self.sent ) @@ -174,8 +180,10 @@ class ReviewsCorpusReader(CorpusReader): We can compute stats for specific product features: + >>> from __future__ import division >>> n_reviews = len([(feat,score) for (feat,score) in product_reviews_1.features('Canon_G3.txt') if feat=='picture']) >>> tot = sum([int(score) for (feat,score) in product_reviews_1.features('Canon_G3.txt') if feat=='picture']) + >>> # We use float for backward compatibility with division in Python2.7 >>> mean = tot / n_reviews >>> print(n_reviews, tot, mean) 15 24 1.6 @@ -184,7 +192,7 @@ class ReviewsCorpusReader(CorpusReader): CorpusView = StreamBackedCorpusView def __init__( - self, root, fileids, word_tokenizer=WordPunctTokenizer(), encoding="utf8" + self, root, fileids, word_tokenizer=WordPunctTokenizer(), encoding='utf8' ): """ :param root: The root directory for the corpus. 
@@ -209,7 +217,7 @@ class ReviewsCorpusReader(CorpusReader): """ if fileids is None: fileids = self._fileids - elif isinstance(fileids, str): + elif isinstance(fileids, string_types): fileids = [fileids] return concat( [ @@ -227,7 +235,7 @@ class ReviewsCorpusReader(CorpusReader): """ if fileids is None: fileids = self._fileids - elif isinstance(fileids, str): + elif isinstance(fileids, string_types): fileids = [fileids] return concat([self.open(f).read() for f in fileids]) diff --git a/nlp_resource_data/nltk/corpus/reader/rte.py b/nlp_resource_data/nltk/corpus/reader/rte.py index 9538f47..0b0cd44 100644 --- a/nlp_resource_data/nltk/corpus/reader/rte.py +++ b/nlp_resource_data/nltk/corpus/reader/rte.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: RTE Corpus Reader # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Ewan Klein # URL: # For license information, see LICENSE.TXT @@ -32,6 +32,11 @@ In order to provide globally unique IDs for each pair, a new attribute file, taking values 1, 2 or 3. The GID is formatted 'm-n', where 'm' is the challenge number and 'n' is the pair ID. """ +from __future__ import unicode_literals + +from six import string_types + +from nltk import compat from nltk.corpus.reader.util import * from nltk.corpus.reader.api import * from nltk.corpus.reader.xmldocs import * @@ -51,6 +56,7 @@ def norm(value_string): return valdict[value_string.upper()] +@compat.python_2_unicode_compatible class RTEPair(object): """ Container for RTE text-hypothesis pairs. @@ -103,9 +109,9 @@ class RTEPair(object): def __repr__(self): if self.challenge: - return "" % (self.challenge, self.id) + return '' % (self.challenge, self.id) else: - return "" % self.id + return '' % self.id class RTECorpusReader(XMLCorpusReader): @@ -127,7 +133,7 @@ class RTECorpusReader(XMLCorpusReader): :rtype: list(RTEPair) """ try: - challenge = doc.attrib["challenge"] + challenge = doc.attrib['challenge'] except KeyError: challenge = None return [RTEPair(pair, challenge=challenge) for pair in doc.getiterator("pair")] @@ -140,6 +146,6 @@ class RTECorpusReader(XMLCorpusReader): :type: list :rtype: list(RTEPair) """ - if isinstance(fileids, str): + if isinstance(fileids, string_types): fileids = [fileids] return concat([self._read_etree(self.xml(fileid)) for fileid in fileids]) diff --git a/nlp_resource_data/nltk/corpus/reader/semcor.py b/nlp_resource_data/nltk/corpus/reader/semcor.py index f04ea45..1b6f515 100644 --- a/nlp_resource_data/nltk/corpus/reader/semcor.py +++ b/nlp_resource_data/nltk/corpus/reader/semcor.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: SemCor Corpus Reader # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Nathan Schneider # URL: # For license information, see LICENSE.TXT @@ -8,8 +8,9 @@ """ Corpus reader for the SemCor Corpus. """ +from __future__ import absolute_import, unicode_literals -__docformat__ = "epytext en" +__docformat__ = 'epytext en' from nltk.corpus.reader.api import * from nltk.corpus.reader.xmldocs import XMLCorpusReader, XMLCorpusView @@ -34,7 +35,7 @@ class SemcorCorpusReader(XMLCorpusReader): :return: the given file(s) as a list of words and punctuation symbols. :rtype: list(str) """ - return self._items(fileids, "word", False, False, False) + return self._items(fileids, 'word', False, False, False) def chunks(self, fileids=None): """ @@ -43,9 +44,9 @@ class SemcorCorpusReader(XMLCorpusReader): that form a unit. 
:rtype: list(list(str)) """ - return self._items(fileids, "chunk", False, False, False) + return self._items(fileids, 'chunk', False, False, False) - def tagged_chunks(self, fileids=None, tag=("pos" or "sem" or "both")): + def tagged_chunks(self, fileids=None, tag=('pos' or 'sem' or 'both')): """ :return: the given file(s) as a list of tagged chunks, represented in tree form. @@ -58,7 +59,7 @@ class SemcorCorpusReader(XMLCorpusReader): have no lemma. Other chunks not in WordNet have no semantic tag. Punctuation tokens have `None` for their part of speech tag.) """ - return self._items(fileids, "chunk", False, tag != "sem", tag != "pos") + return self._items(fileids, 'chunk', False, tag != 'sem', tag != 'pos') def sents(self, fileids=None): """ @@ -66,7 +67,7 @@ class SemcorCorpusReader(XMLCorpusReader): as a list of word strings. :rtype: list(list(str)) """ - return self._items(fileids, "word", True, False, False) + return self._items(fileids, 'word', True, False, False) def chunk_sents(self, fileids=None): """ @@ -74,9 +75,9 @@ class SemcorCorpusReader(XMLCorpusReader): as a list of chunks. :rtype: list(list(list(str))) """ - return self._items(fileids, "chunk", True, False, False) + return self._items(fileids, 'chunk', True, False, False) - def tagged_sents(self, fileids=None, tag=("pos" or "sem" or "both")): + def tagged_sents(self, fileids=None, tag=('pos' or 'sem' or 'both')): """ :return: the given file(s) as a list of sentences. Each sentence is represented as a list of tagged chunks (in tree form). @@ -89,10 +90,10 @@ class SemcorCorpusReader(XMLCorpusReader): have no lemma. Other chunks not in WordNet have no semantic tag. Punctuation tokens have `None` for their part of speech tag.) """ - return self._items(fileids, "chunk", True, tag != "sem", tag != "pos") + return self._items(fileids, 'chunk', True, tag != 'sem', tag != 'pos') def _items(self, fileids, unit, bracket_sent, pos_tag, sem_tag): - if unit == "word" and not bracket_sent: + if unit == 'word' and not bracket_sent: # the result of the SemcorWordView may be a multiword unit, so the # LazyConcatenation will make sure the sentence is flattened _ = lambda *args: LazyConcatenation( @@ -121,23 +122,23 @@ class SemcorCorpusReader(XMLCorpusReader): :param sem_tag: Whether to include semantic tags, namely WordNet lemma and OOV named entity status. """ - assert unit in ("token", "word", "chunk") + assert unit in ('token', 'word', 'chunk') result = [] xmldoc = ElementTree.parse(fileid).getroot() - for xmlsent in xmldoc.findall(".//s"): + for xmlsent in xmldoc.findall('.//s'): sent = [] for xmlword in _all_xmlwords_in(xmlsent): itm = SemcorCorpusReader._word( xmlword, unit, pos_tag, sem_tag, self._wordnet ) - if unit == "word": + if unit == 'word': sent.extend(itm) else: sent.append(itm) if bracket_sent: - result.append(SemcorSentence(xmlsent.attrib["snum"], sent)) + result.append(SemcorSentence(xmlsent.attrib['snum'], sent)) else: result.extend(sent) @@ -150,29 +151,29 @@ class SemcorCorpusReader(XMLCorpusReader): if not tkn: tkn = "" # fixes issue 337? 
- lemma = xmlword.get("lemma", tkn) # lemma or NE class - lexsn = xmlword.get("lexsn") # lex_sense (locator for the lemma's sense) + lemma = xmlword.get('lemma', tkn) # lemma or NE class + lexsn = xmlword.get('lexsn') # lex_sense (locator for the lemma's sense) if lexsn is not None: - sense_key = lemma + "%" + lexsn - wnpos = ("n", "v", "a", "r", "s")[ - int(lexsn.split(":")[0]) - 1 + sense_key = lemma + '%' + lexsn + wnpos = ('n', 'v', 'a', 'r', 's')[ + int(lexsn.split(':')[0]) - 1 ] # see http://wordnet.princeton.edu/man/senseidx.5WN.html else: sense_key = wnpos = None redef = xmlword.get( - "rdf", tkn + 'rdf', tkn ) # redefinition--this indicates the lookup string # does not exactly match the enclosed string, e.g. due to typographical adjustments # or discontinuity of a multiword expression. If a redefinition has occurred, # the "rdf" attribute holds its inflected form and "lemma" holds its lemma. # For NEs, "rdf", "lemma", and "pn" all hold the same value (the NE class). - sensenum = xmlword.get("wnsn") # WordNet sense number - isOOVEntity = "pn" in xmlword.keys() # a "personal name" (NE) not in WordNet + sensenum = xmlword.get('wnsn') # WordNet sense number + isOOVEntity = 'pn' in xmlword.keys() # a "personal name" (NE) not in WordNet pos = xmlword.get( - "pos" + 'pos' ) # part of speech for the whole chunk (None for punctuation) - if unit == "token": + if unit == 'token': if not pos_tag and not sem_tag: itm = tkn else: @@ -183,8 +184,8 @@ class SemcorCorpusReader(XMLCorpusReader): ) return itm else: - ww = tkn.split("_") # TODO: case where punctuation intervenes in MWE - if unit == "word": + ww = tkn.split('_') # TODO: case where punctuation intervenes in MWE + if unit == 'word': return ww else: if sensenum is not None: @@ -197,23 +198,23 @@ class SemcorCorpusReader(XMLCorpusReader): # nltk.corpus.reader.wordnet.WordNetError: No synset found for key u'such%5:00:01:specified:00' # solution: just use the lemma name as a string try: - sense = "%s.%s.%02d" % ( + sense = '%s.%s.%02d' % ( lemma, wnpos, int(sensenum), ) # e.g.: reach.v.02 except ValueError: sense = ( - lemma + "." + wnpos + "." + sensenum + lemma + '.' + wnpos + '.' + sensenum ) # e.g. the sense number may be "2;1" bottom = [Tree(pos, ww)] if pos_tag else ww if sem_tag and isOOVEntity: if sensenum is not None: - return Tree(sense, [Tree("NE", bottom)]) + return Tree(sense, [Tree('NE', bottom)]) else: # 'other' NE - return Tree("NE", bottom) + return Tree('NE', bottom) elif sem_tag and sensenum is not None: return Tree(sense, bottom) elif pos_tag: @@ -226,7 +227,7 @@ def _all_xmlwords_in(elt, result=None): if result is None: result = [] for child in elt: - if child.tag in ("wf", "punc"): + if child.tag in ('wf', 'punc'): result.append(child) else: _all_xmlwords_in(child, result) @@ -259,9 +260,9 @@ class SemcorWordView(XMLCorpusView): and OOV named entity status. 
""" if bracket_sent: - tagspec = ".*/s" + tagspec = '.*/s' else: - tagspec = ".*/s/(punc|wf)" + tagspec = '.*/s/(punc|wf)' self._unit = unit self._sent = bracket_sent @@ -285,12 +286,12 @@ class SemcorWordView(XMLCorpusView): def handle_sent(self, elt): sent = [] for child in elt: - if child.tag in ("wf", "punc"): + if child.tag in ('wf', 'punc'): itm = self.handle_word(child) - if self._unit == "word": + if self._unit == 'word': sent.extend(itm) else: sent.append(itm) else: - raise ValueError("Unexpected element %s" % child.tag) - return SemcorSentence(elt.attrib["snum"], sent) + raise ValueError('Unexpected element %s' % child.tag) + return SemcorSentence(elt.attrib['snum'], sent) diff --git a/nlp_resource_data/nltk/corpus/reader/senseval.py b/nlp_resource_data/nltk/corpus/reader/senseval.py index 5d1a250..66a5386 100644 --- a/nlp_resource_data/nltk/corpus/reader/senseval.py +++ b/nlp_resource_data/nltk/corpus/reader/senseval.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Senseval 2 Corpus Reader # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Trevor Cohn # Steven Bird (modifications) # URL: @@ -21,16 +21,21 @@ The NLTK version of the Senseval 2 files uses well-formed XML. Each instance of the ambiguous words "hard", "interest", "line", and "serve" is tagged with a sense identifier, and supplied with context. """ +from __future__ import print_function, unicode_literals import re from xml.etree import ElementTree +from six import string_types + +from nltk import compat from nltk.tokenize import * from nltk.corpus.reader.util import * from nltk.corpus.reader.api import * +@compat.python_2_unicode_compatible class SensevalInstance(object): def __init__(self, word, position, context, senses): self.word = word @@ -39,7 +44,7 @@ class SensevalInstance(object): self.context = context def __repr__(self): - return "SensevalInstance(word=%r, position=%r, " "context=%r, senses=%r)" % ( + return 'SensevalInstance(word=%r, position=%r, ' 'context=%r, senses=%r)' % ( self.word, self.position, self.context, @@ -62,16 +67,16 @@ class SensevalCorpusReader(CorpusReader): """ if fileids is None: fileids = self._fileids - elif isinstance(fileids, str): + elif isinstance(fileids, string_types): fileids = [fileids] return concat([self.open(f).read() for f in fileids]) def _entry(self, tree): elts = [] - for lexelt in tree.findall("lexelt"): - for inst in lexelt.findall("instance"): - sense = inst[0].attrib["senseid"] - context = [(w.text, w.attrib["pos"]) for w in inst[1]] + for lexelt in tree.findall('lexelt'): + for inst in lexelt.findall('instance'): + sense = inst[0].attrib['senseid'] + context = [(w.text, w.attrib['pos']) for w in inst[1]] elts.append((sense, context)) return elts @@ -93,14 +98,14 @@ class SensevalCorpusView(StreamBackedCorpusView): in_instance = False while True: line = stream.readline() - if line == "": + if line == '': assert instance_lines == [] return [] # Start of a lexical element? - if line.lstrip().startswith(" has no 'item=...' lexelt = m.group(1)[1:-1] if lexelt_num < len(self._lexelts): @@ -110,7 +115,7 @@ class SensevalCorpusView(StreamBackedCorpusView): self._lexelt_starts.append(stream.tell()) # Start of an instance? - if line.lstrip().startswith("" - elif cword.tag == "wf": - context.append((cword.text, cword.attrib["pos"])) - elif cword.tag == "s": + assert False, 'expected CDATA or wf in ' + elif cword.tag == 'wf': + context.append((cword.text, cword.attrib['pos'])) + elif cword.tag == 's': pass # Sentence boundary marker. 
else: - print("ACK", cword.tag) - assert False, "expected CDATA or or " + print('ACK', cword.tag) + assert False, 'expected CDATA or or ' if cword.tail: context += self._word_tokenizer.tokenize(cword.tail) else: - assert False, "unexpected tag %s" % child.tag + assert False, 'unexpected tag %s' % child.tag return SensevalInstance(lexelt, position, context, senses) @@ -174,31 +179,31 @@ def _fixXML(text): Fix the various issues with Senseval pseudo-XML. """ # <~> or <^> => ~ or ^ - text = re.sub(r"<([~\^])>", r"\1", text) + text = re.sub(r'<([~\^])>', r'\1', text) # fix lone & - text = re.sub(r"(\s+)\&(\s+)", r"\1&\2", text) + text = re.sub(r'(\s+)\&(\s+)', r'\1&\2', text) # fix """ - text = re.sub(r'"""', "'\"'", text) + text = re.sub(r'"""', '\'"\'', text) # fix => text = re.sub(r'(<[^<]*snum=)([^">]+)>', r'\1"\2"/>', text) # fix foreign word tag - text = re.sub(r"<\&frasl>\s*]*>", "FRASL", text) + text = re.sub(r'<\&frasl>\s*]*>', 'FRASL', text) # remove <&I .> - text = re.sub(r"<\&I[^>]*>", "", text) + text = re.sub(r'<\&I[^>]*>', '', text) # fix <{word}> - text = re.sub(r"<{([^}]+)}>", r"\1", text) + text = re.sub(r'<{([^}]+)}>', r'\1', text) # remove <@>,

 <p>, </p>
    - text = re.sub(r"<(@|/?p)>", r"", text) + text = re.sub(r'<(@|/?p)>', r'', text) # remove <&M .> and <&T .> and <&Ms .> - text = re.sub(r"<&\w+ \.>", r"", text) + text = re.sub(r'<&\w+ \.>', r'', text) # remove lines - text = re.sub(r"]*>", r"", text) + text = re.sub(r']*>', r'', text) # remove <[hi]> and <[/p]> etc - text = re.sub(r"<\[\/?[^>]+\]*>", r"", text) + text = re.sub(r'<\[\/?[^>]+\]*>', r'', text) # take the thing out of the brackets: <…> - text = re.sub(r"<(\&\w+;)>", r"\1", text) + text = re.sub(r'<(\&\w+;)>', r'\1', text) # and remove the & for those patterns that aren't regular XML - text = re.sub(r"&(?!amp|gt|lt|apos|quot)", r"", text) + text = re.sub(r'&(?!amp|gt|lt|apos|quot)', r'', text) # fix 'abc ' style tags - now abc text = re.sub( r'[ \t]*([^<>\s]+?)[ \t]*', r' \1', text diff --git a/nlp_resource_data/nltk/corpus/reader/sentiwordnet.py b/nlp_resource_data/nltk/corpus/reader/sentiwordnet.py index f0097c2..bbe4fc9 100644 --- a/nlp_resource_data/nltk/corpus/reader/sentiwordnet.py +++ b/nlp_resource_data/nltk/corpus/reader/sentiwordnet.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: SentiWordNet # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Christopher Potts # URL: # For license information, see LICENSE.TXT @@ -37,19 +37,20 @@ http://sentiwordnet.isti.cnr.it/ """ import re - +from nltk.compat import python_2_unicode_compatible from nltk.corpus.reader import CorpusReader +@python_2_unicode_compatible class SentiWordNetCorpusReader(CorpusReader): - def __init__(self, root, fileids, encoding="utf-8"): + def __init__(self, root, fileids, encoding='utf-8'): """ Construct a new SentiWordNet Corpus Reader, using data from the specified file. """ super(SentiWordNetCorpusReader, self).__init__(root, fileids, encoding=encoding) if len(self._fileids) != 1: - raise ValueError("Exactly one file must be specified") + raise ValueError('Exactly one file must be specified') self._db = {} self._parse_src_file() @@ -61,7 +62,7 @@ class SentiWordNetCorpusReader(CorpusReader): try: pos, offset, pos_score, neg_score, synset_terms, gloss = fields except: - raise ValueError("Line %s formatted incorrectly: %s\n" % (i, line)) + raise ValueError('Line %s formatted incorrectly: %s\n' % (i, line)) if pos and offset: offset = int(offset) self._db[(pos, offset)] = (float(pos_score), float(neg_score)) @@ -72,15 +73,15 @@ class SentiWordNetCorpusReader(CorpusReader): if tuple(vals) in self._db: pos_score, neg_score = self._db[tuple(vals)] pos, offset = vals - if pos == "s": - pos = "a" + if pos == 's': + pos = 'a' synset = wn.synset_from_pos_and_offset(pos, offset) return SentiSynset(pos_score, neg_score, synset) else: synset = wn.synset(vals[0]) pos = synset.pos() - if pos == "s": - pos = "a" + if pos == 's': + pos = 'a' offset = synset.offset() if (pos, offset) in self._db: pos_score, neg_score = self._db[(pos, offset)] @@ -108,6 +109,7 @@ class SentiWordNetCorpusReader(CorpusReader): yield SentiSynset(pos_score, neg_score, synset) +@python_2_unicode_compatible class SentiSynset(object): def __init__(self, pos_score, neg_score, synset): self._pos_score = pos_score diff --git a/nlp_resource_data/nltk/corpus/reader/sinica_treebank.py b/nlp_resource_data/nltk/corpus/reader/sinica_treebank.py index 15b997c..fbbc92d 100644 --- a/nlp_resource_data/nltk/corpus/reader/sinica_treebank.py +++ b/nlp_resource_data/nltk/corpus/reader/sinica_treebank.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Sinica Treebank Reader # -# Copyright (C) 
2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Steven Bird # URL: # For license information, see LICENSE.TXT @@ -44,10 +44,10 @@ from nltk.tag import map_tag from nltk.corpus.reader.util import * from nltk.corpus.reader.api import * -IDENTIFIER = re.compile(r"^#\S+\s") -APPENDIX = re.compile(r"(?<=\))#.*$") -TAGWORD = re.compile(r":([^:()|]+):([^:()|]+)") -WORD = re.compile(r":[^:()|]+:([^:()|]+)") +IDENTIFIER = re.compile(r'^#\S+\s') +APPENDIX = re.compile(r'(?<=\))#.*$') +TAGWORD = re.compile(r':([^:()|]+):([^:()|]+)') +WORD = re.compile(r':[^:()|]+:([^:()|]+)') class SinicaTreebankCorpusReader(SyntaxCorpusReader): @@ -57,8 +57,8 @@ class SinicaTreebankCorpusReader(SyntaxCorpusReader): def _read_block(self, stream): sent = stream.readline() - sent = IDENTIFIER.sub("", sent) - sent = APPENDIX.sub("", sent) + sent = IDENTIFIER.sub('', sent) + sent = APPENDIX.sub('', sent) return [sent] def _parse(self, sent): diff --git a/nlp_resource_data/nltk/corpus/reader/string_category.py b/nlp_resource_data/nltk/corpus/reader/string_category.py index 136a62e..eaf5bf4 100644 --- a/nlp_resource_data/nltk/corpus/reader/string_category.py +++ b/nlp_resource_data/nltk/corpus/reader/string_category.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: String Category Corpus Reader # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Steven Bird # Edward Loper # URL: @@ -18,6 +18,9 @@ NUM:date When did Hawaii become a state ? """ # based on PPAttachmentCorpusReader +from six import string_types + +from nltk import compat from nltk.corpus.reader.util import * from nltk.corpus.reader.api import * @@ -25,7 +28,7 @@ from nltk.corpus.reader.api import * # in nltk, we use the form (data, tag) -- e.g., tagged words and # labeled texts for classifiers. class StringCategoryCorpusReader(CorpusReader): - def __init__(self, root, fileids, delimiter=" ", encoding="utf8"): + def __init__(self, root, fileids, delimiter=' ', encoding='utf8'): """ :param root: The root directory for this corpus. :param fileids: A list or regexp specifying the fileids in this corpus. @@ -37,7 +40,7 @@ class StringCategoryCorpusReader(CorpusReader): def tuples(self, fileids=None): if fileids is None: fileids = self._fileids - elif isinstance(fileids, str): + elif isinstance(fileids, string_types): fileids = [fileids] return concat( [ @@ -52,7 +55,7 @@ class StringCategoryCorpusReader(CorpusReader): """ if fileids is None: fileids = self._fileids - elif isinstance(fileids, str): + elif isinstance(fileids, string_types): fileids = [fileids] return concat([self.open(f).read() for f in fileids]) diff --git a/nlp_resource_data/nltk/corpus/reader/switchboard.py b/nlp_resource_data/nltk/corpus/reader/switchboard.py index 593ef45..ed65c42 100644 --- a/nlp_resource_data/nltk/corpus/reader/switchboard.py +++ b/nlp_resource_data/nltk/corpus/reader/switchboard.py @@ -1,17 +1,20 @@ # Natural Language Toolkit: Switchboard Corpus Reader # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # URL: # For license information, see LICENSE.TXT +from __future__ import unicode_literals import re from nltk.tag import str2tuple, map_tag +from nltk import compat from nltk.corpus.reader.util import * from nltk.corpus.reader.api import * +@compat.python_2_unicode_compatible class SwitchboardTurn(list): """ A specialized list object used to encode switchboard utterances. 
@@ -28,16 +31,16 @@ class SwitchboardTurn(list): def __repr__(self): if len(self) == 0: - text = "" + text = '' elif isinstance(self[0], tuple): - text = " ".join("%s/%s" % w for w in self) + text = ' '.join('%s/%s' % w for w in self) else: - text = " ".join(self) - return "<%s.%s: %r>" % (self.speaker, self.id, text) + text = ' '.join(self) + return '<%s.%s: %r>' % (self.speaker, self.id, text) class SwitchboardCorpusReader(CorpusReader): - _FILES = ["tagged"] + _FILES = ['tagged'] # Use the "tagged" file even for non-tagged data methods, since # it's tokenized. @@ -46,26 +49,26 @@ class SwitchboardCorpusReader(CorpusReader): self._tagset = tagset def words(self): - return StreamBackedCorpusView(self.abspath("tagged"), self._words_block_reader) + return StreamBackedCorpusView(self.abspath('tagged'), self._words_block_reader) def tagged_words(self, tagset=None): def tagged_words_block_reader(stream): return self._tagged_words_block_reader(stream, tagset) - return StreamBackedCorpusView(self.abspath("tagged"), tagged_words_block_reader) + return StreamBackedCorpusView(self.abspath('tagged'), tagged_words_block_reader) def turns(self): - return StreamBackedCorpusView(self.abspath("tagged"), self._turns_block_reader) + return StreamBackedCorpusView(self.abspath('tagged'), self._turns_block_reader) def tagged_turns(self, tagset=None): def tagged_turns_block_reader(stream): return self._tagged_turns_block_reader(stream, tagset) - return StreamBackedCorpusView(self.abspath("tagged"), tagged_turns_block_reader) + return StreamBackedCorpusView(self.abspath('tagged'), tagged_turns_block_reader) def discourses(self): return StreamBackedCorpusView( - self.abspath("tagged"), self._discourses_block_reader + self.abspath('tagged'), self._discourses_block_reader ) def tagged_discourses(self, tagset=False): @@ -73,7 +76,7 @@ class SwitchboardCorpusReader(CorpusReader): return self._tagged_discourses_block_reader(stream, tagset) return StreamBackedCorpusView( - self.abspath("tagged"), tagged_discourses_block_reader + self.abspath('tagged'), tagged_discourses_block_reader ) def _discourses_block_reader(self, stream): @@ -82,7 +85,7 @@ class SwitchboardCorpusReader(CorpusReader): [ self._parse_utterance(u, include_tag=False) for b in read_blankline_block(stream) - for u in b.split("\n") + for u in b.split('\n') if u.strip() ] ] @@ -93,7 +96,7 @@ class SwitchboardCorpusReader(CorpusReader): [ self._parse_utterance(u, include_tag=True, tagset=tagset) for b in read_blankline_block(stream) - for u in b.split("\n") + for u in b.split('\n') if u.strip() ] ] @@ -110,13 +113,13 @@ class SwitchboardCorpusReader(CorpusReader): def _tagged_words_block_reader(self, stream, tagset=None): return sum(self._tagged_discourses_block_reader(stream, tagset)[0], []) - _UTTERANCE_RE = re.compile("(\w+)\.(\d+)\:\s*(.*)") - _SEP = "/" + _UTTERANCE_RE = re.compile('(\w+)\.(\d+)\:\s*(.*)') + _SEP = '/' def _parse_utterance(self, utterance, include_tag, tagset=None): m = self._UTTERANCE_RE.match(utterance) if m is None: - raise ValueError("Bad utterance %r" % utterance) + raise ValueError('Bad utterance %r' % utterance) speaker, id, text = m.groups() words = [str2tuple(s, self._SEP) for s in text.split()] if not include_tag: diff --git a/nlp_resource_data/nltk/corpus/reader/tagged.py b/nlp_resource_data/nltk/corpus/reader/tagged.py index afd27b1..3af1653 100644 --- a/nlp_resource_data/nltk/corpus/reader/tagged.py +++ b/nlp_resource_data/nltk/corpus/reader/tagged.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Tagged Corpus Reader # -# 
Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # Steven Bird # Jacob Perkins @@ -13,6 +13,8 @@ A reader for corpora whose documents contain part-of-speech-tagged words. import os +from six import string_types + from nltk.tag import str2tuple, map_tag from nltk.tokenize import * @@ -41,11 +43,11 @@ class TaggedCorpusReader(CorpusReader): self, root, fileids, - sep="/", + sep='/', word_tokenizer=WhitespaceTokenizer(), - sent_tokenizer=RegexpTokenizer("\n", gaps=True), + sent_tokenizer=RegexpTokenizer('\n', gaps=True), para_block_reader=read_blankline_block, - encoding="utf8", + encoding='utf8', tagset=None, ): """ @@ -72,7 +74,7 @@ class TaggedCorpusReader(CorpusReader): """ if fileids is None: fileids = self._fileids - elif isinstance(fileids, str): + elif isinstance(fileids, string_types): fileids = [fileids] return concat([self.open(f).read() for f in fileids]) @@ -256,7 +258,7 @@ class CategorizedTaggedCorpusReader(CategorizedCorpusReader, TaggedCorpusReader) def _resolve(self, fileids, categories): if fileids is not None and categories is not None: - raise ValueError("Specify fileids or categories, not both") + raise ValueError('Specify fileids or categories, not both') if categories is not None: return self.fileids(categories) else: @@ -358,21 +360,21 @@ class MacMorphoCorpusReader(TaggedCorpusReader): sentence. """ - def __init__(self, root, fileids, encoding="utf8", tagset=None): + def __init__(self, root, fileids, encoding='utf8', tagset=None): TaggedCorpusReader.__init__( self, root, fileids, - sep="_", + sep='_', word_tokenizer=LineTokenizer(), - sent_tokenizer=RegexpTokenizer(".*\n"), + sent_tokenizer=RegexpTokenizer('.*\n'), para_block_reader=self._read_block, encoding=encoding, tagset=tagset, ) def _read_block(self, stream): - return read_regexp_block(stream, r".*", r".*_\.") + return read_regexp_block(stream, r'.*', r'.*_\.') class TimitTaggedCorpusReader(TaggedCorpusReader): @@ -386,7 +388,7 @@ class TimitTaggedCorpusReader(TaggedCorpusReader): ) def paras(self): - raise NotImplementedError("use sents() instead") + raise NotImplementedError('use sents() instead') def tagged_paras(self): - raise NotImplementedError("use tagged_sents() instead") + raise NotImplementedError('use tagged_sents() instead') diff --git a/nlp_resource_data/nltk/corpus/reader/timit.py b/nlp_resource_data/nltk/corpus/reader/timit.py index 7d63248..bbd57c6 100644 --- a/nlp_resource_data/nltk/corpus/reader/timit.py +++ b/nlp_resource_data/nltk/corpus/reader/timit.py @@ -118,12 +118,17 @@ The 4 functions are as follows. timit.audiodata function. """ +from __future__ import print_function, unicode_literals + import sys import os import re import tempfile import time +from six import string_types + +from nltk import compat from nltk.tree import Tree from nltk.internals import import_from_stdlib @@ -149,18 +154,18 @@ class TimitCorpusReader(CorpusReader): - .wav: utterance sound file """ - _FILE_RE = r"(\w+-\w+/\w+\.(phn|txt|wav|wrd))|" + r"timitdic\.txt|spkrinfo\.txt" + _FILE_RE = r'(\w+-\w+/\w+\.(phn|txt|wav|wrd))|' + r'timitdic\.txt|spkrinfo\.txt' """A regexp matching fileids that are used by this corpus reader.""" - _UTTERANCE_RE = r"\w+-\w+/\w+\.txt" + _UTTERANCE_RE = r'\w+-\w+/\w+\.txt' - def __init__(self, root, encoding="utf8"): + def __init__(self, root, encoding='utf8'): """ Construct a new TIMIT corpus reader in the given directory. :param root: The root directory for this corpus. 
""" # Ensure that wave files don't get treated as unicode data: - if isinstance(encoding, str): - encoding = [(".*\.wav", None), (".*", encoding)] + if isinstance(encoding, string_types): + encoding = [('.*\.wav', None), ('.*', encoding)] CorpusReader.__init__( self, root, find_corpus_fileids(root, self._FILE_RE), encoding=encoding @@ -174,7 +179,7 @@ class TimitCorpusReader(CorpusReader): self._speakerinfo = None self._root = root - self.speakers = sorted(set(u.split("/")[0] for u in self._utterances)) + self.speakers = sorted(set(u.split('/')[0] for u in self._utterances)) def fileids(self, filetype=None): """ @@ -188,12 +193,12 @@ class TimitCorpusReader(CorpusReader): """ if filetype is None: return CorpusReader.fileids(self) - elif filetype in ("txt", "wrd", "phn", "wav"): - return ["%s.%s" % (u, filetype) for u in self._utterances] - elif filetype == "metadata": - return ["timitdic.txt", "spkrinfo.txt"] + elif filetype in ('txt', 'wrd', 'phn', 'wav'): + return ['%s.%s' % (u, filetype) for u in self._utterances] + elif filetype == 'metadata': + return ['timitdic.txt', 'spkrinfo.txt'] else: - raise ValueError("Bad value for filetype: %r" % filetype) + raise ValueError('Bad value for filetype: %r' % filetype) def utteranceids( self, dialect=None, sex=None, spkrid=None, sent_type=None, sentid=None @@ -204,15 +209,15 @@ class TimitCorpusReader(CorpusReader): region, gender, sentence type, or sentence number, if specified. """ - if isinstance(dialect, str): + if isinstance(dialect, string_types): dialect = [dialect] - if isinstance(sex, str): + if isinstance(sex, string_types): sex = [sex] - if isinstance(spkrid, str): + if isinstance(spkrid, string_types): spkrid = [spkrid] - if isinstance(sent_type, str): + if isinstance(sent_type, string_types): sent_type = [sent_type] - if isinstance(sentid, str): + if isinstance(sentid, string_types): sentid = [sentid] utterances = self._utterances[:] @@ -234,23 +239,23 @@ class TimitCorpusReader(CorpusReader): each word. 
""" _transcriptions = {} - for line in self.open("timitdic.txt"): - if not line.strip() or line[0] == ";": + for line in self.open('timitdic.txt'): + if not line.strip() or line[0] == ';': continue - m = re.match(r"\s*(\S+)\s+/(.*)/\s*$", line) + m = re.match(r'\s*(\S+)\s+/(.*)/\s*$', line) if not m: - raise ValueError("Bad line: %r" % line) + raise ValueError('Bad line: %r' % line) _transcriptions[m.group(1)] = m.group(2).split() return _transcriptions def spkrid(self, utterance): - return utterance.split("/")[0] + return utterance.split('/')[0] def sentid(self, utterance): - return utterance.split("/")[1] + return utterance.split('/')[1] def utterance(self, spkrid, sentid): - return "%s/%s" % (spkrid, sentid) + return '%s/%s' % (spkrid, sentid) def spkrutteranceids(self, speaker): """ @@ -260,7 +265,7 @@ class TimitCorpusReader(CorpusReader): return [ utterance for utterance in self._utterances - if utterance.startswith(speaker + "/") + if utterance.startswith(speaker + '/') ] def spkrinfo(self, speaker): @@ -272,8 +277,8 @@ class TimitCorpusReader(CorpusReader): if self._speakerinfo is None: self._speakerinfo = {} - for line in self.open("spkrinfo.txt"): - if not line.strip() or line[0] == ";": + for line in self.open('spkrinfo.txt'): + if not line.strip() or line[0] == ';': continue rec = line.strip().split(None, 9) key = "dr%s-%s%s" % (rec[2], rec[1].lower(), rec[0].lower()) @@ -284,7 +289,7 @@ class TimitCorpusReader(CorpusReader): def phones(self, utterances=None): return [ line.split()[-1] - for fileid in self._utterance_fileids(utterances, ".phn") + for fileid in self._utterance_fileids(utterances, '.phn') for line in self.open(fileid) if line.strip() ] @@ -295,7 +300,7 @@ class TimitCorpusReader(CorpusReader): """ return [ (line.split()[2], int(line.split()[0]), int(line.split()[1])) - for fileid in self._utterance_fileids(utterances, ".phn") + for fileid in self._utterance_fileids(utterances, '.phn') for line in self.open(fileid) if line.strip() ] @@ -303,7 +308,7 @@ class TimitCorpusReader(CorpusReader): def words(self, utterances=None): return [ line.split()[-1] - for fileid in self._utterance_fileids(utterances, ".wrd") + for fileid in self._utterance_fileids(utterances, '.wrd') for line in self.open(fileid) if line.strip() ] @@ -311,7 +316,7 @@ class TimitCorpusReader(CorpusReader): def word_times(self, utterances=None): return [ (line.split()[2], int(line.split()[0]), int(line.split()[1])) - for fileid in self._utterance_fileids(utterances, ".wrd") + for fileid in self._utterance_fileids(utterances, '.wrd') for line in self.open(fileid) if line.strip() ] @@ -319,7 +324,7 @@ class TimitCorpusReader(CorpusReader): def sents(self, utterances=None): return [ [line.split()[-1] for line in self.open(fileid) if line.strip()] - for fileid in self._utterance_fileids(utterances, ".wrd") + for fileid in self._utterance_fileids(utterances, '.wrd') ] def sent_times(self, utterances=None): @@ -329,7 +334,7 @@ class TimitCorpusReader(CorpusReader): int(line.split()[0]), int(line.split()[1]), ) - for fileid in self._utterance_fileids(utterances, ".txt") + for fileid in self._utterance_fileids(utterances, '.txt') for line in self.open(fileid) if line.strip() ] @@ -337,7 +342,7 @@ class TimitCorpusReader(CorpusReader): def phone_trees(self, utterances=None): if utterances is None: utterances = self._utterances - if isinstance(utterances, str): + if isinstance(utterances, string_types): utterances = [utterances] trees = [] @@ -348,7 +353,7 @@ class TimitCorpusReader(CorpusReader): while 
sent_times: (sent, sent_start, sent_end) = sent_times.pop(0) - trees.append(Tree("S", [])) + trees.append(Tree('S', [])) while ( word_times and phone_times and phone_times[0][2] <= word_times[0][1] ): @@ -367,9 +372,9 @@ class TimitCorpusReader(CorpusReader): # fileids. def wav(self, utterance, start=0, end=None): # nltk.chunk conflicts with the stdlib module 'chunk' - wave = import_from_stdlib("wave") + wave = import_from_stdlib('wave') - w = wave.open(self.open(utterance + ".wav"), "rb") + w = wave.open(self.open(utterance + '.wav'), 'rb') if end is None: end = w.getnframes() @@ -381,7 +386,7 @@ class TimitCorpusReader(CorpusReader): # Open a new temporary file -- the wave module requires # an actual file, and won't work w/ stringio. :( tf = tempfile.TemporaryFile() - out = wave.open(tf, "w") + out = wave.open(tf, 'w') # Write the parameters & data to the new file. out.setparams(w.getparams()) @@ -397,17 +402,17 @@ class TimitCorpusReader(CorpusReader): assert end is None or end > start headersize = 44 if end is None: - data = self.open(utterance + ".wav").read() + data = self.open(utterance + '.wav').read() else: - data = self.open(utterance + ".wav").read(headersize + end * 2) + data = self.open(utterance + '.wav').read(headersize + end * 2) return data[headersize + start * 2 :] def _utterance_fileids(self, utterances, extension): if utterances is None: utterances = self._utterances - if isinstance(utterances, str): + if isinstance(utterances, string_types): utterances = [utterances] - return ["%s%s" % (u, extension) for u in utterances] + return ['%s%s' % (u, extension) for u in utterances] def play(self, utterance, start=0, end=None): """ @@ -420,7 +425,7 @@ class TimitCorpusReader(CorpusReader): import ossaudiodev try: - dsp = ossaudiodev.open("w") + dsp = ossaudiodev.open('w') dsp.setfmt(ossaudiodev.AFMT_S16_LE) dsp.channels(1) dsp.speed(16000) @@ -460,6 +465,7 @@ class TimitCorpusReader(CorpusReader): ) +@compat.python_2_unicode_compatible class SpeakerInfo(object): def __init__( self, id, sex, dr, use, recdate, birthdate, ht, race, edu, comments=None @@ -476,9 +482,9 @@ class SpeakerInfo(object): self.comments = comments def __repr__(self): - attribs = "id sex dr use recdate birthdate ht race edu comments" - args = ["%s=%r" % (attr, getattr(self, attr)) for attr in attribs.split()] - return "SpeakerInfo(%s)" % (", ".join(args)) + attribs = 'id sex dr use recdate birthdate ht race edu comments' + args = ['%s=%r' % (attr, getattr(self, attr)) for attr in attribs.split()] + return 'SpeakerInfo(%s)' % (', '.join(args)) def read_timit_block(stream): @@ -489,5 +495,5 @@ def read_timit_block(stream): line = stream.readline() if not line: return [] - n, sent = line.split(" ", 1) + n, sent = line.split(' ', 1) return [sent] diff --git a/nlp_resource_data/nltk/corpus/reader/toolbox.py b/nlp_resource_data/nltk/corpus/reader/toolbox.py index aead10b..32acc01 100644 --- a/nlp_resource_data/nltk/corpus/reader/toolbox.py +++ b/nlp_resource_data/nltk/corpus/reader/toolbox.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Toolbox Reader # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Greg Aumann # Stuart Robinson # Steven Bird @@ -31,8 +31,8 @@ class ToolboxCorpusReader(CorpusReader): fileids, strip=True, unwrap=True, - encoding="utf8", - errors="strict", + encoding='utf8', + errors='strict', unicode_fields=None, ): return concat( @@ -48,11 +48,11 @@ class ToolboxCorpusReader(CorpusReader): # should probably be done lazily: def entries(self, fileids, 
**kwargs): - if "key" in kwargs: - key = kwargs["key"] - del kwargs["key"] + if 'key' in kwargs: + key = kwargs['key'] + del kwargs['key'] else: - key = "lx" # the default key in MDF + key = 'lx' # the default key in MDF entries = [] for marker, contents in self.fields(fileids, **kwargs): if marker == key: @@ -64,13 +64,13 @@ class ToolboxCorpusReader(CorpusReader): pass return entries - def words(self, fileids, key="lx"): + def words(self, fileids, key='lx'): return [contents for marker, contents in self.fields(fileids) if marker == key] def raw(self, fileids): if fileids is None: fileids = self._fileids - elif isinstance(fileids, str): + elif isinstance(fileids, string_types): fileids = [fileids] return concat([self.open(f).read() for f in fileids]) @@ -79,5 +79,5 @@ def demo(): pass -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/corpus/reader/twitter.py b/nlp_resource_data/nltk/corpus/reader/twitter.py index 7f9b7b7..78b9de3 100644 --- a/nlp_resource_data/nltk/corpus/reader/twitter.py +++ b/nlp_resource_data/nltk/corpus/reader/twitter.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Twitter Corpus Reader # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Ewan Klein # URL: # For license information, see LICENSE.TXT @@ -13,6 +13,8 @@ have been serialised into line-delimited JSON. import json import os +from six import string_types + from nltk.tokenize import TweetTokenizer from nltk.corpus.reader.util import StreamBackedCorpusView, concat, ZipFilePathPointer @@ -57,7 +59,7 @@ class TwitterCorpusReader(CorpusReader): """ def __init__( - self, root, fileids=None, word_tokenizer=TweetTokenizer(), encoding="utf8" + self, root, fileids=None, word_tokenizer=TweetTokenizer(), encoding='utf8' ): """ @@ -108,7 +110,7 @@ class TwitterCorpusReader(CorpusReader): tweets = [] for jsono in fulltweets: try: - text = jsono["text"] + text = jsono['text'] if isinstance(text, bytes): text = text.decode(self.encoding) tweets.append(text) @@ -133,7 +135,7 @@ class TwitterCorpusReader(CorpusReader): """ if fileids is None: fileids = self._fileids - elif isinstance(fileids, str): + elif isinstance(fileids, string_types): fileids = [fileids] return concat([self.open(f).read() for f in fileids]) diff --git a/nlp_resource_data/nltk/corpus/reader/udhr.py b/nlp_resource_data/nltk/corpus/reader/udhr.py index 4bfb551..934a5b5 100644 --- a/nlp_resource_data/nltk/corpus/reader/udhr.py +++ b/nlp_resource_data/nltk/corpus/reader/udhr.py @@ -2,6 +2,7 @@ """ UDHR corpus reader. It mostly deals with encodings. 
""" +from __future__ import absolute_import, unicode_literals from nltk.corpus.reader.util import find_corpus_fileids from nltk.corpus.reader.plaintext import PlaintextCorpusReader @@ -10,65 +11,65 @@ from nltk.corpus.reader.plaintext import PlaintextCorpusReader class UdhrCorpusReader(PlaintextCorpusReader): ENCODINGS = [ - (".*-Latin1$", "latin-1"), - (".*-Hebrew$", "hebrew"), - (".*-Arabic$", "cp1256"), - ("Czech_Cesky-UTF8", "cp1250"), # yeah - (".*-Cyrillic$", "cyrillic"), - (".*-SJIS$", "SJIS"), - (".*-GB2312$", "GB2312"), - (".*-Latin2$", "ISO-8859-2"), - (".*-Greek$", "greek"), - (".*-UTF8$", "utf-8"), - ("Hungarian_Magyar-Unicode", "utf-16-le"), - ("Amahuaca", "latin1"), - ("Turkish_Turkce-Turkish", "latin5"), - ("Lithuanian_Lietuviskai-Baltic", "latin4"), - ("Japanese_Nihongo-EUC", "EUC-JP"), - ("Japanese_Nihongo-JIS", "iso2022_jp"), - ("Chinese_Mandarin-HZ", "hz"), - ("Abkhaz\-Cyrillic\+Abkh", "cp1251"), + ('.*-Latin1$', 'latin-1'), + ('.*-Hebrew$', 'hebrew'), + ('.*-Arabic$', 'cp1256'), + ('Czech_Cesky-UTF8', 'cp1250'), # yeah + ('.*-Cyrillic$', 'cyrillic'), + ('.*-SJIS$', 'SJIS'), + ('.*-GB2312$', 'GB2312'), + ('.*-Latin2$', 'ISO-8859-2'), + ('.*-Greek$', 'greek'), + ('.*-UTF8$', 'utf-8'), + ('Hungarian_Magyar-Unicode', 'utf-16-le'), + ('Amahuaca', 'latin1'), + ('Turkish_Turkce-Turkish', 'latin5'), + ('Lithuanian_Lietuviskai-Baltic', 'latin4'), + ('Japanese_Nihongo-EUC', 'EUC-JP'), + ('Japanese_Nihongo-JIS', 'iso2022_jp'), + ('Chinese_Mandarin-HZ', 'hz'), + ('Abkhaz\-Cyrillic\+Abkh', 'cp1251'), ] SKIP = set( [ # The following files are not fully decodable because they # were truncated at wrong bytes: - "Burmese_Myanmar-UTF8", - "Japanese_Nihongo-JIS", - "Chinese_Mandarin-HZ", - "Chinese_Mandarin-UTF8", - "Gujarati-UTF8", - "Hungarian_Magyar-Unicode", - "Lao-UTF8", - "Magahi-UTF8", - "Marathi-UTF8", - "Tamil-UTF8", + 'Burmese_Myanmar-UTF8', + 'Japanese_Nihongo-JIS', + 'Chinese_Mandarin-HZ', + 'Chinese_Mandarin-UTF8', + 'Gujarati-UTF8', + 'Hungarian_Magyar-Unicode', + 'Lao-UTF8', + 'Magahi-UTF8', + 'Marathi-UTF8', + 'Tamil-UTF8', # Unfortunately, encodings required for reading # the following files are not supported by Python: - "Vietnamese-VPS", - "Vietnamese-VIQR", - "Vietnamese-TCVN", - "Magahi-Agra", - "Bhojpuri-Agra", - "Esperanto-T61", # latin3 raises an exception + 'Vietnamese-VPS', + 'Vietnamese-VIQR', + 'Vietnamese-TCVN', + 'Magahi-Agra', + 'Bhojpuri-Agra', + 'Esperanto-T61', # latin3 raises an exception # The following files are encoded for specific fonts: - "Burmese_Myanmar-WinResearcher", - "Armenian-DallakHelv", - "Tigrinya_Tigrigna-VG2Main", - "Amharic-Afenegus6..60375", # ? - "Navaho_Dine-Navajo-Navaho-font", + 'Burmese_Myanmar-WinResearcher', + 'Armenian-DallakHelv', + 'Tigrinya_Tigrigna-VG2Main', + 'Amharic-Afenegus6..60375', # ? + 'Navaho_Dine-Navajo-Navaho-font', # What are these? 
- "Azeri_Azerbaijani_Cyrillic-Az.Times.Cyr.Normal0117", - "Azeri_Azerbaijani_Latin-Az.Times.Lat0117", + 'Azeri_Azerbaijani_Cyrillic-Az.Times.Cyr.Normal0117', + 'Azeri_Azerbaijani_Latin-Az.Times.Lat0117', # The following files are unintended: - "Czech-Latin2-err", - "Russian_Russky-UTF8~", + 'Czech-Latin2-err', + 'Russian_Russky-UTF8~', ] ) - def __init__(self, root="udhr"): - fileids = find_corpus_fileids(root, r"(?!README|\.).*") + def __init__(self, root='udhr'): + fileids = find_corpus_fileids(root, r'(?!README|\.).*') super(UdhrCorpusReader, self).__init__( root, [fileid for fileid in fileids if fileid not in self.SKIP], diff --git a/nlp_resource_data/nltk/corpus/reader/util.py b/nlp_resource_data/nltk/corpus/reader/util.py index b85c33b..b60f7ab 100644 --- a/nlp_resource_data/nltk/corpus/reader/util.py +++ b/nlp_resource_data/nltk/corpus/reader/util.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Corpus Reader Utilities # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Steven Bird # Edward Loper # URL: @@ -10,9 +10,19 @@ import os import bisect import re import tempfile -import pickle from functools import reduce -from xml.etree import ElementTree + +try: + import cPickle as pickle +except ImportError: + import pickle + +try: # Use the c version of ElementTree, which is faster, if possible. + from xml.etree import cElementTree as ElementTree +except ImportError: + from xml.etree import ElementTree + +from six import string_types, text_type from nltk.tokenize import wordpunct_tokenize from nltk.internals import slice_bounds @@ -118,7 +128,7 @@ class StreamBackedCorpusView(AbstractLazySequence): block; and tokens is a list of the tokens in the block. """ - def __init__(self, fileid, block_reader=None, startpos=0, encoding="utf8"): + def __init__(self, fileid, block_reader=None, startpos=0, encoding='utf8'): """ Create a new corpus view, based on the file ``fileid``, and read with ``block_reader``. See the class documentation @@ -170,7 +180,7 @@ class StreamBackedCorpusView(AbstractLazySequence): else: self._eofpos = os.stat(self._fileid).st_size except Exception as exc: - raise ValueError("Unable to open or access %r -- %s" % (fileid, exc)) + raise ValueError('Unable to open or access %r -- %s' % (fileid, exc)) # Maintain a cache of the most recently read block, to # increase efficiency of random access. @@ -193,7 +203,7 @@ class StreamBackedCorpusView(AbstractLazySequence): :param stream: an input stream :type stream: stream """ - raise NotImplementedError("Abstract Method") + raise NotImplementedError('Abstract Method') def _open(self): """ @@ -205,10 +215,10 @@ class StreamBackedCorpusView(AbstractLazySequence): self._stream = self._fileid.open(self._encoding) elif self._encoding: self._stream = SeekableUnicodeStreamReader( - open(self._fileid, "rb"), self._encoding + open(self._fileid, 'rb'), self._encoding ) else: - self._stream = open(self._fileid, "rb") + self._stream = open(self._fileid, 'rb') def close(self): """ @@ -245,7 +255,7 @@ class StreamBackedCorpusView(AbstractLazySequence): if i < 0: i += len(self) if i < 0: - raise IndexError("index out of range") + raise IndexError('index out of range') # Check if it's in the cache. 
offset = self._cache[0] if offset <= i < self._cache[1]: @@ -254,7 +264,7 @@ class StreamBackedCorpusView(AbstractLazySequence): try: return next(self.iterate_from(i)) except StopIteration: - raise IndexError("index out of range") + raise IndexError('index out of range') # If we wanted to be thread-safe, then this method would need to # do some locking. @@ -295,13 +305,13 @@ class StreamBackedCorpusView(AbstractLazySequence): self._current_blocknum = block_index tokens = self.read_block(self._stream) assert isinstance(tokens, (tuple, list, AbstractLazySequence)), ( - "block reader %s() should return list or tuple." + 'block reader %s() should return list or tuple.' % self.read_block.__name__ ) num_toks = len(tokens) new_filepos = self._stream.tell() assert new_filepos > filepos, ( - "block reader %s() should consume at least 1 byte (filepos=%d)" + 'block reader %s() should consume at least 1 byte (filepos=%d)' % (self.read_block.__name__, filepos) ) @@ -320,10 +330,10 @@ class StreamBackedCorpusView(AbstractLazySequence): # Check for consistency: assert ( new_filepos == self._filepos[block_index] - ), "inconsistent block reader (num chars read)" + ), 'inconsistent block reader (num chars read)' assert ( toknum + num_toks == self._toknum[block_index] - ), "inconsistent block reader (num tokens returned)" + ), 'inconsistent block reader (num tokens returned)' # If we reached the end of the file, then update self._len if new_filepos == self._eofpos: @@ -430,13 +440,13 @@ def concat(docs): if len(docs) == 1: return docs[0] if len(docs) == 0: - raise ValueError("concat() expects at least one object!") + raise ValueError('concat() expects at least one object!') types = set(d.__class__ for d in docs) # If they're all strings, use string concatenation. - if all(isinstance(doc, str) for doc in docs): - return "".join(docs) + if all(isinstance(doc, string_types) for doc in docs): + return ''.join(docs) # If they're all corpus views, then use ConcatenatedCorpusView. for typ in types: @@ -463,7 +473,7 @@ def concat(docs): return reduce((lambda a, b: a + b), docs, ()) if ElementTree.iselement(typ): - xmltree = ElementTree.Element("documents") + xmltree = ElementTree.Element('documents') for doc in docs: xmltree.append(doc) return xmltree @@ -524,7 +534,7 @@ class PickleCorpusView(StreamBackedCorpusView): fileid. (This method is called whenever a ``PickledCorpusView`` is garbage-collected. """ - if getattr(self, "_delete_on_gc"): + if getattr(self, '_delete_on_gc'): if os.path.exists(self._fileid): try: os.remove(self._fileid) @@ -534,8 +544,8 @@ class PickleCorpusView(StreamBackedCorpusView): @classmethod def write(cls, sequence, output_file): - if isinstance(output_file, str): - output_file = open(output_file, "wb") + if isinstance(output_file, string_types): + output_file = open(output_file, 'wb') for item in sequence: pickle.dump(item, output_file, cls.PROTOCOL) @@ -550,13 +560,13 @@ class PickleCorpusView(StreamBackedCorpusView): deleted whenever this object gets garbage-collected. 
""" try: - fd, output_file_name = tempfile.mkstemp(".pcv", "nltk-") - output_file = os.fdopen(fd, "wb") + fd, output_file_name = tempfile.mkstemp('.pcv', 'nltk-') + output_file = os.fdopen(fd, 'wb') cls.write(sequence, output_file) output_file.close() return PickleCorpusView(output_file_name, delete_on_gc) except (OSError, IOError) as e: - raise ValueError("Error while creating temp file: %s" % e) + raise ValueError('Error while creating temp file: %s' % e) ###################################################################### @@ -584,12 +594,12 @@ def read_line_block(stream): line = stream.readline() if not line: return toks - toks.append(line.rstrip("\n")) + toks.append(line.rstrip('\n')) return toks def read_blankline_block(stream): - s = "" + s = '' while True: line = stream.readline() # End of file: @@ -608,10 +618,10 @@ def read_blankline_block(stream): def read_alignedsent_block(stream): - s = "" + s = '' while True: line = stream.readline() - if line[0] == "=" or line[0] == "\n" or line[:2] == "\r\n": + if line[0] == '=' or line[0] == '\n' or line[:2] == '\r\n': continue # End of file: if not line: @@ -622,7 +632,7 @@ def read_alignedsent_block(stream): # Other line: else: s += line - if re.match("^\d+-\d+", line) is not None: + if re.match('^\d+-\d+', line) is not None: return [s] @@ -648,15 +658,15 @@ def read_regexp_block(stream, start_re, end_re=None): line = stream.readline() # End of file: if not line: - return ["".join(lines)] + return [''.join(lines)] # End of token: if end_re is not None and re.match(end_re, line): - return ["".join(lines)] + return [''.join(lines)] # Start of new token: backup to just before it starts, and # return the token we've already collected. if end_re is None and re.match(start_re, line): stream.seek(oldpos) - return ["".join(lines)] + return [''.join(lines)] # Anything else is part of the token. lines.append(line) @@ -682,20 +692,20 @@ def read_sexpr_block(stream, block_size=16384, comment_char=None): """ start = stream.tell() block = stream.read(block_size) - encoding = getattr(stream, "encoding", None) - assert encoding is not None or isinstance(block, str) - if encoding not in (None, "utf-8"): + encoding = getattr(stream, 'encoding', None) + assert encoding is not None or isinstance(block, text_type) + if encoding not in (None, 'utf-8'): import warnings warnings.warn( - "Parsing may fail, depending on the properties " - "of the %s encoding!" % encoding + 'Parsing may fail, depending on the properties ' + 'of the %s encoding!' % encoding ) # (e.g., the utf-16 encoding does not work because it insists # on adding BOMs to the beginning of encoded strings.) if comment_char: - COMMENT = re.compile("(?m)^%s.*$" % re.escape(comment_char)) + COMMENT = re.compile('(?m)^%s.*$' % re.escape(comment_char)) while True: try: # If we're stripping comments, then make sure our block ends @@ -708,7 +718,7 @@ def read_sexpr_block(stream, block_size=16384, comment_char=None): # Read the block. tokens, offset = _parse_sexpr_block(block) # Skip whitespace - offset = re.compile(r"\s*").search(block, offset).end() + offset = re.compile(r'\s*').search(block, offset).end() # Move to the end position. 
if encoding is None: @@ -719,7 +729,7 @@ def read_sexpr_block(stream, block_size=16384, comment_char=None): # Return the list of tokens we processed return tokens except ValueError as e: - if e.args[0] == "Block too small": + if e.args[0] == 'Block too small': next_block = stream.read(block_size) if next_block: block += next_block @@ -734,7 +744,7 @@ def read_sexpr_block(stream, block_size=16384, comment_char=None): def _sub_space(m): """Helper function: given a regexp match, return a string of spaces that's the same length as the matched string.""" - return " " * (m.end() - m.start()) + return ' ' * (m.end() - m.start()) def _parse_sexpr_block(block): @@ -742,27 +752,27 @@ def _parse_sexpr_block(block): start = end = 0 while end < len(block): - m = re.compile(r"\S").search(block, end) + m = re.compile(r'\S').search(block, end) if not m: return tokens, end start = m.start() # Case 1: sexpr is not parenthesized. - if m.group() != "(": - m2 = re.compile(r"[\s(]").search(block, start) + if m.group() != '(': + m2 = re.compile(r'[\s(]').search(block, start) if m2: end = m2.start() else: if tokens: return tokens, end - raise ValueError("Block too small") + raise ValueError('Block too small') # Case 2: parenthesized sexpr. else: nesting = 0 - for m in re.compile(r"[()]").finditer(block, start): - if m.group() == "(": + for m in re.compile(r'[()]').finditer(block, start): + if m.group() == '(': nesting += 1 else: nesting -= 1 @@ -772,7 +782,7 @@ def _parse_sexpr_block(block): else: if tokens: return tokens, end - raise ValueError("Block too small") + raise ValueError('Block too small') tokens.append(block[start:end]) @@ -786,8 +796,8 @@ def _parse_sexpr_block(block): def find_corpus_fileids(root, regexp): if not isinstance(root, PathPointer): - raise TypeError("find_corpus_fileids: expected a PathPointer") - regexp += "$" + raise TypeError('find_corpus_fileids: expected a PathPointer') + regexp += '$' # Find fileids in a zipfile: scan the zipfile's namelist. Filter # out entries that end in '/' -- they're directories. @@ -795,7 +805,7 @@ def find_corpus_fileids(root, regexp): fileids = [ name[len(root.entry) :] for name in root.zipfile.namelist() - if not name.endswith("/") + if not name.endswith('/') ] items = [name for name in fileids if re.match(regexp, name)] return sorted(items) @@ -807,17 +817,17 @@ def find_corpus_fileids(root, regexp): # workaround for py25 which doesn't support followlinks kwargs = {} if not py25(): - kwargs = {"followlinks": True} + kwargs = {'followlinks': True} for dirname, subdirs, fileids in os.walk(root.path, **kwargs): - prefix = "".join("%s/" % p for p in _path_from(root.path, dirname)) + prefix = ''.join('%s/' % p for p in _path_from(root.path, dirname)) items += [ prefix + fileid for fileid in fileids if re.match(regexp, prefix + fileid) ] # Don't visit svn directories: - if ".svn" in subdirs: - subdirs.remove(".svn") + if '.svn' in subdirs: + subdirs.remove('.svn') return sorted(items) else: @@ -825,7 +835,7 @@ def find_corpus_fileids(root, regexp): def _path_from(parent, child): - if os.path.split(parent)[1] == "": + if os.path.split(parent)[1] == '': parent = os.path.split(parent)[0] path = [] while parent != child: @@ -842,15 +852,15 @@ def _path_from(parent, child): def tagged_treebank_para_block_reader(stream): # Read the next paragraph. 
- para = "" + para = '' while True: line = stream.readline() # End of paragraph: - if re.match("======+\s*$", line): + if re.match('======+\s*$', line): if para.strip(): return [para] # End of file: - elif line == "": + elif line == '': if para.strip(): return [para] else: diff --git a/nlp_resource_data/nltk/corpus/reader/verbnet.py b/nlp_resource_data/nltk/corpus/reader/verbnet.py index 0ab5f59..d0492f5 100644 --- a/nlp_resource_data/nltk/corpus/reader/verbnet.py +++ b/nlp_resource_data/nltk/corpus/reader/verbnet.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Verbnet Corpus Reader # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # URL: # For license information, see LICENSE.TXT @@ -11,11 +11,14 @@ An NLTK interface to the VerbNet verb lexicon For details about VerbNet see: https://verbs.colorado.edu/~mpalmer/projects/verbnet.html """ +from __future__ import unicode_literals import re import textwrap from collections import defaultdict +from six import string_types + from nltk.corpus.reader.xmldocs import XMLCorpusReader @@ -57,10 +60,10 @@ class VerbnetCorpusReader(XMLCorpusReader): # runs 2-30 times faster. self._quick_index() - _LONGID_RE = re.compile(r"([^\-\.]*)-([\d+.\-]+)$") + _LONGID_RE = re.compile(r'([^\-\.]*)-([\d+.\-]+)$') """Regular expression that matches (and decomposes) longids""" - _SHORTID_RE = re.compile(r"[\d+.\-]+$") + _SHORTID_RE = re.compile(r'[\d+.\-]+$') """Regular expression that matches shortids""" _INDEX_RE = re.compile( @@ -78,9 +81,9 @@ class VerbnetCorpusReader(XMLCorpusReader): return sorted(self._lemma_to_class.keys()) else: # [xx] should this include subclass members? - if isinstance(vnclass, str): + if isinstance(vnclass, string_types): vnclass = self.vnclass(vnclass) - return [member.get("name") for member in vnclass.findall("MEMBERS/MEMBER")] + return [member.get('name') for member in vnclass.findall('MEMBERS/MEMBER')] def wordnetids(self, vnclass=None): """ @@ -91,12 +94,12 @@ class VerbnetCorpusReader(XMLCorpusReader): return sorted(self._wordnet_to_class.keys()) else: # [xx] should this include subclass members? - if isinstance(vnclass, str): + if isinstance(vnclass, string_types): vnclass = self.vnclass(vnclass) return sum( [ - member.get("wn", "").split() - for member in vnclass.findall("MEMBERS/MEMBER") + member.get('wn', '').split() + for member in vnclass.findall('MEMBERS/MEMBER') ], [], ) @@ -123,8 +126,8 @@ class VerbnetCorpusReader(XMLCorpusReader): elif classid is not None: xmltree = self.vnclass(classid) return [ - subclass.get("ID") - for subclass in xmltree.findall("SUBCLASSES/VNSUBCLASS") + subclass.get('ID') + for subclass in xmltree.findall('SUBCLASSES/VNSUBCLASS') ] else: return sorted(self._class_to_fileid.keys()) @@ -150,17 +153,17 @@ class VerbnetCorpusReader(XMLCorpusReader): if classid in self._class_to_fileid: fileid = self._class_to_fileid[self.longid(classid)] tree = self.xml(fileid) - if classid == tree.get("ID"): + if classid == tree.get('ID'): return tree else: - for subclass in tree.findall(".//VNSUBCLASS"): - if classid == subclass.get("ID"): + for subclass in tree.findall('.//VNSUBCLASS'): + if classid == subclass.get('ID'): return subclass else: assert False # we saw it during _index()! 
else: - raise ValueError("Unknown identifier {}".format(fileid_or_classid)) + raise ValueError('Unknown identifier {}'.format(fileid_or_classid)) def fileids(self, vnclass_ids=None): """ @@ -170,7 +173,7 @@ class VerbnetCorpusReader(XMLCorpusReader): """ if vnclass_ids is None: return self._fileids - elif isinstance(vnclass_ids, str): + elif isinstance(vnclass_ids, string_types): return [self._class_to_fileid[self.longid(vnclass_ids)]] else: return [ @@ -191,17 +194,17 @@ class VerbnetCorpusReader(XMLCorpusReader): containing the xml contents of a VerbNet class. :return: frames - a list of frame dictionaries """ - if isinstance(vnclass, str): + if isinstance(vnclass, string_types): vnclass = self.vnclass(vnclass) frames = [] - vnframes = vnclass.findall("FRAMES/FRAME") + vnframes = vnclass.findall('FRAMES/FRAME') for vnframe in vnframes: frames.append( { - "example": self._get_example_within_frame(vnframe), - "description": self._get_description_within_frame(vnframe), - "syntax": self._get_syntactic_list_within_frame(vnframe), - "semantics": self._get_semantics_within_frame(vnframe), + 'example': self._get_example_within_frame(vnframe), + 'description': self._get_description_within_frame(vnframe), + 'syntax': self._get_syntactic_list_within_frame(vnframe), + 'semantics': self._get_semantics_within_frame(vnframe), } ) return frames @@ -216,11 +219,11 @@ class VerbnetCorpusReader(XMLCorpusReader): containing the xml contents of a VerbNet class. :return: list of subclasses """ - if isinstance(vnclass, str): + if isinstance(vnclass, string_types): vnclass = self.vnclass(vnclass) subclasses = [ - subclass.get("ID") for subclass in vnclass.findall("SUBCLASSES/VNSUBCLASS") + subclass.get('ID') for subclass in vnclass.findall('SUBCLASSES/VNSUBCLASS') ] return subclasses @@ -235,17 +238,17 @@ class VerbnetCorpusReader(XMLCorpusReader): containing the xml contents of a VerbNet class. :return: themroles: A list of thematic roles in the VerbNet class """ - if isinstance(vnclass, str): + if isinstance(vnclass, string_types): vnclass = self.vnclass(vnclass) themroles = [] - for trole in vnclass.findall("THEMROLES/THEMROLE"): + for trole in vnclass.findall('THEMROLES/THEMROLE'): themroles.append( { - "type": trole.get("type"), - "modifiers": [ - {"value": restr.get("Value"), "type": restr.get("type")} - for restr in trole.findall("SELRESTRS/SELRESTR") + 'type': trole.get('type'), + 'modifiers': [ + {'value': restr.get('Value'), 'type': restr.get('type')} + for restr in trole.findall('SELRESTRS/SELRESTR') ], } ) @@ -259,23 +262,23 @@ class VerbnetCorpusReader(XMLCorpusReader): """ Initialize the indexes ``_lemma_to_class``, ``_wordnet_to_class``, and ``_class_to_fileid`` by scanning - through the corpus fileids. This is fast if ElementTree - uses the C implementation (<0.1 secs), but quite slow (>10 secs) - if only the python implementation is available. + through the corpus fileids. This is fast with cElementTree + (<0.1 secs), but quite slow (>10 secs) with the python + implementation of ElementTree. 
""" for fileid in self._fileids: self._index_helper(self.xml(fileid), fileid) def _index_helper(self, xmltree, fileid): """Helper for ``_index()``""" - vnclass = xmltree.get("ID") + vnclass = xmltree.get('ID') self._class_to_fileid[vnclass] = fileid self._shortid_to_longid[self.shortid(vnclass)] = vnclass - for member in xmltree.findall("MEMBERS/MEMBER"): - self._lemma_to_class[member.get("name")].append(vnclass) - for wn in member.get("wn", "").split(): + for member in xmltree.findall('MEMBERS/MEMBER'): + self._lemma_to_class[member.get('name')].append(vnclass) + for wn in member.get('wn', '').split(): self._wordnet_to_class[wn].append(vnclass) - for subclass in xmltree.findall("SUBCLASSES/VNSUBCLASS"): + for subclass in xmltree.findall('SUBCLASSES/VNSUBCLASS'): self._index_helper(subclass, fileid) def _quick_index(self): @@ -285,8 +288,8 @@ class VerbnetCorpusReader(XMLCorpusReader): through the corpus fileids. This doesn't do proper xml parsing, but is good enough to find everything in the standard VerbNet corpus -- and it runs about 30 times faster than xml parsing - (with the python ElementTree; only 2-3 times faster - if ElementTree uses the C implementation). + (with the python ElementTree; only 2-3 times faster with + cElementTree). """ # nb: if we got rid of wordnet_to_class, this would run 2-3 # times faster. @@ -305,7 +308,7 @@ class VerbnetCorpusReader(XMLCorpusReader): vnclass = groups[2] # for elts. self._shortid_to_longid[self.shortid(vnclass)] = vnclass else: - assert False, "unexpected match condition" + assert False, 'unexpected match condition' ###################################################################### # { Identifier conversion @@ -320,11 +323,11 @@ class VerbnetCorpusReader(XMLCorpusReader): if self._LONGID_RE.match(shortid): return shortid # it's already a longid. elif not self._SHORTID_RE.match(shortid): - raise ValueError("vnclass identifier %r not found" % shortid) + raise ValueError('vnclass identifier %r not found' % shortid) try: return self._shortid_to_longid[shortid] except KeyError: - raise ValueError("vnclass identifier %r not found" % shortid) + raise ValueError('vnclass identifier %r not found' % shortid) def shortid(self, longid): """Returns shortid of a VerbNet class @@ -338,7 +341,7 @@ class VerbnetCorpusReader(XMLCorpusReader): if m: return m.group(2) else: - raise ValueError("vnclass identifier %r not found" % longid) + raise ValueError('vnclass identifier %r not found' % longid) ###################################################################### # { Frame access utility functions @@ -357,13 +360,13 @@ class VerbnetCorpusReader(XMLCorpusReader): :return: semantics: semantics dictionary """ semantics_within_single_frame = [] - for pred in vnframe.findall("SEMANTICS/PRED"): + for pred in vnframe.findall('SEMANTICS/PRED'): arguments = [ - {"type": arg.get("type"), "value": arg.get("value")} - for arg in pred.findall("ARGS/ARG") + {'type': arg.get('type'), 'value': arg.get('value')} + for arg in pred.findall('ARGS/ARG') ] semantics_within_single_frame.append( - {"predicate_value": pred.get("value"), "arguments": arguments} + {'predicate_value': pred.get('value'), 'arguments': arguments} ) return semantics_within_single_frame @@ -376,7 +379,7 @@ class VerbnetCorpusReader(XMLCorpusReader): a VerbNet frame. 
:return: example_text: The example sentence for this particular frame """ - example_element = vnframe.find("EXAMPLES/EXAMPLE") + example_element = vnframe.find('EXAMPLES/EXAMPLE') if example_element is not None: example_text = example_element.text else: @@ -393,10 +396,10 @@ class VerbnetCorpusReader(XMLCorpusReader): a VerbNet frame. :return: description: a description dictionary with members - primary and secondary """ - description_element = vnframe.find("DESCRIPTION") + description_element = vnframe.find('DESCRIPTION') return { - "primary": description_element.attrib["primary"], - "secondary": description_element.get("secondary", ""), + 'primary': description_element.attrib['primary'], + 'secondary': description_element.get('secondary', ''), } def _get_syntactic_list_within_frame(self, vnframe): @@ -412,20 +415,20 @@ class VerbnetCorpusReader(XMLCorpusReader): :return: syntax_within_single_frame """ syntax_within_single_frame = [] - for elt in vnframe.find("SYNTAX"): + for elt in vnframe.find('SYNTAX'): pos_tag = elt.tag modifiers = dict() - modifiers["value"] = elt.get("value") if "value" in elt.attrib else "" - modifiers["selrestrs"] = [ - {"value": restr.get("Value"), "type": restr.get("type")} - for restr in elt.findall("SELRESTRS/SELRESTR") + modifiers['value'] = elt.get('value') if 'value' in elt.attrib else "" + modifiers['selrestrs'] = [ + {'value': restr.get('Value'), 'type': restr.get('type')} + for restr in elt.findall('SELRESTRS/SELRESTR') ] - modifiers["synrestrs"] = [ - {"value": restr.get("Value"), "type": restr.get("type")} - for restr in elt.findall("SYNRESTRS/SYNRESTR") + modifiers['synrestrs'] = [ + {'value': restr.get('Value'), 'type': restr.get('type')} + for restr in elt.findall('SYNRESTRS/SYNRESTR') ] syntax_within_single_frame.append( - {"pos_tag": pos_tag, "modifiers": modifiers} + {'pos_tag': pos_tag, 'modifiers': modifiers} ) return syntax_within_single_frame @@ -442,19 +445,19 @@ class VerbnetCorpusReader(XMLCorpusReader): :param vnclass: A VerbNet class identifier; or an ElementTree containing the xml contents of a VerbNet class. """ - if isinstance(vnclass, str): + if isinstance(vnclass, string_types): vnclass = self.vnclass(vnclass) - s = vnclass.get("ID") + "\n" - s += self.pprint_subclasses(vnclass, indent=" ") + "\n" - s += self.pprint_members(vnclass, indent=" ") + "\n" - s += " Thematic roles:\n" - s += self.pprint_themroles(vnclass, indent=" ") + "\n" - s += " Frames:\n" - s += self.pprint_frames(vnclass, indent=" ") + s = vnclass.get('ID') + '\n' + s += self.pprint_subclasses(vnclass, indent=' ') + '\n' + s += self.pprint_members(vnclass, indent=' ') + '\n' + s += ' Thematic roles:\n' + s += self.pprint_themroles(vnclass, indent=' ') + '\n' + s += ' Frames:\n' + s += self.pprint_frames(vnclass, indent=' ') return s - def pprint_subclasses(self, vnclass, indent=""): + def pprint_subclasses(self, vnclass, indent=''): """Returns pretty printed version of subclasses of VerbNet class Return a string containing a pretty-printed representation of @@ -463,18 +466,18 @@ class VerbnetCorpusReader(XMLCorpusReader): :param vnclass: A VerbNet class identifier; or an ElementTree containing the xml contents of a VerbNet class. 
""" - if isinstance(vnclass, str): + if isinstance(vnclass, string_types): vnclass = self.vnclass(vnclass) subclasses = self.subclasses(vnclass) if not subclasses: - subclasses = ["(none)"] - s = "Subclasses: " + " ".join(subclasses) + subclasses = ['(none)'] + s = 'Subclasses: ' + ' '.join(subclasses) return textwrap.fill( - s, 70, initial_indent=indent, subsequent_indent=indent + " " + s, 70, initial_indent=indent, subsequent_indent=indent + ' ' ) - def pprint_members(self, vnclass, indent=""): + def pprint_members(self, vnclass, indent=''): """Returns pretty printed version of members in a VerbNet class Return a string containing a pretty-printed representation of @@ -483,18 +486,18 @@ class VerbnetCorpusReader(XMLCorpusReader): :param vnclass: A VerbNet class identifier; or an ElementTree containing the xml contents of a VerbNet class. """ - if isinstance(vnclass, str): + if isinstance(vnclass, string_types): vnclass = self.vnclass(vnclass) members = self.lemmas(vnclass) if not members: - members = ["(none)"] - s = "Members: " + " ".join(members) + members = ['(none)'] + s = 'Members: ' + ' '.join(members) return textwrap.fill( - s, 70, initial_indent=indent, subsequent_indent=indent + " " + s, 70, initial_indent=indent, subsequent_indent=indent + ' ' ) - def pprint_themroles(self, vnclass, indent=""): + def pprint_themroles(self, vnclass, indent=''): """Returns pretty printed version of thematic roles in a VerbNet class Return a string containing a pretty-printed representation of @@ -503,22 +506,22 @@ class VerbnetCorpusReader(XMLCorpusReader): :param vnclass: A VerbNet class identifier; or an ElementTree containing the xml contents of a VerbNet class. """ - if isinstance(vnclass, str): + if isinstance(vnclass, string_types): vnclass = self.vnclass(vnclass) pieces = [] for themrole in self.themroles(vnclass): - piece = indent + "* " + themrole.get("type") + piece = indent + '* ' + themrole.get('type') modifiers = [ - modifier["value"] + modifier["type"] - for modifier in themrole["modifiers"] + modifier['value'] + modifier['type'] + for modifier in themrole['modifiers'] ] if modifiers: - piece += "[{}]".format(" ".join(modifiers)) + piece += '[{}]'.format(' '.join(modifiers)) pieces.append(piece) - return "\n".join(pieces) + return '\n'.join(pieces) - def pprint_frames(self, vnclass, indent=""): + def pprint_frames(self, vnclass, indent=''): """Returns pretty version of all frames in a VerbNet class Return a string containing a pretty-printed representation of @@ -527,14 +530,14 @@ class VerbnetCorpusReader(XMLCorpusReader): :param vnclass: A VerbNet class identifier; or an ElementTree containing the xml contents of a VerbNet class. """ - if isinstance(vnclass, str): + if isinstance(vnclass, string_types): vnclass = self.vnclass(vnclass) pieces = [] for vnframe in self.frames(vnclass): pieces.append(self._pprint_single_frame(vnframe, indent)) - return "\n".join(pieces) + return '\n'.join(pieces) - def _pprint_single_frame(self, vnframe, indent=""): + def _pprint_single_frame(self, vnframe, indent=''): """Returns pretty printed version of a single frame in a VerbNet class Returns a string containing a pretty-printed representation of @@ -543,16 +546,16 @@ class VerbnetCorpusReader(XMLCorpusReader): :param vnframe: An ElementTree containing the xml contents of a VerbNet frame. 
""" - frame_string = self._pprint_description_within_frame(vnframe, indent) + "\n" - frame_string += self._pprint_example_within_frame(vnframe, indent + " ") + "\n" + frame_string = self._pprint_description_within_frame(vnframe, indent) + '\n' + frame_string += self._pprint_example_within_frame(vnframe, indent + ' ') + '\n' frame_string += ( - self._pprint_syntax_within_frame(vnframe, indent + " Syntax: ") + "\n" + self._pprint_syntax_within_frame(vnframe, indent + ' Syntax: ') + '\n' ) - frame_string += indent + " Semantics:\n" - frame_string += self._pprint_semantics_within_frame(vnframe, indent + " ") + frame_string += indent + ' Semantics:\n' + frame_string += self._pprint_semantics_within_frame(vnframe, indent + ' ') return frame_string - def _pprint_example_within_frame(self, vnframe, indent=""): + def _pprint_example_within_frame(self, vnframe, indent=''): """Returns pretty printed version of example within frame in a VerbNet class Return a string containing a pretty-printed representation of @@ -561,10 +564,10 @@ class VerbnetCorpusReader(XMLCorpusReader): :param vnframe: An ElementTree containing the xml contents of a Verbnet frame. """ - if vnframe["example"]: - return indent + " Example: " + vnframe["example"] + if vnframe['example']: + return indent + ' Example: ' + vnframe['example'] - def _pprint_description_within_frame(self, vnframe, indent=""): + def _pprint_description_within_frame(self, vnframe, indent=''): """Returns pretty printed version of a VerbNet frame description Return a string containing a pretty-printed representation of @@ -573,12 +576,12 @@ class VerbnetCorpusReader(XMLCorpusReader): :param vnframe: An ElementTree containing the xml contents of a VerbNet frame. """ - description = indent + vnframe["description"]["primary"] - if vnframe["description"]["secondary"]: - description += " ({})".format(vnframe["description"]["secondary"]) + description = indent + vnframe['description']['primary'] + if vnframe['description']['secondary']: + description += ' ({})'.format(vnframe['description']['secondary']) return description - def _pprint_syntax_within_frame(self, vnframe, indent=""): + def _pprint_syntax_within_frame(self, vnframe, indent=''): """Returns pretty printed version of syntax within a frame in a VerbNet class Return a string containing a pretty-printed representation of @@ -588,25 +591,25 @@ class VerbnetCorpusReader(XMLCorpusReader): a VerbNet frame. 
""" pieces = [] - for element in vnframe["syntax"]: - piece = element["pos_tag"] + for element in vnframe['syntax']: + piece = element['pos_tag'] modifier_list = [] - if "value" in element["modifiers"] and element["modifiers"]["value"]: - modifier_list.append(element["modifiers"]["value"]) + if 'value' in element['modifiers'] and element['modifiers']['value']: + modifier_list.append(element['modifiers']['value']) modifier_list += [ - "{}{}".format(restr["value"], restr["type"]) + '{}{}'.format(restr['value'], restr['type']) for restr in ( - element["modifiers"]["selrestrs"] - + element["modifiers"]["synrestrs"] + element['modifiers']['selrestrs'] + + element['modifiers']['synrestrs'] ) ] if modifier_list: - piece += "[{}]".format(" ".join(modifier_list)) + piece += '[{}]'.format(' '.join(modifier_list)) pieces.append(piece) - return indent + " ".join(pieces) + return indent + ' '.join(pieces) - def _pprint_semantics_within_frame(self, vnframe, indent=""): + def _pprint_semantics_within_frame(self, vnframe, indent=''): """Returns a pretty printed version of semantics within frame in a VerbNet class Return a string containing a pretty-printed representation of @@ -616,9 +619,9 @@ class VerbnetCorpusReader(XMLCorpusReader): a VerbNet frame. """ pieces = [] - for predicate in vnframe["semantics"]: - arguments = [argument["value"] for argument in predicate["arguments"]] + for predicate in vnframe['semantics']: + arguments = [argument['value'] for argument in predicate['arguments']] pieces.append( - "{}({})".format(predicate["predicate_value"], ", ".join(arguments)) + '{}({})'.format(predicate['predicate_value'], ', '.join(arguments)) ) - return "\n".join("{}* {}".format(indent, piece) for piece in pieces) + return '\n'.join('{}* {}'.format(indent, piece) for piece in pieces) diff --git a/nlp_resource_data/nltk/corpus/reader/wordlist.py b/nlp_resource_data/nltk/corpus/reader/wordlist.py index 0d0d214..31332d7 100644 --- a/nlp_resource_data/nltk/corpus/reader/wordlist.py +++ b/nlp_resource_data/nltk/corpus/reader/wordlist.py @@ -1,11 +1,13 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: Word List Corpus Reader # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Steven Bird # Edward Loper # URL: # For license information, see LICENSE.TXT +from six import string_types + from nltk.tokenize import line_tokenize from nltk.corpus.reader.util import * @@ -17,7 +19,7 @@ class WordListCorpusReader(CorpusReader): List of words, one per line. Blank lines are ignored. 
""" - def words(self, fileids=None, ignore_lines_startswith="\n"): + def words(self, fileids=None, ignore_lines_startswith='\n'): return [ line for line in line_tokenize(self.raw(fileids)) @@ -27,7 +29,7 @@ class WordListCorpusReader(CorpusReader): def raw(self, fileids=None): if fileids is None: fileids = self._fileids - elif isinstance(fileids, str): + elif isinstance(fileids, string_types): fileids = [fileids] return concat([self.open(f).read() for f in fileids]) @@ -52,32 +54,32 @@ class NonbreakingPrefixesCorpusReader(WordListCorpusReader): """ available_langs = { - "catalan": "ca", - "czech": "cs", - "german": "de", - "greek": "el", - "english": "en", - "spanish": "es", - "finnish": "fi", - "french": "fr", - "hungarian": "hu", - "icelandic": "is", - "italian": "it", - "latvian": "lv", - "dutch": "nl", - "polish": "pl", - "portuguese": "pt", - "romanian": "ro", - "russian": "ru", - "slovak": "sk", - "slovenian": "sl", - "swedish": "sv", - "tamil": "ta", + 'catalan': 'ca', + 'czech': 'cs', + 'german': 'de', + 'greek': 'el', + 'english': 'en', + 'spanish': 'es', + 'finnish': 'fi', + 'french': 'fr', + 'hungarian': 'hu', + 'icelandic': 'is', + 'italian': 'it', + 'latvian': 'lv', + 'dutch': 'nl', + 'polish': 'pl', + 'portuguese': 'pt', + 'romanian': 'ro', + 'russian': 'ru', + 'slovak': 'sk', + 'slovenian': 'sl', + 'swedish': 'sv', + 'tamil': 'ta', } # Also, add the lang IDs as the keys. available_langs.update({v: v for v in available_langs.values()}) - def words(self, lang=None, fileids=None, ignore_lines_startswith="#"): + def words(self, lang=None, fileids=None, ignore_lines_startswith='#'): """ This module returns a list of nonbreaking prefixes for the specified language(s). @@ -95,7 +97,7 @@ class NonbreakingPrefixesCorpusReader(WordListCorpusReader): # all languages when fileids==None. if lang in self.available_langs: lang = self.available_langs[lang] - fileids = ["nonbreaking_prefix." + lang] + fileids = ['nonbreaking_prefix.' + lang] return [ line for line in line_tokenize(self.raw(fileids)) @@ -113,21 +115,21 @@ class UnicharsCorpusReader(WordListCorpusReader): # These are categories similar to the Perl Unicode Properties available_categories = [ - "Close_Punctuation", - "Currency_Symbol", - "IsAlnum", - "IsAlpha", - "IsLower", - "IsN", - "IsSc", - "IsSo", - "IsUpper", - "Line_Separator", - "Number", - "Open_Punctuation", - "Punctuation", - "Separator", - "Symbol", + 'Close_Punctuation', + 'Currency_Symbol', + 'IsAlnum', + 'IsAlpha', + 'IsLower', + 'IsN', + 'IsSc', + 'IsSo', + 'IsUpper', + 'Line_Separator', + 'Number', + 'Open_Punctuation', + 'Punctuation', + 'Separator', + 'Symbol', ] def chars(self, category=None, fileids=None): @@ -146,7 +148,7 @@ class UnicharsCorpusReader(WordListCorpusReader): :return: a list of characters given the specific unicode character category """ if category in self.available_categories: - fileids = [category + ".txt"] + fileids = [category + '.txt'] return list(self.raw(fileids).strip()) @@ -165,10 +167,10 @@ class MWAPPDBCorpusReader(WordListCorpusReader): :return: a list of tuples of similar lexical terms. """ - mwa_ppdb_xxxl_file = "ppdb-1.0-xxxl-lexical.extended.synonyms.uniquepairs" + mwa_ppdb_xxxl_file = 'ppdb-1.0-xxxl-lexical.extended.synonyms.uniquepairs' def entries(self, fileids=mwa_ppdb_xxxl_file): """ :return: a tuple of synonym word pairs. 
""" - return [tuple(line.split("\t")) for line in line_tokenize(self.raw(fileids))] + return [tuple(line.split('\t')) for line in line_tokenize(self.raw(fileids))] diff --git a/nlp_resource_data/nltk/corpus/reader/wordnet.py b/nlp_resource_data/nltk/corpus/reader/wordnet.py index 3ced8a4..e67664e 100644 --- a/nlp_resource_data/nltk/corpus/reader/wordnet.py +++ b/nlp_resource_data/nltk/corpus/reader/wordnet.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: WordNet # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Steven Bethard # Steven Bird # Edward Loper @@ -29,6 +29,8 @@ http://compling.hss.ntu.edu.sg/omw/ """ +from __future__ import print_function, unicode_literals + import math import re from itertools import islice, chain @@ -36,9 +38,13 @@ from functools import total_ordering from operator import itemgetter from collections import defaultdict, deque +from six import iteritems +from six.moves import range + from nltk.corpus.reader import CorpusReader from nltk.util import binary_search_file as _binary_search_file from nltk.probability import FreqDist +from nltk.compat import python_2_unicode_compatible from nltk.internals import deprecated ###################################################################### @@ -62,7 +68,7 @@ from nltk.internals import deprecated _INF = 1e300 # { Part-of-speech constants -ADJ, ADJ_SAT, ADV, NOUN, VERB = "a", "s", "r", "n", "v" +ADJ, ADJ_SAT, ADV, NOUN, VERB = 'a', 's', 'r', 'n', 'v' # } POS_LIST = [NOUN, VERB, ADJ, ADV] @@ -107,7 +113,7 @@ VERB_FRAME_STRINGS = ( "Something %s INFINITIVE", ) -SENSENUM_RE = re.compile(r"\.[\d]+\.") +SENSENUM_RE = re.compile(r'\.[\d]+\.') ###################################################################### @@ -124,76 +130,76 @@ class _WordNetObject(object): """A common base class for lemmas and synsets.""" def hypernyms(self): - return self._related("@") + return self._related('@') def _hypernyms(self): - return self._related("@") + return self._related('@') def instance_hypernyms(self): - return self._related("@i") + return self._related('@i') def _instance_hypernyms(self): - return self._related("@i") + return self._related('@i') def hyponyms(self): - return self._related("~") + return self._related('~') def instance_hyponyms(self): - return self._related("~i") + return self._related('~i') def member_holonyms(self): - return self._related("#m") + return self._related('#m') def substance_holonyms(self): - return self._related("#s") + return self._related('#s') def part_holonyms(self): - return self._related("#p") + return self._related('#p') def member_meronyms(self): - return self._related("%m") + return self._related('%m') def substance_meronyms(self): - return self._related("%s") + return self._related('%s') def part_meronyms(self): - return self._related("%p") + return self._related('%p') def topic_domains(self): - return self._related(";c") + return self._related(';c') def in_topic_domains(self): - return self._related("-c") + return self._related('-c') def region_domains(self): - return self._related(";r") + return self._related(';r') def in_region_domains(self): - return self._related("-r") + return self._related('-r') def usage_domains(self): - return self._related(";u") + return self._related(';u') def in_usage_domains(self): - return self._related("-u") + return self._related('-u') def attributes(self): - return self._related("=") + return self._related('=') def entailments(self): - return self._related("*") + return self._related('*') def 
causes(self): - return self._related(">") + return self._related('>') def also_sees(self): - return self._related("^") + return self._related('^') def verb_groups(self): - return self._related("$") + return self._related('$') def similar_tos(self): - return self._related("&") + return self._related('&') def __hash__(self): return hash(self._name) @@ -208,6 +214,7 @@ class _WordNetObject(object): return self._name < other._name +@python_2_unicode_compatible class Lemma(_WordNetObject): """ The lexical entry for a single morphological form of a @@ -223,13 +230,13 @@ class Lemma(_WordNetObject): 'salt.n.03' has the Lemmas 'salt.n.03.salt', 'salt.n.03.saltiness' and 'salt.n.03.salinity'. - Lemma attributes, accessible via methods with the same name: + Lemma attributes, accessible via methods with the same name:: - name: The canonical name of this lemma. - synset: The synset that this lemma belongs to. - syntactic_marker: For adjectives, the WordNet string identifying the syntactic position relative modified noun. See: - https://wordnet.princeton.edu/documentation/wninput5wn + http://wordnet.princeton.edu/man/wninput.5WN.html#sect10 For all other parts of speech, this attribute is None. - count: The frequency of this lemma in wordnet. @@ -237,7 +244,7 @@ class Lemma(_WordNetObject): Lemmas have the following methods for retrieving related Lemmas. They correspond to the names for the pointer symbols defined here: - https://wordnet.princeton.edu/documentation/wninput5wn + http://wordnet.princeton.edu/man/wninput.5WN.html#sect3 These methods all return lists of Lemmas: - antonyms @@ -257,16 +264,16 @@ class Lemma(_WordNetObject): """ __slots__ = [ - "_wordnet_corpus_reader", - "_name", - "_syntactic_marker", - "_synset", - "_frame_strings", - "_frame_ids", - "_lexname_index", - "_lex_id", - "_lang", - "_key", + '_wordnet_corpus_reader', + '_name', + '_syntactic_marker', + '_synset', + '_frame_strings', + '_frame_ids', + '_lexname_index', + '_lex_id', + '_lang', + '_key', ] def __init__( @@ -286,7 +293,7 @@ class Lemma(_WordNetObject): self._frame_ids = [] self._lexname_index = lexname_index self._lex_id = lex_id - self._lang = "eng" + self._lang = 'eng' self._key = None # gets set later. @@ -331,15 +338,16 @@ class Lemma(_WordNetObject): return self._wordnet_corpus_reader.lemma_count(self) def antonyms(self): - return self._related("!") + return self._related('!') def derivationally_related_forms(self): - return self._related("+") + return self._related('+') def pertainyms(self): - return self._related("\\") + return self._related('\\') +@python_2_unicode_compatible class Synset(_WordNetObject): """Create a Synset from a ".." string where: is the word's morphological stem @@ -364,7 +372,7 @@ class Synset(_WordNetObject): Synsets have the following methods for retrieving related Synsets. They correspond to the names for the pointer symbols defined here: - https://wordnet.princeton.edu/documentation/wninput5wn + http://wordnet.princeton.edu/man/wninput.5WN.html#sect3 These methods all return lists of Synsets. 
- hypernyms, instance_hypernyms @@ -394,19 +402,19 @@ class Synset(_WordNetObject): """ __slots__ = [ - "_pos", - "_offset", - "_name", - "_frame_ids", - "_lemmas", - "_lemma_names", - "_definition", - "_examples", - "_lexname", - "_pointers", - "_lemma_pointers", - "_max_depth", - "_min_depth", + '_pos', + '_offset', + '_name', + '_frame_ids', + '_lemmas', + '_lemma_names', + '_definition', + '_examples', + '_lexname', + '_pointers', + '_lemma_pointers', + '_max_depth', + '_min_depth', ] def __init__(self, wordnet_corpus_reader): @@ -451,16 +459,16 @@ class Synset(_WordNetObject): def _needs_root(self): if self._pos == NOUN: - if self._wordnet_corpus_reader.get_version() == "1.6": + if self._wordnet_corpus_reader.get_version() == '1.6': return True else: return False elif self._pos == VERB: return True - def lemma_names(self, lang="eng"): - """Return all the lemma_names associated with the synset""" - if lang == "eng": + def lemma_names(self, lang='eng'): + '''Return all the lemma_names associated with the synset''' + if lang == 'eng': return self._lemma_names else: self._wordnet_corpus_reader._load_lang_data(lang) @@ -471,9 +479,9 @@ class Synset(_WordNetObject): else: return [] - def lemmas(self, lang="eng"): - """Return all the lemma objects associated with the synset""" - if lang == "eng": + def lemmas(self, lang='eng'): + '''Return all the lemma objects associated with the synset''' + if lang == 'eng': return self._lemmas else: self._wordnet_corpus_reader._load_lang_data(lang) @@ -659,7 +667,7 @@ class Synset(_WordNetObject): synsets = self.common_hypernyms(other) if simulate_root: fake_synset = Synset(None) - fake_synset._name = "*ROOT*" + fake_synset._name = '*ROOT*' fake_synset.hypernyms = lambda: [] fake_synset.instance_hypernyms = lambda: [] synsets.append(fake_synset) @@ -692,13 +700,13 @@ class Synset(_WordNetObject): distances |= hypernym.hypernym_distances(distance + 1, simulate_root=False) if simulate_root: fake_synset = Synset(None) - fake_synset._name = "*ROOT*" + fake_synset._name = '*ROOT*' fake_synset_distance = max(distances, key=itemgetter(1))[1] distances.add((fake_synset, fake_synset_distance + 1)) return distances def _shortest_hypernym_paths(self, simulate_root): - if self._name == "*ROOT*": + if self._name == '*ROOT*': return {self: 0} queue = deque([(self, 0)]) @@ -716,7 +724,7 @@ class Synset(_WordNetObject): if simulate_root: fake_synset = Synset(None) - fake_synset._name = "*ROOT*" + fake_synset._name = '*ROOT*' path[fake_synset] = max(path.values()) + 1 return path @@ -745,9 +753,9 @@ class Synset(_WordNetObject): # For each ancestor synset common to both subject synsets, find the # connecting path length. Return the shortest of these. - inf = float("inf") + inf = float('inf') path_distance = inf - for synset, d1 in dist_dict1.items(): + for synset, d1 in iteritems(dist_dict1): d2 = dist_dict2.get(synset, inf) path_distance = min(path_distance, d1 + d2) @@ -854,8 +862,8 @@ class Synset(_WordNetObject): if self._pos != other._pos: raise WordNetError( - "Computing the lch similarity requires " - "%s and %s to have the same part of speech." % (self, other) + 'Computing the lch similarity requires ' + '%s and %s to have the same part of speech.' % (self, other) ) need_root = self._needs_root() @@ -1066,14 +1074,14 @@ class WordNetCorpusReader(CorpusReader): A corpus reader used to access wordnet or its variants. 
""" - _ENCODING = "utf8" + _ENCODING = 'utf8' # { Part-of-speech constants - ADJ, ADJ_SAT, ADV, NOUN, VERB = "a", "s", "r", "n", "v" + ADJ, ADJ_SAT, ADV, NOUN, VERB = 'a', 's', 'r', 'n', 'v' # } # { Filename constants - _FILEMAP = {ADJ: "adj", ADV: "adv", NOUN: "noun", VERB: "verb"} + _FILEMAP = {ADJ: 'adj', ADV: 'adv', NOUN: 'noun', VERB: 'verb'} # } # { Part of speech constants @@ -1084,21 +1092,21 @@ class WordNetCorpusReader(CorpusReader): #: A list of file identifiers for all the fileids used by this #: corpus reader. _FILES = ( - "cntlist.rev", - "lexnames", - "index.sense", - "index.adj", - "index.adv", - "index.noun", - "index.verb", - "data.adj", - "data.adv", - "data.noun", - "data.verb", - "adj.exc", - "adv.exc", - "noun.exc", - "verb.exc", + 'cntlist.rev', + 'lexnames', + 'index.sense', + 'index.adj', + 'index.adv', + 'index.noun', + 'index.verb', + 'data.adj', + 'data.adv', + 'data.noun', + 'data.verb', + 'adj.exc', + 'adv.exc', + 'noun.exc', + 'verb.exc', ) def __init__(self, root, omw_reader): @@ -1135,7 +1143,7 @@ class WordNetCorpusReader(CorpusReader): self._key_synset_file = None # Load the lexnames - for i, line in enumerate(self.open("lexnames")): + for i, line in enumerate(self.open('lexnames')): index, lexname, _ = line.split() assert int(index) == i self._lexnames.append(lexname) @@ -1150,20 +1158,20 @@ class WordNetCorpusReader(CorpusReader): # Nasruddin A’aidil Shari, Sim Wei Ying Geraldine, and Soe Lynn def of2ss(self, of): - """ take an id and return the synsets """ + ''' take an id and return the synsets ''' return self.synset_from_pos_and_offset(of[-1], int(of[:8])) def ss2of(self, ss, lang=None): - """ return the ID of the synset """ + ''' return the ID of the synset ''' pos = ss.pos() # Only these 3 WordNets retain the satellite pos tag - if lang not in ["nld", "lit", "slk"] and pos == "s": - pos = "a" + if lang not in ["nld", "lit", "slk"] and pos == 's': + pos = 'a' return "{:08d}-{}".format(ss.offset(), pos) def _load_lang_data(self, lang): - """ load the wordnet data of the requested language from the file to - the cache, _lang_data """ + ''' load the wordnet data of the requested language from the file to + the cache, _lang_data ''' if lang in self._lang_data.keys(): return @@ -1171,20 +1179,20 @@ class WordNetCorpusReader(CorpusReader): if lang not in self.langs(): raise WordNetError("Language is not supported.") - f = self._omw_reader.open("{0:}/wn-data-{0:}.tab".format(lang)) + f = self._omw_reader.open('{0:}/wn-data-{0:}.tab'.format(lang)) self.custom_lemmas(f, lang) f.close() def langs(self): - """ return a list of languages supported by Multilingual Wordnet """ + ''' return a list of languages supported by Multilingual Wordnet ''' import os - langs = ["eng"] + langs = ['eng'] fileids = self._omw_reader.fileids() for fileid in fileids: file_name, file_extension = os.path.splitext(fileid) - if file_extension == ".tab": - langs.append(file_name.split("-")[-1]) + if file_extension == '.tab': + langs.append(file_name.split('-')[-1]) return langs @@ -1192,8 +1200,8 @@ class WordNetCorpusReader(CorpusReader): for suffix in self._FILEMAP.values(): # parse each line of the file (ignoring comment lines) - for i, line in enumerate(self.open("index.%s" % suffix)): - if line.startswith(" "): + for i, line in enumerate(self.open('index.%s' % suffix)): + if line.startswith(' '): continue _iter = iter(line.split()) @@ -1229,8 +1237,8 @@ class WordNetCorpusReader(CorpusReader): # raise more informative error with file name and line number except (AssertionError, 
ValueError) as e: - tup = ("index.%s" % suffix), (i + 1), e - raise WordNetError("file %s, line %i: %s" % tup) + tup = ('index.%s' % suffix), (i + 1), e + raise WordNetError('file %s, line %i: %s' % tup) # map lemmas and parts of speech to synsets self._lemma_pos_offset_map[lemma][pos] = synset_offsets @@ -1241,7 +1249,7 @@ class WordNetCorpusReader(CorpusReader): # load the exception file data into memory for pos, suffix in self._FILEMAP.items(): self._exception_map[pos] = {} - for line in self.open("%s.exc" % suffix): + for line in self.open('%s.exc' % suffix): terms = line.split() self._exception_map[pos][terms[0]] = terms[1:] self._exception_map[ADJ_SAT] = self._exception_map[ADJ] @@ -1264,7 +1272,7 @@ class WordNetCorpusReader(CorpusReader): def get_version(self): fh = self._data_file(ADJ) for line in fh: - match = re.search(r"WordNet (\d+\.\d+) Copyright", line) + match = re.search(r'WordNet (\d+\.\d+) Copyright', line) if match is not None: version = match.group(1) fh.seek(0) @@ -1274,8 +1282,8 @@ class WordNetCorpusReader(CorpusReader): # Loading Lemmas ############################################################# - def lemma(self, name, lang="eng"): - """Return lemma object that matches the name""" + def lemma(self, name, lang='eng'): + '''Return lemma object that matches the name''' # cannot simply split on first '.', # e.g.: '.45_caliber.a.01..45_caliber' separator = SENSENUM_RE.search(name).end() @@ -1286,19 +1294,19 @@ class WordNetCorpusReader(CorpusReader): for lemma in synset.lemmas(lang): if lemma._name == lemma_name: return lemma - raise WordNetError("no lemma %r in %r" % (lemma_name, synset_name)) + raise WordNetError('no lemma %r in %r' % (lemma_name, synset_name)) def lemma_from_key(self, key): # Keys are case sensitive and always lower-case key = key.lower() - lemma_name, lex_sense = key.split("%") - pos_number, lexname_index, lex_id, _, _ = lex_sense.split(":") + lemma_name, lex_sense = key.split('%') + pos_number, lexname_index, lex_id, _, _ = lex_sense.split(':') pos = self._pos_names[int(pos_number)] # open the key -> synset file if necessary if self._key_synset_file is None: - self._key_synset_file = self.open("index.sense") + self._key_synset_file = self.open('index.sense') # Find the synset for the lemma. 
synset_line = _binary_search_file(self._key_synset_file, key) @@ -1318,14 +1326,14 @@ class WordNetCorpusReader(CorpusReader): ############################################################# def synset(self, name): # split name into lemma, part of speech and synset number - lemma, pos, synset_index_str = name.lower().rsplit(".", 2) + lemma, pos, synset_index_str = name.lower().rsplit('.', 2) synset_index = int(synset_index_str) - 1 # get the offset for this synset try: offset = self._lemma_pos_offset_map[lemma][pos][synset_index] except KeyError: - message = "no lemma %r with part of speech %r" + message = 'no lemma %r with part of speech %r' raise WordNetError(message % (lemma, pos)) except IndexError: n_senses = len(self._lemma_pos_offset_map[lemma][pos]) @@ -1340,13 +1348,13 @@ class WordNetCorpusReader(CorpusReader): synset = self.synset_from_pos_and_offset(pos, offset) # some basic sanity checks on loaded attributes - if pos == "s" and synset._pos == "a": + if pos == 's' and synset._pos == 'a': message = ( - "adjective satellite requested but only plain " - "adjective found for lemma %r" + 'adjective satellite requested but only plain ' + 'adjective found for lemma %r' ) raise WordNetError(message % lemma) - assert synset._pos == pos or (pos == "a" and synset._pos == "s") + assert synset._pos == pos or (pos == 'a' and synset._pos == 's') # Return the synset object. return synset @@ -1359,7 +1367,7 @@ class WordNetCorpusReader(CorpusReader): if pos == ADJ_SAT: pos = ADJ if self._data_file_map.get(pos) is None: - fileid = "data.%s" % self._FILEMAP[pos] + fileid = 'data.%s' % self._FILEMAP[pos] self._data_file_map[pos] = self.open(fileid) return self._data_file_map[pos] @@ -1376,7 +1384,7 @@ class WordNetCorpusReader(CorpusReader): self._synset_offset_cache[pos][offset] = synset return synset - @deprecated("Use public method synset_from_pos_and_offset() instead") + @deprecated('Use public method synset_from_pos_and_offset() instead') def _synset_from_pos_and_offset(self, *args, **kwargs): """ Hack to help people like the readers of @@ -1393,13 +1401,16 @@ class WordNetCorpusReader(CorpusReader): try: # parse out the definitions and examples from the gloss - columns_str, gloss = data_file_line.strip().split("|") - definition = re.sub(r"[\"].*?[\"]", "", gloss).strip() - examples = re.findall(r'"([^"]*)"', gloss) - for example in examples: - synset._examples.append(example) - - synset._definition = definition.strip("; ") + columns_str, gloss = data_file_line.split('|') + gloss = gloss.strip() + definitions = [] + for gloss_part in gloss.split(';'): + gloss_part = gloss_part.strip() + if gloss_part.startswith('"'): + synset._examples.append(gloss_part.strip('"')) + else: + definitions.append(gloss_part) + synset._definition = '; '.join(definitions) # split the other info into fields _iter = iter(columns_str.split()) @@ -1425,7 +1436,7 @@ class WordNetCorpusReader(CorpusReader): # get the lex_id (used for sense_keys) lex_id = int(_next_token(), 16) # If the lemma has a syntactic marker, extract it. 
- m = re.match(r"(.*?)(\(.*\))?$", lemma_name) + m = re.match(r'(.*?)(\(.*\))?$', lemma_name) lemma_name, syn_mark = m.groups() # create the lemma object lemma = Lemma(self, synset, lemma_name, lexname_index, lex_id, syn_mark) @@ -1439,7 +1450,7 @@ class WordNetCorpusReader(CorpusReader): offset = int(_next_token()) pos = _next_token() lemma_ids_str = _next_token() - if lemma_ids_str == "0000": + if lemma_ids_str == '0000': synset._pointers[symbol].add((pos, offset)) else: source_index = int(lemma_ids_str[:2], 16) - 1 @@ -1458,7 +1469,7 @@ class WordNetCorpusReader(CorpusReader): for _ in range(frame_count): # read the plus sign plus = _next_token() - assert plus == "+" + assert plus == '+' # read the frame and lemma number frame_number = int(_next_token()) frame_string_fmt = VERB_FRAME_STRINGS[frame_number] @@ -1477,7 +1488,7 @@ class WordNetCorpusReader(CorpusReader): # raise a more informative error with line text except ValueError as e: - raise WordNetError("line %r: %s" % (data_file_line, e)) + raise WordNetError('line %r: %s' % (data_file_line, e)) # set sense keys for Lemma objects - note that this has to be # done afterwards so that the relations are available @@ -1485,9 +1496,9 @@ class WordNetCorpusReader(CorpusReader): if synset._pos == ADJ_SAT: head_lemma = synset.similar_tos()[0]._lemmas[0] head_name = head_lemma._name - head_id = "%02d" % head_lemma._lex_id + head_id = '%02d' % head_lemma._lex_id else: - head_name = head_id = "" + head_name = head_id = '' tup = ( lemma._name, WordNetCorpusReader._pos_numbers[synset._pos], @@ -1496,14 +1507,14 @@ class WordNetCorpusReader(CorpusReader): head_name, head_id, ) - lemma._key = ("%s%%%d:%02d:%02d:%s:%s" % tup).lower() + lemma._key = ('%s%%%d:%02d:%02d:%s:%s' % tup).lower() # the canonical name is based on the first lemma lemma_name = synset._lemmas[0]._name.lower() offsets = self._lemma_pos_offset_map[lemma_name][synset._pos] sense_index = offsets.index(synset._offset) tup = lemma_name, synset._pos, sense_index + 1 - synset._name = "%s.%s.%02i" % tup + synset._name = '%s.%s.%02i' % tup return synset @@ -1512,7 +1523,7 @@ class WordNetCorpusReader(CorpusReader): Retrieves synset based on a given sense_key. Sense keys can be obtained from lemma.key() - From https://wordnet.princeton.edu/documentation/senseidx5wn: + From https://wordnet.princeton.edu/wordnet/man/senseidx.5WN.html: A sense_key is represented as: lemma % lex_sense (e.g. 'dog%1:18:01::') where lex_sense is encoded as: @@ -1550,14 +1561,14 @@ class WordNetCorpusReader(CorpusReader): "valid {} could not be extracted from the sense key".format(error) ) - synset_id = ".".join([lemma, synset_types[int(ss_type)], lex_id]) + synset_id = '.'.join([lemma, synset_types[int(ss_type)], lex_id]) return self.synset(synset_id) ############################################################# # Retrieve synsets and lemmas. ############################################################# - def synsets(self, lemma, pos=None, lang="eng", check_exceptions=True): + def synsets(self, lemma, pos=None, lang='eng', check_exceptions=True): """Load all synsets with a given lemma and part of speech tag. If no pos is specified, all synsets for all parts of speech will be loaded. 
@@ -1566,7 +1577,7 @@ class WordNetCorpusReader(CorpusReader): """ lemma = lemma.lower() - if lang == "eng": + if lang == 'eng': get_synset = self.synset_from_pos_and_offset index = self._lemma_pos_offset_map if pos is None: @@ -1588,13 +1599,13 @@ class WordNetCorpusReader(CorpusReader): synset_list.append(self.of2ss(l)) return synset_list - def lemmas(self, lemma, pos=None, lang="eng"): + def lemmas(self, lemma, pos=None, lang='eng'): """Return all Lemma objects with a name matching the specified lemma name and part of speech tag. Matches any part of speech tag if none is specified.""" lemma = lemma.lower() - if lang == "eng": + if lang == 'eng': return [ lemma_obj for synset in self.synsets(lemma, pos) @@ -1614,13 +1625,13 @@ class WordNetCorpusReader(CorpusReader): lemmas.append(lemma_obj) return lemmas - def all_lemma_names(self, pos=None, lang="eng"): + def all_lemma_names(self, pos=None, lang='eng'): """Return all lemma names for all synsets for the given part of speech tag and language or languages. If pos is not specified, all synsets for all parts of speech will be used.""" - if lang == "eng": + if lang == 'eng': if pos is None: return iter(self._lemma_pos_offset_map) else: @@ -1637,7 +1648,7 @@ class WordNetCorpusReader(CorpusReader): continue lemma.extend(self._lang_data[lang][0][i]) - lemma = iter(set(lemma)) + lemma = list(set(lemma)) return lemma def all_synsets(self, pos=None): @@ -1661,7 +1672,7 @@ class WordNetCorpusReader(CorpusReader): # be moved while we're not looking. if pos_tag == ADJ_SAT: pos_tag = ADJ - fileid = "data.%s" % self._FILEMAP[pos_tag] + fileid = 'data.%s' % self._FILEMAP[pos_tag] data_file = self.open(fileid) try: @@ -1698,18 +1709,18 @@ class WordNetCorpusReader(CorpusReader): else: data_file.close() - def words(self, lang="eng"): + def words(self, lang='eng'): """return lemmas of the given language as list of words""" return self.all_lemma_names(lang=lang) - def license(self, lang="eng"): + def license(self, lang='eng'): """Return the contents of LICENSE (for omw) use lang=lang to get the license for an individual language""" - if lang == "eng": + if lang == 'eng': return self.open("LICENSE").read() elif lang in self.langs(): return self._omw_reader.open("{}/LICENSE".format(lang)).read() - elif lang == "omw": + elif lang == 'omw': # under the assumption you don't mean Omwunra-Toqura return self._omw_reader.open("LICENSE").read() elif lang in self._lang_data: @@ -1717,14 +1728,14 @@ class WordNetCorpusReader(CorpusReader): else: raise WordNetError("Language is not supported.") - def readme(self, lang="omw"): + def readme(self, lang='omw'): """Return the contents of README (for omw) use lang=lang to get the readme for an individual language""" - if lang == "eng": + if lang == 'eng': return self.open("README").read() elif lang in self.langs(): return self._omw_reader.open("{}/README".format(lang)).read() - elif lang == "omw": + elif lang == 'omw': # under the assumption you don't mean Omwunra-Toqura return self._omw_reader.open("README").read() elif lang in self._lang_data: @@ -1732,14 +1743,14 @@ class WordNetCorpusReader(CorpusReader): else: raise WordNetError("Language is not supported.") - def citation(self, lang="omw"): + def citation(self, lang='omw'): """Return the contents of citation.bib file (for omw) use lang=lang to get the citation for an individual language""" - if lang == "eng": + if lang == 'eng': return self.open("citation.bib").read() elif lang in self.langs(): return self._omw_reader.open("{}/citation.bib".format(lang)).read() - elif 
lang == "omw": + elif lang == 'omw': # under the assumption you don't mean Omwunra-Toqura return self._omw_reader.open("citation.bib").read() elif lang in self._lang_data: @@ -1753,15 +1764,15 @@ class WordNetCorpusReader(CorpusReader): def lemma_count(self, lemma): """Return the frequency count for this Lemma""" # Currently, count is only work for English - if lemma._lang != "eng": + if lemma._lang != 'eng': return 0 # open the count file if we haven't already if self._key_count_file is None: - self._key_count_file = self.open("cntlist.rev") + self._key_count_file = self.open('cntlist.rev') # find the key in the counts file and return the count line = _binary_search_file(self._key_count_file, lemma._key) if line: - return int(line.rsplit(" ", 1)[-1]) + return int(line.rsplit(' ', 1)[-1]) else: return 0 @@ -1836,27 +1847,27 @@ class WordNetCorpusReader(CorpusReader): MORPHOLOGICAL_SUBSTITUTIONS = { NOUN: [ - ("s", ""), - ("ses", "s"), - ("ves", "f"), - ("xes", "x"), - ("zes", "z"), - ("ches", "ch"), - ("shes", "sh"), - ("men", "man"), - ("ies", "y"), + ('s', ''), + ('ses', 's'), + ('ves', 'f'), + ('xes', 'x'), + ('zes', 'z'), + ('ches', 'ch'), + ('shes', 'sh'), + ('men', 'man'), + ('ies', 'y'), ], VERB: [ - ("s", ""), - ("ies", "y"), - ("es", "e"), - ("es", ""), - ("ed", "e"), - ("ed", ""), - ("ing", "e"), - ("ing", ""), + ('s', ''), + ('ies', 'y'), + ('es', 'e'), + ('es', ''), + ('ed', 'e'), + ('ed', ''), + ('ing', 'e'), + ('ing', ''), ], - ADJ: [("er", ""), ("est", ""), ("er", "e"), ("est", "e")], + ADJ: [('er', ''), ('est', ''), ('er', 'e'), ('est', 'e')], ADV: [], } @@ -1986,18 +1997,19 @@ class WordNetCorpusReader(CorpusReader): :param lang ISO 639-3 code of the language of the tab file """ if len(lang) != 3: - raise ValueError("lang should be a (3 character) ISO 639-3 code") + raise ValueError('lang should be a (3 character) ISO 639-3 code') self._lang_data[lang] = [defaultdict(list), defaultdict(list)] - for line in tab_file.readlines(): - if isinstance(line, bytes): + for l in tab_file.readlines(): + if isinstance(l, bytes): # Support byte-stream files (e.g. as returned by Python 2's # open() function) as well as text-stream ones - line = line.decode("utf-8") - if not line.startswith("#"): - offset_pos, lemma_type, lemma = line.strip().split("\t") - lemma = lemma.strip().replace(" ", "_") - self._lang_data[lang][0][offset_pos].append(lemma) - self._lang_data[lang][1][lemma.lower()].append(offset_pos) + l = l.decode('utf-8') + l = l.replace('\n', '') + l = l.replace(' ', '_') + if l[0] != '#': + word = l.split('\t') + self._lang_data[lang][0][word[0]].append(word[2]) + self._lang_data[lang][1][word[2].lower()].append(word[0]) # Make sure no more entries are accidentally added subsequently self._lang_data[lang][0].default_factory = None self._lang_data[lang][1].default_factory = None @@ -2014,7 +2026,7 @@ class WordNetICCorpusReader(CorpusReader): """ def __init__(self, root, fileids): - CorpusReader.__init__(self, root, fileids, encoding="utf8") + CorpusReader.__init__(self, root, fileids, encoding='utf8') # this load function would be more efficient if the data was pickled # Note that we can't use NLTK's frequency distributions because @@ -2111,8 +2123,8 @@ def _lcs_ic(synset1, synset2, ic, verbose=False): """ if synset1._pos != synset2._pos: raise WordNetError( - "Computing the least common subsumer requires " - "%s and %s to have the same part of speech." % (synset1, synset2) + 'Computing the least common subsumer requires ' + '%s and %s to have the same part of speech.' 
% (synset1, synset2) ) ic1 = information_content(synset1, ic) @@ -2136,7 +2148,7 @@ def information_content(synset, ic): try: icpos = ic[synset._pos] except KeyError: - msg = "Information content file has no entries for part-of-speech: %s" + msg = 'Information content file has no entries for part-of-speech: %s' raise WordNetError(msg % synset._pos) counts = icpos[synset._offset] @@ -2151,9 +2163,9 @@ def information_content(synset, ic): def _get_pos(field): - if field[-1] == "n": + if field[-1] == 'n': return NOUN - elif field[-1] == "v": + elif field[-1] == 'v': return VERB else: msg = ( diff --git a/nlp_resource_data/nltk/corpus/reader/xmldocs.py b/nlp_resource_data/nltk/corpus/reader/xmldocs.py index 6f928b7..8a66720 100644 --- a/nlp_resource_data/nltk/corpus/reader/xmldocs.py +++ b/nlp_resource_data/nltk/corpus/reader/xmldocs.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: XML Corpus Reader # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Steven Bird # URL: # For license information, see LICENSE.TXT @@ -10,9 +10,17 @@ Corpus reader for corpora whose documents are xml files. (note -- not named 'xml' to avoid conflicting w/ standard xml package) """ +from __future__ import print_function, unicode_literals import codecs -from xml.etree import ElementTree + +# Use the c version of ElementTree, which is faster, if possible: +try: + from xml.etree import cElementTree as ElementTree +except ImportError: + from xml.etree import ElementTree + +from six import string_types from nltk.data import SeekableUnicodeStreamReader from nltk.tokenize import WordPunctTokenizer @@ -39,8 +47,8 @@ class XMLCorpusReader(CorpusReader): # Make sure we have exactly one file -- no concatenating XML. if fileid is None and len(self._fileids) == 1: fileid = self._fileids[0] - if not isinstance(fileid, str): - raise TypeError("Expected a single file identifier string") + if not isinstance(fileid, string_types): + raise TypeError('Expected a single file identifier string') # Read the XML in using ElementTree. elt = ElementTree.parse(self.abspath(fileid).open()).getroot() # If requested, wrap it. @@ -77,7 +85,7 @@ class XMLCorpusReader(CorpusReader): def raw(self, fileids=None): if fileids is None: fileids = self._fileids - elif isinstance(fileids, str): + elif isinstance(fileids, string_types): fileids = [fileids] return concat([self.open(f).read() for f in fileids]) @@ -142,7 +150,7 @@ class XMLCorpusView(StreamBackedCorpusView): if elt_handler: self.handle_elt = elt_handler - self._tagspec = re.compile(tagspec + r"\Z") + self._tagspec = re.compile(tagspec + r'\Z') """The tag specification for this corpus view.""" self._tag_context = {0: ()} @@ -162,18 +170,18 @@ class XMLCorpusView(StreamBackedCorpusView): finally: infile.close() else: - with open(fileid, "rb") as infile: + with open(fileid, 'rb') as infile: s = infile.readline() if s.startswith(codecs.BOM_UTF16_BE): - return "utf-16-be" + return 'utf-16-be' if s.startswith(codecs.BOM_UTF16_LE): - return "utf-16-le" + return 'utf-16-le' if s.startswith(codecs.BOM_UTF32_BE): - return "utf-32-be" + return 'utf-32-be' if s.startswith(codecs.BOM_UTF32_LE): - return "utf-32-le" + return 'utf-32-le' if s.startswith(codecs.BOM_UTF8): - return "utf-8" + return 'utf-8' m = re.match(br'\s*<\?xml\b.*\bencoding="([^"]+)"', s) if m: return m.group(1).decode() @@ -181,7 +189,7 @@ class XMLCorpusView(StreamBackedCorpusView): if m: return m.group(1).decode() # No encoding found -- what should the default be? 
- return "utf-8" + return 'utf-8' def handle_elt(self, elt, context): """ @@ -222,7 +230,7 @@ class XMLCorpusView(StreamBackedCorpusView): #: A regular expression used to extract the tag name from a start tag, #: end tag, or empty-elt tag string. - _XML_TAG_NAME = re.compile("<\s*/?\s*([^\s>]+)") + _XML_TAG_NAME = re.compile('<\s*/?\s*([^\s>]+)') #: A regular expression used to find all start-tags, end-tags, and #: emtpy-elt tags in an XML file. This regexp is more lenient than @@ -251,7 +259,7 @@ class XMLCorpusView(StreamBackedCorpusView): then this function either backtracks to the last '<', or reads another block. """ - fragment = "" + fragment = '' if isinstance(stream, SeekableUnicodeStreamReader): startpos = stream.tell() @@ -265,20 +273,20 @@ class XMLCorpusView(StreamBackedCorpusView): return fragment # Do we have a fragment that will never be well-formed? - if re.search("[<>]", fragment).group(0) == ">": + if re.search('[<>]', fragment).group(0) == '>': pos = stream.tell() - ( - len(fragment) - re.search("[<>]", fragment).end() + len(fragment) - re.search('[<>]', fragment).end() ) raise ValueError('Unexpected ">" near char %s' % pos) # End of file? if not xml_block: - raise ValueError("Unexpected end of file: tag not closed") + raise ValueError('Unexpected end of file: tag not closed') # If not, then we must be in the middle of a <..tag..>. # If appropriate, backtrack to the most recent '<' # character. - last_open_bracket = fragment.rfind("<") + last_open_bracket = fragment.rfind('<') if last_open_bracket > 0: if self._VALID_XML_RE.match(fragment[:last_open_bracket]): if isinstance(stream, SeekableUnicodeStreamReader): @@ -310,7 +318,7 @@ class XMLCorpusView(StreamBackedCorpusView): elt_start = None # where does the elt start elt_depth = None # what context depth - elt_text = "" + elt_text = '' while elts == [] or elt_start is not None: if isinstance(stream, SeekableUnicodeStreamReader): @@ -322,46 +330,46 @@ class XMLCorpusView(StreamBackedCorpusView): if elt_start is None: break else: - raise ValueError("Unexpected end of file") + raise ValueError('Unexpected end of file') # Process each in the xml fragment. for piece in self._XML_PIECE.finditer(xml_fragment): if self._DEBUG: - print("%25s %s" % ("/".join(context)[-20:], piece.group())) + print('%25s %s' % ('/'.join(context)[-20:], piece.group())) - if piece.group("START_TAG"): + if piece.group('START_TAG'): name = self._XML_TAG_NAME.match(piece.group()).group(1) # Keep context up-to-date. context.append(name) # Is this one of the elts we're looking for? if elt_start is None: - if re.match(tagspec, "/".join(context)): + if re.match(tagspec, '/'.join(context)): elt_start = piece.start() elt_depth = len(context) - elif piece.group("END_TAG"): + elif piece.group('END_TAG'): name = self._XML_TAG_NAME.match(piece.group()).group(1) # sanity checks: if not context: - raise ValueError("Unmatched tag " % name) + raise ValueError('Unmatched tag ' % name) if name != context[-1]: raise ValueError( - "Unmatched tag <%s>..." % (context[-1], name) + 'Unmatched tag <%s>...' % (context[-1], name) ) # Is this the end of an element? 
if elt_start is not None and elt_depth == len(context): elt_text += xml_fragment[elt_start : piece.end()] - elts.append((elt_text, "/".join(context))) + elts.append((elt_text, '/'.join(context))) elt_start = elt_depth = None - elt_text = "" + elt_text = '' # Keep context up-to-date context.pop() - elif piece.group("EMPTY_ELT_TAG"): + elif piece.group('EMPTY_ELT_TAG'): name = self._XML_TAG_NAME.match(piece.group()).group(1) if elt_start is None: - if re.match(tagspec, "/".join(context) + "/" + name): - elts.append((piece.group(), "/".join(context) + "/" + name)) + if re.match(tagspec, '/'.join(context) + '/' + name): + elts.append((piece.group(), '/'.join(context) + '/' + name)) if elt_start is not None: # If we haven't found any elements yet, then keep @@ -377,7 +385,7 @@ class XMLCorpusView(StreamBackedCorpusView): # take back the last start-tag, and return what # we've gotten so far (elts is non-empty). if self._DEBUG: - print(" " * 36 + "(backtrack)") + print(' ' * 36 + '(backtrack)') if isinstance(stream, SeekableUnicodeStreamReader): stream.seek(startpos) stream.char_seek_forward(elt_start) @@ -385,7 +393,7 @@ class XMLCorpusView(StreamBackedCorpusView): stream.seek(-(len(xml_fragment) - elt_start), 1) context = context[: elt_depth - 1] elt_start = elt_depth = None - elt_text = "" + elt_text = '' # Update the _tag_context dict. pos = stream.tell() @@ -396,7 +404,7 @@ class XMLCorpusView(StreamBackedCorpusView): return [ elt_handler( - ElementTree.fromstring(elt.encode("ascii", "xmlcharrefreplace")), + ElementTree.fromstring(elt.encode('ascii', 'xmlcharrefreplace')), context, ) for (elt, context) in elts diff --git a/nlp_resource_data/nltk/corpus/reader/ycoe.py b/nlp_resource_data/nltk/corpus/reader/ycoe.py index 75ffda2..49a6685 100644 --- a/nlp_resource_data/nltk/corpus/reader/ycoe.py +++ b/nlp_resource_data/nltk/corpus/reader/ycoe.py @@ -22,6 +22,8 @@ to the YCOE standard: http://www-users.york.ac.uk/~lang22/YCOE/YcoeHome.htm import os import re +from six import string_types + from nltk.tokenize import RegexpTokenizer from nltk.corpus.reader.bracket_parse import BracketParseCorpusReader from nltk.corpus.reader.tagged import TaggedCorpusReader @@ -37,22 +39,22 @@ class YCOECorpusReader(CorpusReader): corpus of Old English prose texts. 
""" - def __init__(self, root, encoding="utf8"): + def __init__(self, root, encoding='utf8'): CorpusReader.__init__(self, root, [], encoding) self._psd_reader = YCOEParseCorpusReader( - self.root.join("psd"), ".*", ".psd", encoding=encoding + self.root.join('psd'), '.*', '.psd', encoding=encoding ) - self._pos_reader = YCOETaggedCorpusReader(self.root.join("pos"), ".*", ".pos") + self._pos_reader = YCOETaggedCorpusReader(self.root.join('pos'), '.*', '.pos') # Make sure we have a consistent set of items: documents = set(f[:-4] for f in self._psd_reader.fileids()) if set(f[:-4] for f in self._pos_reader.fileids()) != documents: - raise ValueError('Items in "psd" and "pos" ' "subdirectories do not match.") + raise ValueError('Items in "psd" and "pos" ' 'subdirectories do not match.') fileids = sorted( - ["%s.psd" % doc for doc in documents] - + ["%s.pos" % doc for doc in documents] + ['%s.psd' % doc for doc in documents] + + ['%s.pos' % doc for doc in documents] ) CorpusReader.__init__(self, root, fileids, encoding) self._documents = sorted(documents) @@ -65,11 +67,11 @@ class YCOECorpusReader(CorpusReader): """ if fileids is None: return self._documents - if isinstance(fileids, str): + if isinstance(fileids, string_types): fileids = [fileids] for f in fileids: if f not in self._fileids: - raise KeyError("File id %s not found" % fileids) + raise KeyError('File id %s not found' % fileids) # Strip off the '.pos' and '.psd' extensions. return sorted(set(f[:-4] for f in fileids)) @@ -80,12 +82,12 @@ class YCOECorpusReader(CorpusReader): """ if documents is None: return self._fileids - elif isinstance(documents, str): + elif isinstance(documents, string_types): documents = [documents] return sorted( set( - ["%s.pos" % doc for doc in documents] - + ["%s.psd" % doc for doc in documents] + ['%s.pos' % doc for doc in documents] + + ['%s.psd' % doc for doc in documents] ) ) @@ -97,41 +99,41 @@ class YCOECorpusReader(CorpusReader): if documents is None: documents = self._documents else: - if isinstance(documents, str): + if isinstance(documents, string_types): documents = [documents] for document in documents: if document not in self._documents: - if document[-4:] in (".pos", ".psd"): + if document[-4:] in ('.pos', '.psd'): raise ValueError( - "Expected a document identifier, not a file " - "identifier. (Use corpus.documents() to get " - "a list of document identifiers." + 'Expected a document identifier, not a file ' + 'identifier. (Use corpus.documents() to get ' + 'a list of document identifiers.' 
) else: - raise ValueError("Document identifier %s not found" % document) - return ["%s.%s" % (d, subcorpus) for d in documents] + raise ValueError('Document identifier %s not found' % document) + return ['%s.%s' % (d, subcorpus) for d in documents] # Delegate to one of our two sub-readers: def words(self, documents=None): - return self._pos_reader.words(self._getfileids(documents, "pos")) + return self._pos_reader.words(self._getfileids(documents, 'pos')) def sents(self, documents=None): - return self._pos_reader.sents(self._getfileids(documents, "pos")) + return self._pos_reader.sents(self._getfileids(documents, 'pos')) def paras(self, documents=None): - return self._pos_reader.paras(self._getfileids(documents, "pos")) + return self._pos_reader.paras(self._getfileids(documents, 'pos')) def tagged_words(self, documents=None): - return self._pos_reader.tagged_words(self._getfileids(documents, "pos")) + return self._pos_reader.tagged_words(self._getfileids(documents, 'pos')) def tagged_sents(self, documents=None): - return self._pos_reader.tagged_sents(self._getfileids(documents, "pos")) + return self._pos_reader.tagged_sents(self._getfileids(documents, 'pos')) def tagged_paras(self, documents=None): - return self._pos_reader.tagged_paras(self._getfileids(documents, "pos")) + return self._pos_reader.tagged_paras(self._getfileids(documents, 'pos')) def parsed_sents(self, documents=None): - return self._psd_reader.parsed_sents(self._getfileids(documents, "psd")) + return self._psd_reader.parsed_sents(self._getfileids(documents, 'psd')) class YCOEParseCorpusReader(BracketParseCorpusReader): @@ -139,121 +141,121 @@ class YCOEParseCorpusReader(BracketParseCorpusReader): that strips out (CODE ...) and (ID ...) nodes.""" def _parse(self, t): - t = re.sub(r"(?u)\((CODE|ID)[^\)]*\)", "", t) - if re.match(r"\s*\(\s*\)\s*$", t): + t = re.sub(r'(?u)\((CODE|ID)[^\)]*\)', '', t) + if re.match(r'\s*\(\s*\)\s*$', t): return None return BracketParseCorpusReader._parse(self, t) class YCOETaggedCorpusReader(TaggedCorpusReader): - def __init__(self, root, items, encoding="utf8"): - gaps_re = r"(?u)(?<=/\.)\s+|\s*\S*_CODE\s*|\s*\S*_ID\s*" + def __init__(self, root, items, encoding='utf8'): + gaps_re = r'(?u)(?<=/\.)\s+|\s*\S*_CODE\s*|\s*\S*_ID\s*' sent_tokenizer = RegexpTokenizer(gaps_re, gaps=True) TaggedCorpusReader.__init__( - self, root, items, sep="_", sent_tokenizer=sent_tokenizer + self, root, items, sep='_', sent_tokenizer=sent_tokenizer ) #: A list of all documents and their titles in ycoe. 
documents = { - "coadrian.o34": "Adrian and Ritheus", - "coaelhom.o3": "Ælfric, Supplemental Homilies", - "coaelive.o3": "Ælfric's Lives of Saints", - "coalcuin": "Alcuin De virtutibus et vitiis", - "coalex.o23": "Alexander's Letter to Aristotle", - "coapollo.o3": "Apollonius of Tyre", - "coaugust": "Augustine", - "cobede.o2": "Bede's History of the English Church", - "cobenrul.o3": "Benedictine Rule", - "coblick.o23": "Blickling Homilies", - "coboeth.o2": "Boethius' Consolation of Philosophy", - "cobyrhtf.o3": "Byrhtferth's Manual", - "cocanedgD": "Canons of Edgar (D)", - "cocanedgX": "Canons of Edgar (X)", - "cocathom1.o3": "Ælfric's Catholic Homilies I", - "cocathom2.o3": "Ælfric's Catholic Homilies II", - "cochad.o24": "Saint Chad", - "cochdrul": "Chrodegang of Metz, Rule", - "cochristoph": "Saint Christopher", - "cochronA.o23": "Anglo-Saxon Chronicle A", - "cochronC": "Anglo-Saxon Chronicle C", - "cochronD": "Anglo-Saxon Chronicle D", - "cochronE.o34": "Anglo-Saxon Chronicle E", - "cocura.o2": "Cura Pastoralis", - "cocuraC": "Cura Pastoralis (Cotton)", - "codicts.o34": "Dicts of Cato", - "codocu1.o1": "Documents 1 (O1)", - "codocu2.o12": "Documents 2 (O1/O2)", - "codocu2.o2": "Documents 2 (O2)", - "codocu3.o23": "Documents 3 (O2/O3)", - "codocu3.o3": "Documents 3 (O3)", - "codocu4.o24": "Documents 4 (O2/O4)", - "coeluc1": "Honorius of Autun, Elucidarium 1", - "coeluc2": "Honorius of Autun, Elucidarium 1", - "coepigen.o3": "Ælfric's Epilogue to Genesis", - "coeuphr": "Saint Euphrosyne", - "coeust": "Saint Eustace and his companions", - "coexodusP": "Exodus (P)", - "cogenesiC": "Genesis (C)", - "cogregdC.o24": "Gregory's Dialogues (C)", - "cogregdH.o23": "Gregory's Dialogues (H)", - "coherbar": "Pseudo-Apuleius, Herbarium", - "coinspolD.o34": "Wulfstan's Institute of Polity (D)", - "coinspolX": "Wulfstan's Institute of Polity (X)", - "cojames": "Saint James", - "colacnu.o23": "Lacnunga", - "colaece.o2": "Leechdoms", - "colaw1cn.o3": "Laws, Cnut I", - "colaw2cn.o3": "Laws, Cnut II", - "colaw5atr.o3": "Laws, Æthelred V", - "colaw6atr.o3": "Laws, Æthelred VI", - "colawaf.o2": "Laws, Alfred", - "colawafint.o2": "Alfred's Introduction to Laws", - "colawger.o34": "Laws, Gerefa", - "colawine.ox2": "Laws, Ine", - "colawnorthu.o3": "Northumbra Preosta Lagu", - "colawwllad.o4": "Laws, William I, Lad", - "coleofri.o4": "Leofric", - "colsigef.o3": "Ælfric's Letter to Sigefyrth", - "colsigewB": "Ælfric's Letter to Sigeweard (B)", - "colsigewZ.o34": "Ælfric's Letter to Sigeweard (Z)", - "colwgeat": "Ælfric's Letter to Wulfgeat", - "colwsigeT": "Ælfric's Letter to Wulfsige (T)", - "colwsigeXa.o34": "Ælfric's Letter to Wulfsige (Xa)", - "colwstan1.o3": "Ælfric's Letter to Wulfstan I", - "colwstan2.o3": "Ælfric's Letter to Wulfstan II", - "comargaC.o34": "Saint Margaret (C)", - "comargaT": "Saint Margaret (T)", - "comart1": "Martyrology, I", - "comart2": "Martyrology, II", - "comart3.o23": "Martyrology, III", - "comarvel.o23": "Marvels of the East", - "comary": "Mary of Egypt", - "coneot": "Saint Neot", - "conicodA": "Gospel of Nicodemus (A)", - "conicodC": "Gospel of Nicodemus (C)", - "conicodD": "Gospel of Nicodemus (D)", - "conicodE": "Gospel of Nicodemus (E)", - "coorosiu.o2": "Orosius", - "cootest.o3": "Heptateuch", - "coprefcath1.o3": "Ælfric's Preface to Catholic Homilies I", - "coprefcath2.o3": "Ælfric's Preface to Catholic Homilies II", - "coprefcura.o2": "Preface to the Cura Pastoralis", - "coprefgen.o3": "Ælfric's Preface to Genesis", - "copreflives.o3": "Ælfric's Preface to Lives of Saints", 
- "coprefsolilo": "Preface to Augustine's Soliloquies", - "coquadru.o23": "Pseudo-Apuleius, Medicina de quadrupedibus", - "corood": "History of the Holy Rood-Tree", - "cosevensl": "Seven Sleepers", - "cosolilo": "St. Augustine's Soliloquies", - "cosolsat1.o4": "Solomon and Saturn I", - "cosolsat2": "Solomon and Saturn II", - "cotempo.o3": "Ælfric's De Temporibus Anni", - "coverhom": "Vercelli Homilies", - "coverhomE": "Vercelli Homilies (E)", - "coverhomL": "Vercelli Homilies (L)", - "covinceB": "Saint Vincent (Bodley 343)", - "covinsal": "Vindicta Salvatoris", - "cowsgosp.o3": "West-Saxon Gospels", - "cowulf.o34": "Wulfstan's Homilies", + 'coadrian.o34': 'Adrian and Ritheus', + 'coaelhom.o3': 'Ælfric, Supplemental Homilies', + 'coaelive.o3': 'Ælfric\'s Lives of Saints', + 'coalcuin': 'Alcuin De virtutibus et vitiis', + 'coalex.o23': 'Alexander\'s Letter to Aristotle', + 'coapollo.o3': 'Apollonius of Tyre', + 'coaugust': 'Augustine', + 'cobede.o2': 'Bede\'s History of the English Church', + 'cobenrul.o3': 'Benedictine Rule', + 'coblick.o23': 'Blickling Homilies', + 'coboeth.o2': 'Boethius\' Consolation of Philosophy', + 'cobyrhtf.o3': 'Byrhtferth\'s Manual', + 'cocanedgD': 'Canons of Edgar (D)', + 'cocanedgX': 'Canons of Edgar (X)', + 'cocathom1.o3': 'Ælfric\'s Catholic Homilies I', + 'cocathom2.o3': 'Ælfric\'s Catholic Homilies II', + 'cochad.o24': 'Saint Chad', + 'cochdrul': 'Chrodegang of Metz, Rule', + 'cochristoph': 'Saint Christopher', + 'cochronA.o23': 'Anglo-Saxon Chronicle A', + 'cochronC': 'Anglo-Saxon Chronicle C', + 'cochronD': 'Anglo-Saxon Chronicle D', + 'cochronE.o34': 'Anglo-Saxon Chronicle E', + 'cocura.o2': 'Cura Pastoralis', + 'cocuraC': 'Cura Pastoralis (Cotton)', + 'codicts.o34': 'Dicts of Cato', + 'codocu1.o1': 'Documents 1 (O1)', + 'codocu2.o12': 'Documents 2 (O1/O2)', + 'codocu2.o2': 'Documents 2 (O2)', + 'codocu3.o23': 'Documents 3 (O2/O3)', + 'codocu3.o3': 'Documents 3 (O3)', + 'codocu4.o24': 'Documents 4 (O2/O4)', + 'coeluc1': 'Honorius of Autun, Elucidarium 1', + 'coeluc2': 'Honorius of Autun, Elucidarium 1', + 'coepigen.o3': 'Ælfric\'s Epilogue to Genesis', + 'coeuphr': 'Saint Euphrosyne', + 'coeust': 'Saint Eustace and his companions', + 'coexodusP': 'Exodus (P)', + 'cogenesiC': 'Genesis (C)', + 'cogregdC.o24': 'Gregory\'s Dialogues (C)', + 'cogregdH.o23': 'Gregory\'s Dialogues (H)', + 'coherbar': 'Pseudo-Apuleius, Herbarium', + 'coinspolD.o34': 'Wulfstan\'s Institute of Polity (D)', + 'coinspolX': 'Wulfstan\'s Institute of Polity (X)', + 'cojames': 'Saint James', + 'colacnu.o23': 'Lacnunga', + 'colaece.o2': 'Leechdoms', + 'colaw1cn.o3': 'Laws, Cnut I', + 'colaw2cn.o3': 'Laws, Cnut II', + 'colaw5atr.o3': 'Laws, Æthelred V', + 'colaw6atr.o3': 'Laws, Æthelred VI', + 'colawaf.o2': 'Laws, Alfred', + 'colawafint.o2': 'Alfred\'s Introduction to Laws', + 'colawger.o34': 'Laws, Gerefa', + 'colawine.ox2': 'Laws, Ine', + 'colawnorthu.o3': 'Northumbra Preosta Lagu', + 'colawwllad.o4': 'Laws, William I, Lad', + 'coleofri.o4': 'Leofric', + 'colsigef.o3': 'Ælfric\'s Letter to Sigefyrth', + 'colsigewB': 'Ælfric\'s Letter to Sigeweard (B)', + 'colsigewZ.o34': 'Ælfric\'s Letter to Sigeweard (Z)', + 'colwgeat': 'Ælfric\'s Letter to Wulfgeat', + 'colwsigeT': 'Ælfric\'s Letter to Wulfsige (T)', + 'colwsigeXa.o34': 'Ælfric\'s Letter to Wulfsige (Xa)', + 'colwstan1.o3': 'Ælfric\'s Letter to Wulfstan I', + 'colwstan2.o3': 'Ælfric\'s Letter to Wulfstan II', + 'comargaC.o34': 'Saint Margaret (C)', + 'comargaT': 'Saint Margaret (T)', + 'comart1': 'Martyrology, I', + 'comart2': 
'Martyrology, II', + 'comart3.o23': 'Martyrology, III', + 'comarvel.o23': 'Marvels of the East', + 'comary': 'Mary of Egypt', + 'coneot': 'Saint Neot', + 'conicodA': 'Gospel of Nicodemus (A)', + 'conicodC': 'Gospel of Nicodemus (C)', + 'conicodD': 'Gospel of Nicodemus (D)', + 'conicodE': 'Gospel of Nicodemus (E)', + 'coorosiu.o2': 'Orosius', + 'cootest.o3': 'Heptateuch', + 'coprefcath1.o3': 'Ælfric\'s Preface to Catholic Homilies I', + 'coprefcath2.o3': 'Ælfric\'s Preface to Catholic Homilies II', + 'coprefcura.o2': 'Preface to the Cura Pastoralis', + 'coprefgen.o3': 'Ælfric\'s Preface to Genesis', + 'copreflives.o3': 'Ælfric\'s Preface to Lives of Saints', + 'coprefsolilo': 'Preface to Augustine\'s Soliloquies', + 'coquadru.o23': 'Pseudo-Apuleius, Medicina de quadrupedibus', + 'corood': 'History of the Holy Rood-Tree', + 'cosevensl': 'Seven Sleepers', + 'cosolilo': 'St. Augustine\'s Soliloquies', + 'cosolsat1.o4': 'Solomon and Saturn I', + 'cosolsat2': 'Solomon and Saturn II', + 'cotempo.o3': 'Ælfric\'s De Temporibus Anni', + 'coverhom': 'Vercelli Homilies', + 'coverhomE': 'Vercelli Homilies (E)', + 'coverhomL': 'Vercelli Homilies (L)', + 'covinceB': 'Saint Vincent (Bodley 343)', + 'covinsal': 'Vindicta Salvatoris', + 'cowsgosp.o3': 'West-Saxon Gospels', + 'cowulf.o34': 'Wulfstan\'s Homilies', } diff --git a/nlp_resource_data/nltk/corpus/util.py b/nlp_resource_data/nltk/corpus/util.py index ecd147e..382edc1 100644 --- a/nlp_resource_data/nltk/corpus/util.py +++ b/nlp_resource_data/nltk/corpus/util.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Corpus Reader Utility Functions # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # URL: # For license information, see LICENSE.TXT @@ -9,13 +9,16 @@ # { Lazy Corpus Loader ###################################################################### +from __future__ import unicode_literals import re import gc import nltk +from nltk.compat import python_2_unicode_compatible TRY_ZIPFILE_FIRST = False +@python_2_unicode_compatible class LazyCorpusLoader(object): """ To see the API documentation for this lazily loaded corpus, first @@ -54,33 +57,33 @@ class LazyCorpusLoader(object): self.__name = self.__name__ = name self.__reader_cls = reader_cls # If nltk_data_subdir is set explicitly - if "nltk_data_subdir" in kwargs: + if 'nltk_data_subdir' in kwargs: # Use the specified subdirectory path - self.subdir = kwargs["nltk_data_subdir"] + self.subdir = kwargs['nltk_data_subdir'] # Pops the `nltk_data_subdir` argument, we don't need it anymore. - kwargs.pop("nltk_data_subdir", None) + kwargs.pop('nltk_data_subdir', None) else: # Otherwise use 'nltk_data/corpora' - self.subdir = "corpora" + self.subdir = 'corpora' self.__args = args self.__kwargs = kwargs def __load(self): # Find the corpus root directory. 
- zip_name = re.sub(r"(([^/]+)(/.*)?)", r"\2.zip/\1/", self.__name) + zip_name = re.sub(r'(([^/]+)(/.*)?)', r'\2.zip/\1/', self.__name) if TRY_ZIPFILE_FIRST: try: - root = nltk.data.find("{}/{}".format(self.subdir, zip_name)) + root = nltk.data.find('{}/{}'.format(self.subdir, zip_name)) except LookupError as e: try: - root = nltk.data.find("{}/{}".format(self.subdir, self.__name)) + root = nltk.data.find('{}/{}'.format(self.subdir, self.__name)) except LookupError: raise e else: try: - root = nltk.data.find("{}/{}".format(self.subdir, self.__name)) + root = nltk.data.find('{}/{}'.format(self.subdir, self.__name)) except LookupError as e: try: - root = nltk.data.find("{}/{}".format(self.subdir, zip_name)) + root = nltk.data.find('{}/{}'.format(self.subdir, zip_name)) except LookupError: raise e @@ -114,7 +117,7 @@ class LazyCorpusLoader(object): # (see http://bugs.python.org/issue1225107). # Without this fix tests may take extra 1.5GB RAM # because all corpora gets loaded during test collection. - if attr == "__bases__": + if attr == '__bases__': raise AttributeError("LazyCorpusLoader object has no attribute '__bases__'") self.__load() @@ -123,9 +126,9 @@ class LazyCorpusLoader(object): return getattr(self, attr) def __repr__(self): - return "<%s in %r (not loaded yet)>" % ( + return '<%s in %r (not loaded yet)>' % ( self.__reader_cls.__name__, - ".../corpora/" + self.__name, + '.../corpora/' + self.__name, ) def _unload(self): diff --git a/nlp_resource_data/nltk/data.py b/nlp_resource_data/nltk/data.py index e1cc913..65a0c42 100644 --- a/nlp_resource_data/nltk/data.py +++ b/nlp_resource_data/nltk/data.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Utility functions # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # URL: # For license information, see LICENSE.TXT @@ -30,22 +30,42 @@ resource file, given its URL: ``load()`` loads a given resource, and adds it to a resource cache; and ``retrieve()`` copies a given resource to a local file. """ +from __future__ import print_function, unicode_literals, division import functools import textwrap import io -from io import BytesIO import os import re import sys import zipfile import codecs -import pickle from abc import ABCMeta, abstractmethod from gzip import GzipFile, WRITE as GZ_WRITE -from urllib.request import urlopen, url2pathname +from six import add_metaclass +from six import string_types, text_type +from six.moves.urllib.request import urlopen, url2pathname + +try: + import cPickle as pickle +except ImportError: + import pickle + +try: # Python 3. + textwrap_indent = functools.partial(textwrap.indent, prefix=' ') +except AttributeError: # Python 2; indent() not available for Python2. 
+ textwrap_fill = functools.partial( + textwrap.fill, + initial_indent=' ', + subsequent_indent=' ', + replace_whitespace=False, + ) + + def textwrap_indent(text): + return '\n'.join(textwrap_fill(line) for line in text.splitlines()) + try: from zlib import Z_SYNC_FLUSH as FLUSH @@ -54,10 +74,7 @@ except ImportError: # this import should be more specific: import nltk -from nltk.compat import py3_data, add_py3_data -from nltk.internals import deprecated - -textwrap_indent = functools.partial(textwrap.indent, prefix=" ") +from nltk.compat import py3_data, add_py3_data, BytesIO ###################################################################### # Search Path @@ -71,32 +88,32 @@ path = [] (e.g., in their home directory under ~/nltk_data).""" # User-specified locations: -_paths_from_env = os.environ.get("NLTK_DATA", str("")).split(os.pathsep) +_paths_from_env = os.environ.get('NLTK_DATA', str('')).split(os.pathsep) path += [d for d in _paths_from_env if d] -if "APPENGINE_RUNTIME" not in os.environ and os.path.expanduser("~/") != "~/": - path.append(os.path.expanduser(str("~/nltk_data"))) +if 'APPENGINE_RUNTIME' not in os.environ and os.path.expanduser('~/') != '~/': + path.append(os.path.expanduser(str('~/nltk_data'))) -if sys.platform.startswith("win"): +if sys.platform.startswith('win'): # Common locations on Windows: path += [ - os.path.join(sys.prefix, str("nltk_data")), - os.path.join(sys.prefix, str("share"), str("nltk_data")), - os.path.join(sys.prefix, str("lib"), str("nltk_data")), - os.path.join(os.environ.get(str("APPDATA"), str("C:\\")), str("nltk_data")), - str(r"C:\nltk_data"), - str(r"D:\nltk_data"), - str(r"E:\nltk_data"), + os.path.join(sys.prefix, str('nltk_data')), + os.path.join(sys.prefix, str('share'), str('nltk_data')), + os.path.join(sys.prefix, str('lib'), str('nltk_data')), + os.path.join(os.environ.get(str('APPDATA'), str('C:\\')), str('nltk_data')), + str(r'C:\nltk_data'), + str(r'D:\nltk_data'), + str(r'E:\nltk_data'), ] else: # Common locations on UNIX & OS X: path += [ - os.path.join(sys.prefix, str("nltk_data")), - os.path.join(sys.prefix, str("share"), str("nltk_data")), - os.path.join(sys.prefix, str("lib"), str("nltk_data")), - str("/usr/share/nltk_data"), - str("/usr/local/share/nltk_data"), - str("/usr/lib/nltk_data"), - str("/usr/local/lib/nltk_data"), + os.path.join(sys.prefix, str('nltk_data')), + os.path.join(sys.prefix, str('share'), str('nltk_data')), + os.path.join(sys.prefix, str('lib'), str('nltk_data')), + str('/usr/share/nltk_data'), + str('/usr/local/share/nltk_data'), + str('/usr/lib/nltk_data'), + str('/usr/local/lib/nltk_data'), ] @@ -109,7 +126,7 @@ def gzip_open_unicode( filename, mode="rb", compresslevel=9, - encoding="utf-8", + encoding='utf-8', fileobj=None, errors=None, newline=None, @@ -135,14 +152,14 @@ def split_resource_url(resource_url): >>> split_resource_url('file:///C:/home/nltk') ('file', '/C:/home/nltk') """ - protocol, path_ = resource_url.split(":", 1) - if protocol == "nltk": + protocol, path_ = resource_url.split(':', 1) + if protocol == 'nltk': pass - elif protocol == "file": - if path_.startswith("/"): - path_ = "/" + path_.lstrip("/") + elif protocol == 'file': + if path_.startswith('/'): + path_ = '/' + path_.lstrip('/') else: - path_ = re.sub(r"^/{0,2}", "", path_) + path_ = re.sub(r'^/{0,2}', '', path_) return protocol, path_ @@ -183,23 +200,23 @@ def normalize_resource_url(resource_url): protocol, name = split_resource_url(resource_url) except ValueError: # the resource url has no protocol, use the nltk protocol 
by default - protocol = "nltk" + protocol = 'nltk' name = resource_url # use file protocol if the path is an absolute path - if protocol == "nltk" and os.path.isabs(name): - protocol = "file://" + if protocol == 'nltk' and os.path.isabs(name): + protocol = 'file://' name = normalize_resource_name(name, False, None) - elif protocol == "file": - protocol = "file://" + elif protocol == 'file': + protocol = 'file://' # name is absolute name = normalize_resource_name(name, False, None) - elif protocol == "nltk": - protocol = "nltk:" + elif protocol == 'nltk': + protocol = 'nltk:' name = normalize_resource_name(name, True) else: # handled by urllib - protocol += "://" - return "".join([protocol, name]) + protocol += '://' + return ''.join([protocol, name]) def normalize_resource_name(resource_name, allow_relative=True, relative_path=None): @@ -229,24 +246,24 @@ def normalize_resource_name(resource_name, allow_relative=True, relative_path=No >>> windows or normalize_resource_name('/dir/file', True, '/') == '/dir/file' True """ - is_dir = bool(re.search(r"[\\/.]$", resource_name)) or resource_name.endswith( + is_dir = bool(re.search(r'[\\/.]$', resource_name)) or resource_name.endswith( os.path.sep ) - if sys.platform.startswith("win"): - resource_name = resource_name.lstrip("/") + if sys.platform.startswith('win'): + resource_name = resource_name.lstrip('/') else: - resource_name = re.sub(r"^/+", "/", resource_name) + resource_name = re.sub(r'^/+', '/', resource_name) if allow_relative: resource_name = os.path.normpath(resource_name) else: if relative_path is None: relative_path = os.curdir resource_name = os.path.abspath(os.path.join(relative_path, resource_name)) - resource_name = resource_name.replace("\\", "/").replace(os.path.sep, "/") - if sys.platform.startswith("win") and os.path.isabs(resource_name): - resource_name = "/" + resource_name - if is_dir and not resource_name.endswith("/"): - resource_name += "/" + resource_name = resource_name.replace('\\', '/').replace(os.path.sep, '/') + if sys.platform.startswith('win') and os.path.isabs(resource_name): + resource_name = '/' + resource_name + if is_dir and not resource_name.endswith('/'): + resource_name += '/' return resource_name @@ -255,7 +272,8 @@ def normalize_resource_name(resource_name, allow_relative=True, relative_path=No ###################################################################### -class PathPointer(metaclass=ABCMeta): +@add_metaclass(ABCMeta) +class PathPointer(object): """ An abstract base class for 'path pointers,' used by NLTK's data package to identify specific paths. Two subclasses exist: @@ -296,7 +314,7 @@ class PathPointer(metaclass=ABCMeta): """ -class FileSystemPathPointer(PathPointer, str): +class FileSystemPathPointer(PathPointer, text_type): """ A path pointer that identifies a file which can be accessed directly via a given absolute path. 
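# The hunk above swaps the Python-3-only `metaclass=` syntax for six.add_metaclass
# so the abstract base class also works under Python 2. A minimal sketch of the
# pattern, assuming the `six` package is installed; the class name is invented.
from abc import ABCMeta, abstractmethod
from six import add_metaclass

@add_metaclass(ABCMeta)
class Pointer(object):
    @abstractmethod
    def open(self, encoding=None):
        """Return a readable stream for the resource."""

# Pointer() now raises TypeError on both Python 2 and 3 until open() is overridden.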
@@ -312,7 +330,7 @@ class FileSystemPathPointer(PathPointer, str): _path = os.path.abspath(_path) if not os.path.exists(_path): - raise IOError("No such file or directory: %r" % _path) + raise IOError('No such file or directory: %r' % _path) self._path = _path # There's no need to call str.__init__(), since it's a no-op; @@ -324,7 +342,7 @@ class FileSystemPathPointer(PathPointer, str): return self._path def open(self, encoding=None): - stream = open(self._path, "rb") + stream = open(self._path, 'rb') if encoding is not None: stream = SeekableUnicodeStreamReader(stream, encoding) return stream @@ -337,30 +355,116 @@ class FileSystemPathPointer(PathPointer, str): return FileSystemPathPointer(_path) def __repr__(self): - return "FileSystemPathPointer(%r)" % self._path + # This should be a byte string under Python 2.x; + # we don't want transliteration here so + # @python_2_unicode_compatible is not used. + return str('FileSystemPathPointer(%r)' % self._path) def __str__(self): return self._path -@deprecated("Use gzip.GzipFile instead as it also uses a buffer.") + class BufferedGzipFile(GzipFile): - """A ``GzipFile`` subclass for compatibility with older nltk releases. + """ + A ``GzipFile`` subclass that buffers calls to ``read()`` and ``write()``. + This allows faster reads and writes of data to and from gzip-compressed + files at the cost of using more memory. + + The default buffer size is 2MB. - Use ``GzipFile`` directly as it also buffers in all supported - Python versions. + ``BufferedGzipFile`` is useful for loading large gzipped pickle objects + as well as writing large encoded feature files for classifier training. """ + MB = 2 ** 20 + SIZE = 2 * MB + @py3_data def __init__( self, filename=None, mode=None, compresslevel=9, fileobj=None, **kwargs ): - """Return a buffered gzip file object.""" + """ + Return a buffered gzip file object. + + :param filename: a filesystem path + :type filename: str + :param mode: a file mode which can be any of 'r', 'rb', 'a', 'ab', + 'w', or 'wb' + :type mode: str + :param compresslevel: The compresslevel argument is an integer from 1 + to 9 controlling the level of compression; 1 is fastest and + produces the least compression, and 9 is slowest and produces the + most compression. The default is 9. + :type compresslevel: int + :param fileobj: a BytesIO stream to read from instead of a file. + :type fileobj: BytesIO + :param size: number of bytes to buffer during calls to read() and write() + :type size: int + :rtype: BufferedGzipFile + """ GzipFile.__init__(self, filename, mode, compresslevel, fileobj) + self._size = kwargs.get('size', self.SIZE) + self._nltk_buffer = BytesIO() + # cStringIO does not support len. + self._len = 0 + + def _reset_buffer(self): + # For some reason calling BytesIO.truncate() here will lead to + # inconsistent writes so just set _buffer to a new BytesIO object. + self._nltk_buffer = BytesIO() + self._len = 0 + + def _write_buffer(self, data): + # Simply write to the buffer and increment the buffer size. + if data is not None: + self._nltk_buffer.write(data) + self._len += len(data) + + def _write_gzip(self, data): + # Write the current buffer to the GzipFile. + GzipFile.write(self, self._nltk_buffer.getvalue()) + # Then reset the buffer and write the new data to the buffer. + self._reset_buffer() + self._write_buffer(data) - def write(self, data): - # This is identical to GzipFile.write but does not return - # the bytes written to retain compatibility. 
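# BufferedGzipFile above batches writes in an in-memory BytesIO and only pushes
# them to the underlying GzipFile once the buffer would overflow. A stripped-down
# sketch of that write path (not the class itself; the 2 MB limit mirrors SIZE):
import gzip
from io import BytesIO

class BufferedGzipWriterSketch(object):
    SIZE = 2 * 2 ** 20  # 2 MB, as in the class above

    def __init__(self, path):
        self._gz = gzip.GzipFile(path, 'wb')
        self._buf, self._len = BytesIO(), 0

    def write(self, data):
        if self._len + len(data) <= self.SIZE:
            self._buf.write(data)                 # cheap: stays in memory
            self._len += len(data)
        else:
            self._gz.write(self._buf.getvalue())  # flush the buffer in one call
            self._buf, self._len = BytesIO(), 0
            self._buf.write(data)
            self._len = len(data)

    def close(self):
        self._gz.write(self._buf.getvalue())      # final flush
        self._gz.close()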
- super().write(data) + def close(self): + # GzipFile.close() doesn't actuallly close anything. + if self.mode == GZ_WRITE: + self._write_gzip(None) + self._reset_buffer() + return GzipFile.close(self) + + def flush(self, lib_mode=FLUSH): + self._nltk_buffer.flush() + GzipFile.flush(self, lib_mode) + + def read(self, size=None): + if not size: + size = self._size + contents = BytesIO() + while True: + blocks = GzipFile.read(self, size) + if not blocks: + contents.flush() + break + contents.write(blocks) + return contents.getvalue() + else: + return GzipFile.read(self, size) + + def write(self, data, size=-1): + """ + :param data: bytes to write to file or buffer + :type data: bytes + :param size: buffer at least size bytes before writing to file + :type size: int + """ + if not size: + size = self._size + if self._len + len(data) <= size: + self._write_buffer(data) + else: + self._write_gzip(data) class GzipFileSystemPathPointer(FileSystemPathPointer): @@ -371,7 +475,13 @@ class GzipFileSystemPathPointer(FileSystemPathPointer): """ def open(self, encoding=None): - stream = GzipFile(self._path, "rb") + # Note: In >= Python3.5, GzipFile is already using a + # buffered reader in the backend which has a variable self._buffer + # See https://github.com/nltk/nltk/issues/1308 + if sys.version.startswith('2.7') or sys.version.startswith('3.4'): + stream = BufferedGzipFile(self._path, 'rb') + else: + stream = GzipFile(self._path, 'rb') if encoding: stream = SeekableUnicodeStreamReader(stream, encoding) return stream @@ -384,7 +494,7 @@ class ZipFilePathPointer(PathPointer): """ @py3_data - def __init__(self, zipfile, entry=""): + def __init__(self, zipfile, entry=''): """ Create a new path pointer pointing at the specified entry in the given zipfile. @@ -392,14 +502,14 @@ class ZipFilePathPointer(PathPointer): :raise IOError: If the given zipfile does not exist, or if it does not contain the specified entry. """ - if isinstance(zipfile, str): + if isinstance(zipfile, string_types): zipfile = OpenOnDemandZipFile(os.path.abspath(zipfile)) # Check that the entry exists: if entry: # Normalize the entry string, it should be relative: - entry = normalize_resource_name(entry, True, "/").lstrip("/") + entry = normalize_resource_name(entry, True, '/').lstrip('/') try: zipfile.getinfo(entry) @@ -408,14 +518,14 @@ class ZipFilePathPointer(PathPointer): # the zip file. So if `entry` is a directory name, # then check if the zipfile contains any files that # are under the given directory. - if entry.endswith("/") and [ + if entry.endswith('/') and [ n for n in zipfile.namelist() if n.startswith(entry) ]: pass # zipfile contains a file in that directory. else: # Otherwise, complain. 
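# BufferedGzipFile.read() above drains the stream in fixed-size blocks instead of
# one huge read. The same pattern as a standalone helper; the 2 MB block size
# mirrors the class default.
from gzip import GzipFile
from io import BytesIO

def read_all_gzip(path, block_size=2 * 2 ** 20):
    contents = BytesIO()
    with GzipFile(path, 'rb') as gz:
        while True:
            block = gz.read(block_size)
            if not block:          # EOF: read() returns an empty bytes object
                break
            contents.write(block)
    return contents.getvalue()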
raise IOError( - "Zipfile %r does not contain %r" % (zipfile.filename, entry) + 'Zipfile %r does not contain %r' % (zipfile.filename, entry) ) self._zipfile = zipfile self._entry = entry @@ -439,8 +549,14 @@ class ZipFilePathPointer(PathPointer): def open(self, encoding=None): data = self._zipfile.read(self._entry) stream = BytesIO(data) - if self._entry.endswith(".gz"): - stream = GzipFile(self._entry, fileobj=stream) + if self._entry.endswith('.gz'): + # Note: In >= Python3.5, GzipFile is already using a + # buffered reader in the backend which has a variable self._buffer + # See https://github.com/nltk/nltk/issues/1308 + if sys.version.startswith('2.7') or sys.version.startswith('3.4'): + stream = BufferedGzipFile(self._entry, fileobj=stream) + else: + stream = GzipFile(self._entry, fileobj=stream) elif encoding is not None: stream = SeekableUnicodeStreamReader(stream, encoding) return stream @@ -449,11 +565,11 @@ class ZipFilePathPointer(PathPointer): return self._zipfile.getinfo(self._entry).file_size def join(self, fileid): - entry = "%s/%s" % (self._entry, fileid) + entry = '%s/%s' % (self._entry, fileid) return ZipFilePathPointer(self._zipfile, entry) def __repr__(self): - return str("ZipFilePathPointer(%r, %r)") % (self._zipfile.filename, self._entry) + return str('ZipFilePathPointer(%r, %r)') % (self._zipfile.filename, self._entry) def __str__(self): return os.path.normpath(os.path.join(self._zipfile.filename, self._entry)) @@ -515,13 +631,13 @@ def find(resource_name, paths=None): paths = path # Check if the resource name includes a zipfile name - m = re.match(r"(.*\.zip)/?(.*)$|", resource_name) + m = re.match(r'(.*\.zip)/?(.*)$|', resource_name) zipfile, zipentry = m.groups() # Check each item in our path for path_ in paths: # Is the path item a zipfile? - if path_ and (os.path.isfile(path_) and path_.endswith(".zip")): + if path_ and (os.path.isfile(path_) and path_.endswith('.zip')): try: return ZipFilePathPointer(path_, resource_name) except IOError: @@ -533,7 +649,7 @@ def find(resource_name, paths=None): if zipfile is None: p = os.path.join(path_, url2pathname(resource_name)) if os.path.exists(p): - if p.endswith(".gz"): + if p.endswith('.gz'): return GzipFileSystemPathPointer(p) else: return FileSystemPathPointer(p) @@ -550,38 +666,36 @@ def find(resource_name, paths=None): # again, assuming that one of the path components is inside a # zipfile of the same name. if zipfile is None: - pieces = resource_name.split("/") + pieces = resource_name.split('/') for i in range(len(pieces)): - modified_name = "/".join(pieces[:i] + [pieces[i] + ".zip"] + pieces[i:]) + modified_name = '/'.join(pieces[:i] + [pieces[i] + '.zip'] + pieces[i:]) try: return find(modified_name, paths) except LookupError: pass # Identify the package (i.e. the .zip file) to download. - resource_zipname = resource_name.split("/")[1] - if resource_zipname.endswith(".zip"): - resource_zipname = resource_zipname.rpartition(".")[0] + resource_zipname = resource_name.split('/')[1] + if resource_zipname.endswith('.zip'): + resource_zipname = resource_zipname.rpartition('.')[0] # Display a friendly error message if the resource wasn't found: msg = str( "Resource \33[93m{resource}\033[0m not found.\n" "Please use the NLTK Downloader to obtain the resource:\n\n" "\33[31m" # To display red text in terminal. 
">>> import nltk\n" - ">>> nltk.download('{resource}')\n" + ">>> nltk.download(\'{resource}\')\n" "\033[0m" ).format(resource=resource_zipname) msg = textwrap_indent(msg) - msg += "\n For more information see: https://www.nltk.org/data.html\n" - - msg += "\n Attempted to load \33[93m{resource_name}\033[0m\n".format( + msg += '\n Attempted to load \33[93m{resource_name}\033[0m\n'.format( resource_name=resource_name ) - msg += "\n Searched in:" + "".join("\n - %r" % d for d in paths) - sep = "*" * 70 - resource_not_found = "\n%s\n%s\n%s\n" % (sep, msg, sep) + msg += '\n Searched in:' + ''.join('\n - %r' % d for d in paths) + sep = '*' * 70 + resource_not_found = '\n%s\n%s\n%s\n' % (sep, msg, sep) raise LookupError(resource_not_found) @@ -598,16 +712,16 @@ def retrieve(resource_url, filename=None, verbose=True): """ resource_url = normalize_resource_url(resource_url) if filename is None: - if resource_url.startswith("file:"): + if resource_url.startswith('file:'): filename = os.path.split(resource_url)[-1] else: - filename = re.sub(r"(^\w+:)?.*/", "", resource_url) + filename = re.sub(r'(^\w+:)?.*/', '', resource_url) if os.path.exists(filename): filename = os.path.abspath(filename) raise ValueError("File %r already exists!" % filename) if verbose: - print("Retrieving %r, saving to %r" % (resource_url, filename)) + print('Retrieving %r, saving to %r' % (resource_url, filename)) # Open the input & output streams. infile = _open(resource_url) @@ -627,43 +741,43 @@ def retrieve(resource_url, filename=None, verbose=True): #: load() method. Keys are format names, and values are format #: descriptions. FORMATS = { - "pickle": "A serialized python object, stored using the pickle module.", - "json": "A serialized python object, stored using the json module.", - "yaml": "A serialized python object, stored using the yaml module.", - "cfg": "A context free grammar.", - "pcfg": "A probabilistic CFG.", - "fcfg": "A feature CFG.", - "fol": "A list of first order logic expressions, parsed with " + 'pickle': "A serialized python object, stored using the pickle module.", + 'json': "A serialized python object, stored using the json module.", + 'yaml': "A serialized python object, stored using the yaml module.", + 'cfg': "A context free grammar.", + 'pcfg': "A probabilistic CFG.", + 'fcfg': "A feature CFG.", + 'fol': "A list of first order logic expressions, parsed with " "nltk.sem.logic.Expression.fromstring.", - "logic": "A list of first order logic expressions, parsed with " + 'logic': "A list of first order logic expressions, parsed with " "nltk.sem.logic.LogicParser. Requires an additional logic_parser " "parameter", - "val": "A semantic valuation, parsed by nltk.sem.Valuation.fromstring.", - "raw": "The raw (byte string) contents of a file.", - "text": "The raw (unicode string) contents of a file. ", + 'val': "A semantic valuation, parsed by nltk.sem.Valuation.fromstring.", + 'raw': "The raw (byte string) contents of a file.", + 'text': "The raw (unicode string) contents of a file. ", } #: A dictionary mapping from file extensions to format names, used #: by load() when format="auto" to decide the format for a #: given resource url. 
AUTO_FORMATS = { - "pickle": "pickle", - "json": "json", - "yaml": "yaml", - "cfg": "cfg", - "pcfg": "pcfg", - "fcfg": "fcfg", - "fol": "fol", - "logic": "logic", - "val": "val", - "txt": "text", - "text": "text", + 'pickle': 'pickle', + 'json': 'json', + 'yaml': 'yaml', + 'cfg': 'cfg', + 'pcfg': 'pcfg', + 'fcfg': 'fcfg', + 'fol': 'fol', + 'logic': 'logic', + 'val': 'val', + 'txt': 'text', + 'text': 'text', } def load( resource_url, - format="auto", + format='auto', cache=True, verbose=False, logic_parser=None, @@ -702,7 +816,9 @@ def load( :type cache: bool :param cache: If true, add this resource to a cache. If load() finds a resource in its cache, then it will return it from the - cache rather than loading it. + cache rather than loading it. The cache uses weak references, + so a resource wil automatically be expunged from the cache + when no more objects are using it. :type verbose: bool :param verbose: If true, print a message when loading a resource. Messages are not displayed when a resource is retrieved from @@ -720,42 +836,42 @@ def load( resource_url = add_py3_data(resource_url) # Determine the format of the resource. - if format == "auto": - resource_url_parts = resource_url.split(".") + if format == 'auto': + resource_url_parts = resource_url.split('.') ext = resource_url_parts[-1] - if ext == "gz": + if ext == 'gz': ext = resource_url_parts[-2] format = AUTO_FORMATS.get(ext) if format is None: raise ValueError( - "Could not determine format for %s based " + 'Could not determine format for %s based ' 'on its file\nextension; use the "format" ' - "argument to specify the format explicitly." % resource_url + 'argument to specify the format explicitly.' % resource_url ) if format not in FORMATS: - raise ValueError("Unknown format type: %s!" % (format,)) + raise ValueError('Unknown format type: %s!' % (format,)) # If we've cached the resource, then just return it. if cache: resource_val = _resource_cache.get((resource_url, format)) if resource_val is not None: if verbose: - print("<>" % (resource_url,)) + print('<>' % (resource_url,)) return resource_val # Let the user know what's going on. if verbose: - print("<>" % (resource_url,)) + print('<>' % (resource_url,)) # Load the resource. opened_resource = _open(resource_url) - if format == "raw": + if format == 'raw': resource_val = opened_resource.read() - elif format == "pickle": + elif format == 'pickle': resource_val = pickle.load(opened_resource) - elif format == "json": + elif format == 'json': import json from nltk.jsontags import json_tags @@ -764,11 +880,11 @@ def load( if len(resource_val) != 1: tag = next(resource_val.keys()) if tag not in json_tags: - raise ValueError("Unknown json tag.") - elif format == "yaml": + raise ValueError('Unknown json tag.') + elif format == 'yaml': import yaml - resource_val = yaml.safe_load(opened_resource) + resource_val = yaml.load(opened_resource) else: # The resource is a text format. 
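# load() above guesses the format from the file extension, peeling off a trailing
# '.gz' first. A hedged sketch of that lookup with a trimmed-down extension table;
# the resource URLs are examples only.
AUTO = {'pickle': 'pickle', 'txt': 'text', 'cfg': 'cfg', 'yaml': 'yaml'}

def guess_format(resource_url):
    parts = resource_url.split('.')
    ext = parts[-1]
    if ext == 'gz':                # e.g. 'grammar.cfg.gz' -> inspect 'cfg'
        ext = parts[-2]
    return AUTO.get(ext)

assert guess_format('nltk:corpora/x/y.pickle') == 'pickle'
assert guess_format('grammars/sample.cfg.gz') == 'cfg'
assert guess_format('mystery.bin') is None   # caller must then pass format explicitly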
binary_data = opened_resource.read() @@ -776,33 +892,33 @@ def load( string_data = binary_data.decode(encoding) else: try: - string_data = binary_data.decode("utf-8") + string_data = binary_data.decode('utf-8') except UnicodeDecodeError: - string_data = binary_data.decode("latin-1") - if format == "text": + string_data = binary_data.decode('latin-1') + if format == 'text': resource_val = string_data - elif format == "cfg": + elif format == 'cfg': resource_val = nltk.grammar.CFG.fromstring(string_data, encoding=encoding) - elif format == "pcfg": + elif format == 'pcfg': resource_val = nltk.grammar.PCFG.fromstring(string_data, encoding=encoding) - elif format == "fcfg": + elif format == 'fcfg': resource_val = nltk.grammar.FeatureGrammar.fromstring( string_data, logic_parser=logic_parser, fstruct_reader=fstruct_reader, encoding=encoding, ) - elif format == "fol": + elif format == 'fol': resource_val = nltk.sem.read_logic( string_data, logic_parser=nltk.sem.logic.LogicParser(), encoding=encoding, ) - elif format == "logic": + elif format == 'logic': resource_val = nltk.sem.read_logic( string_data, logic_parser=logic_parser, encoding=encoding ) - elif format == "val": + elif format == 'val': resource_val = nltk.sem.read_valuation(string_data, encoding=encoding) else: raise AssertionError( @@ -826,7 +942,7 @@ def load( return resource_val -def show_cfg(resource_url, escape="##"): +def show_cfg(resource_url, escape='##'): """ Write out a grammar file, ignoring escaped and empty lines. @@ -838,12 +954,12 @@ def show_cfg(resource_url, escape="##"): :param escape: Prepended string that signals lines to be ignored """ resource_url = normalize_resource_url(resource_url) - resource_val = load(resource_url, format="text", cache=False) + resource_val = load(resource_url, format='text', cache=False) lines = resource_val.splitlines() for l in lines: if l.startswith(escape): continue - if re.match("^$", l): + if re.match('^$', l): continue print(l) @@ -873,11 +989,11 @@ def _open(resource_url): resource_url = normalize_resource_url(resource_url) protocol, path_ = split_resource_url(resource_url) - if protocol is None or protocol.lower() == "nltk": - return find(path_, path + [""]).open() - elif protocol.lower() == "file": + if protocol is None or protocol.lower() == 'nltk': + return find(path_, path + ['']).open() + elif protocol.lower() == 'file': # urllib might not use mode='rb', so handle this one ourselves: - return find(path_, [""]).open() + return find(path_, ['']).open() else: return urlopen(resource_url) @@ -886,6 +1002,9 @@ def _open(resource_url): # Lazy Resource Loader ###################################################################### +# We shouldn't apply @python_2_unicode_compatible +# decorator to LazyLoader, this is resource.__class__ responsibility. + class LazyLoader(object): @py3_data @@ -931,8 +1050,8 @@ class OpenOnDemandZipFile(zipfile.ZipFile): @py3_data def __init__(self, filename): - if not isinstance(filename, str): - raise TypeError("ReopenableZipFile filename must be a string") + if not isinstance(filename, string_types): + raise TypeError('ReopenableZipFile filename must be a string') zipfile.ZipFile.__init__(self, filename) assert self.filename == filename self.close() @@ -942,7 +1061,7 @@ class OpenOnDemandZipFile(zipfile.ZipFile): def read(self, name): assert self.fp is None - self.fp = open(self.filename, "rb") + self.fp = open(self.filename, 'rb') value = zipfile.ZipFile.read(self, name) # Ensure that _fileRefCnt needs to be set for Python2and3 compatible code. 
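# When no encoding is given, the text branch above tries UTF-8 first and falls back
# to Latin-1, which never raises because every byte value is a valid Latin-1
# character. The same fallback as a tiny helper; the byte strings are examples.
def decode_text(binary_data, encoding=None):
    if encoding is not None:
        return binary_data.decode(encoding)
    try:
        return binary_data.decode('utf-8')
    except UnicodeDecodeError:
        return binary_data.decode('latin-1')

assert decode_text('café'.encode('utf-8')) == 'café'
assert decode_text(b'caf\xe9') == 'café'   # not valid UTF-8, so decoded as Latin-1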
# Since we only opened one file here, we add 1. @@ -952,14 +1071,14 @@ class OpenOnDemandZipFile(zipfile.ZipFile): def write(self, *args, **kwargs): """:raise NotImplementedError: OpenOnDemandZipfile is read-only""" - raise NotImplementedError("OpenOnDemandZipfile is read-only") + raise NotImplementedError('OpenOnDemandZipfile is read-only') def writestr(self, *args, **kwargs): """:raise NotImplementedError: OpenOnDemandZipfile is read-only""" - raise NotImplementedError("OpenOnDemandZipfile is read-only") + raise NotImplementedError('OpenOnDemandZipfile is read-only') def __repr__(self): - return repr(str("OpenOnDemandZipFile(%r)") % self.filename) + return repr(str('OpenOnDemandZipFile(%r)') % self.filename) ###################################################################### @@ -987,7 +1106,7 @@ class SeekableUnicodeStreamReader(object): DEBUG = True # : If true, then perform extra sanity checks. @py3_data - def __init__(self, stream, encoding, errors="strict"): + def __init__(self, stream, encoding, errors='strict'): # Rewind the stream to its beginning. stream.seek(0) @@ -1007,7 +1126,7 @@ class SeekableUnicodeStreamReader(object): """The function that is used to decode byte strings into unicode strings.""" - self.bytebuffer = b"" + self.bytebuffer = b'' """A buffer to use bytes that have been read but have not yet been decoded. This is only used when the final bytes from a read do not form a complete encoding for a character.""" @@ -1057,7 +1176,7 @@ class SeekableUnicodeStreamReader(object): # If linebuffer is not empty, then include it in the result if self.linebuffer: - chars = "".join(self.linebuffer) + chars + chars = ''.join(self.linebuffer) + chars self.linebuffer = None self._rewind_numchars = None @@ -1089,7 +1208,7 @@ class SeekableUnicodeStreamReader(object): return line readsize = size or 72 - chars = "" + chars = '' # If there's a remaining incomplete line in the buffer, add it. if self.linebuffer: @@ -1102,7 +1221,7 @@ class SeekableUnicodeStreamReader(object): # If we're at a '\r', then read one extra character, since # it might be a '\n', to get the proper line ending. - if new_chars and new_chars.endswith("\r"): + if new_chars and new_chars.endswith('\r'): new_chars += self._read(1) chars += new_chars @@ -1208,13 +1327,13 @@ class SeekableUnicodeStreamReader(object): """ if whence == 1: raise ValueError( - "Relative seek is not supported for " - "SeekableUnicodeStreamReader -- consider " - "using char_seek_forward() instead." + 'Relative seek is not supported for ' + 'SeekableUnicodeStreamReader -- consider ' + 'using char_seek_forward() instead.' ) self.stream.seek(offset, whence) self.linebuffer = None - self.bytebuffer = b"" + self.bytebuffer = b'' self._rewind_numchars = None self._rewind_checkpoint = self.stream.tell() @@ -1223,7 +1342,7 @@ class SeekableUnicodeStreamReader(object): Move the read pointer forward by ``offset`` characters. """ if offset < 0: - raise ValueError("Negative offsets are not supported") + raise ValueError('Negative offsets are not supported') # Clear all buffers. self.seek(self.tell()) # Perform the seek operation. @@ -1240,7 +1359,7 @@ class SeekableUnicodeStreamReader(object): """ if est_bytes is None: est_bytes = offset - bytes = b"" + bytes = b'' while True: # Read in a block of bytes. 
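# SeekableUnicodeStreamReader above keeps undecoded trailing bytes in `bytebuffer`
# because a fixed-size byte read can split a multi-byte character. The class does
# this by hand; the stdlib incremental decoder shown here handles the same
# situation and makes the problem easy to see.
import codecs

data = 'café'.encode('utf-8')            # b'caf\xc3\xa9' -- 'é' is two bytes
first, rest = data[:4], data[4:]         # the read boundary falls inside 'é'

decoder = codecs.getincrementaldecoder('utf-8')()
print(decoder.decode(first))             # 'caf'  (the dangling \xc3 is held back)
print(decoder.decode(rest, final=True))  # 'é'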
@@ -1301,7 +1420,7 @@ class SeekableUnicodeStreamReader(object): if self.DEBUG: self.stream.seek(filepos) check1 = self._incr_decode(self.stream.read(50))[0] - check2 = "".join(self.linebuffer) + check2 = ''.join(self.linebuffer) assert check1.startswith(check2) or check2.startswith(check1) # Return to our original filepos (so we don't have to throw @@ -1322,7 +1441,7 @@ class SeekableUnicodeStreamReader(object): unicode string. ``linebuffer`` is not included in the result. """ if size == 0: - return "" + return '' # Skip past the byte order marker, if present. if self._bom and self.stream.tell() == 0: @@ -1367,7 +1486,7 @@ class SeekableUnicodeStreamReader(object): """ while True: try: - return self.decode(bytes, "strict") + return self.decode(bytes, 'strict') except UnicodeDecodeError as exc: # If the exception occurs at the end of the string, # then assume that it's a truncation error. @@ -1375,7 +1494,7 @@ class SeekableUnicodeStreamReader(object): return self.decode(bytes[: exc.start], self.errors) # Otherwise, if we're being strict, then raise it. - elif self.errors == "strict": + elif self.errors == 'strict': raise # If we're not strict, then re-process it with our @@ -1384,18 +1503,18 @@ class SeekableUnicodeStreamReader(object): return self.decode(bytes, self.errors) _BOM_TABLE = { - "utf8": [(codecs.BOM_UTF8, None)], - "utf16": [(codecs.BOM_UTF16_LE, "utf16-le"), (codecs.BOM_UTF16_BE, "utf16-be")], - "utf16le": [(codecs.BOM_UTF16_LE, None)], - "utf16be": [(codecs.BOM_UTF16_BE, None)], - "utf32": [(codecs.BOM_UTF32_LE, "utf32-le"), (codecs.BOM_UTF32_BE, "utf32-be")], - "utf32le": [(codecs.BOM_UTF32_LE, None)], - "utf32be": [(codecs.BOM_UTF32_BE, None)], + 'utf8': [(codecs.BOM_UTF8, None)], + 'utf16': [(codecs.BOM_UTF16_LE, 'utf16-le'), (codecs.BOM_UTF16_BE, 'utf16-be')], + 'utf16le': [(codecs.BOM_UTF16_LE, None)], + 'utf16be': [(codecs.BOM_UTF16_BE, None)], + 'utf32': [(codecs.BOM_UTF32_LE, 'utf32-le'), (codecs.BOM_UTF32_BE, 'utf32-be')], + 'utf32le': [(codecs.BOM_UTF32_LE, None)], + 'utf32be': [(codecs.BOM_UTF32_BE, None)], } def _check_bom(self): # Normalize our encoding name - enc = re.sub("[ -]", "", self.encoding.lower()) + enc = re.sub('[ -]', '', self.encoding.lower()) # Look up our encoding in the BOM table. bom_info = self._BOM_TABLE.get(enc) @@ -1416,21 +1535,21 @@ class SeekableUnicodeStreamReader(object): __all__ = [ - "path", - "PathPointer", - "FileSystemPathPointer", - "BufferedGzipFile", - "GzipFileSystemPathPointer", - "GzipFileSystemPathPointer", - "find", - "retrieve", - "FORMATS", - "AUTO_FORMATS", - "load", - "show_cfg", - "clear_cache", - "LazyLoader", - "OpenOnDemandZipFile", - "GzipFileSystemPathPointer", - "SeekableUnicodeStreamReader", + 'path', + 'PathPointer', + 'FileSystemPathPointer', + 'BufferedGzipFile', + 'GzipFileSystemPathPointer', + 'GzipFileSystemPathPointer', + 'find', + 'retrieve', + 'FORMATS', + 'AUTO_FORMATS', + 'load', + 'show_cfg', + 'clear_cache', + 'LazyLoader', + 'OpenOnDemandZipFile', + 'GzipFileSystemPathPointer', + 'SeekableUnicodeStreamReader', ] diff --git a/nlp_resource_data/nltk/decorators.py b/nlp_resource_data/nltk/decorators.py index 8ab4f7d..b61db66 100644 --- a/nlp_resource_data/nltk/decorators.py +++ b/nlp_resource_data/nltk/decorators.py @@ -5,8 +5,9 @@ http://www.phyast.pitt.edu/~micheles/python/documentation.html Included in NLTK for its support of a nice memoization decorator. 
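# _check_bom() above normalizes the encoding name ('UTF-16' -> 'utf16') and then
# compares the first bytes of the stream against the BOM table. A compact sketch
# of that check, using a trimmed-down table and example byte strings.
import codecs
import re

BOMS = {
    'utf8': codecs.BOM_UTF8,
    'utf16le': codecs.BOM_UTF16_LE,
    'utf16be': codecs.BOM_UTF16_BE,
}

def has_bom(first_bytes, encoding):
    enc = re.sub('[ -]', '', encoding.lower())   # 'UTF-8', 'utf 8' -> 'utf8'
    bom = BOMS.get(enc)
    return bom is not None and first_bytes.startswith(bom)

assert has_bom(codecs.BOM_UTF8 + b'hello', 'UTF-8')
assert not has_bom(b'hello', 'utf-8')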
""" +from __future__ import print_function -__docformat__ = "restructuredtext en" +__docformat__ = 'restructuredtext en' ## The basic trick is to generate the source code for the decorated function ## with the right signature and to evaluate it. @@ -19,25 +20,11 @@ import sys # Hack to keep NLTK's "tokenize" module from colliding with the "tokenize" in # the Python standard library. -OLD_SYS_PATH = sys.path[:] +old_sys_path = sys.path[:] sys.path = [p for p in sys.path if p and "nltk" not in p] import inspect -sys.path = OLD_SYS_PATH - -def __legacysignature(signature): - """ - For retrocompatibility reasons, we don't use a standard Signature. - Instead, we use the string generated by this method. - Basically, from a Signature we create a string and remove the default values. - """ - listsignature = str(signature)[1:-1].split(",") - for counter, param in enumerate(listsignature): - if param.count("=") > 0: - listsignature[counter] = param[0:param.index("=")].strip() - else: - listsignature[counter] = param.strip() - return ", ".join(listsignature) +sys.path = old_sys_path def getinfo(func): @@ -47,7 +34,6 @@ def getinfo(func): - argnames (the names of the arguments : list) - defaults (the values of the default arguments : tuple) - signature (the signature : str) - - fullsignature (the full signature : Signature) - doc (the docstring : str) - module (the module name : str) - dict (the function __dict__ : str) @@ -66,25 +52,24 @@ def getinfo(func): >>> info["signature"] 'self, x, y, *args, **kw' - - >>> info["fullsignature"] - """ assert inspect.ismethod(func) or inspect.isfunction(func) - argspec = inspect.getfullargspec(func) - regargs, varargs, varkwargs = argspec[:3] + if sys.version_info[0] >= 3: + argspec = inspect.getfullargspec(func) + else: + argspec = inspect.getargspec(func) + regargs, varargs, varkwargs, defaults = argspec[:4] argnames = list(regargs) if varargs: argnames.append(varargs) if varkwargs: argnames.append(varkwargs) - fullsignature = inspect.signature(func) - # Convert Signature to str - signature = __legacysignature(fullsignature) - + signature = inspect.formatargspec( + regargs, varargs, varkwargs, defaults, formatvalue=lambda value: "" + )[1:-1] # pypy compatibility - if hasattr(func, "__closure__"): + if hasattr(func, '__closure__'): _closure = func.__closure__ _globals = func.__globals__ else: @@ -95,7 +80,6 @@ def getinfo(func): name=func.__name__, argnames=argnames, signature=signature, - fullsignature=fullsignature, defaults=func.__defaults__, doc=func.__doc__, module=func.__module__, @@ -105,14 +89,14 @@ def getinfo(func): ) +# akin to functools.update_wrapper def update_wrapper(wrapper, model, infodict=None): - " akin to functools.update_wrapper " infodict = infodict or getinfo(model) - wrapper.__name__ = infodict["name"] - wrapper.__doc__ = infodict["doc"] - wrapper.__module__ = infodict["module"] - wrapper.__dict__.update(infodict["dict"]) - wrapper.__defaults__ = infodict["defaults"] + wrapper.__name__ = infodict['name'] + wrapper.__doc__ = infodict['doc'] + wrapper.__module__ = infodict['module'] + wrapper.__dict__.update(infodict['dict']) + wrapper.__defaults__ = infodict['defaults'] wrapper.undecorated = model return wrapper @@ -130,7 +114,7 @@ def new_wrapper(wrapper, model): else: # assume model is a function infodict = getinfo(model) assert ( - not "_wrapper_" in infodict["argnames"] + not '_wrapper_' in infodict["argnames"] ), '"_wrapper_" is a reserved argument name!' 
src = "lambda %(signature)s: _wrapper_(%(signature)s)" % infodict funcopy = eval(src, dict(_wrapper_=wrapper)) @@ -150,12 +134,12 @@ def decorator_factory(cls): method. """ attrs = set(dir(cls)) - if "__call__" in attrs: + if '__call__' in attrs: raise TypeError( - "You cannot decorate a class with a nontrivial " "__call__ method" + 'You cannot decorate a class with a nontrivial ' '__call__ method' ) - if "call" not in attrs: - raise TypeError("You cannot decorate a class without a " ".call method") + if 'call' not in attrs: + raise TypeError('You cannot decorate a class without a ' '.call method') cls.__call__ = __call__ return cls @@ -195,10 +179,10 @@ def decorator(caller): def _decorator(func): # the real meat is here infodict = getinfo(func) - argnames = infodict["argnames"] + argnames = infodict['argnames'] assert not ( - "_call_" in argnames or "_func_" in argnames - ), "You cannot use _call_ or _func_ as argument names!" + '_call_' in argnames or '_func_' in argnames + ), 'You cannot use _call_ or _func_ as argument names!' src = "lambda %(signature)s: _call_(_func_, %(signature)s)" % infodict # import sys; print >> sys.stderr, src # for debugging purposes dec_func = eval(src, dict(_func_=func, _call_=caller)) @@ -223,9 +207,10 @@ def memoize(func, *args): # memoize_dic is created at the first call if args in dic: return dic[args] - result = func(*args) - dic[args] = result - return result + else: + result = func(*args) + dic[args] = result + return result ########################## LEGALESE ############################### diff --git a/nlp_resource_data/nltk/downloader.py b/nlp_resource_data/nltk/downloader.py index 097e574..8874e7c 100644 --- a/nlp_resource_data/nltk/downloader.py +++ b/nlp_resource_data/nltk/downloader.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Corpus & Model Downloader # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # URL: # For license information, see LICENSE.TXT @@ -67,6 +67,7 @@ or:: python -m nltk.downloader [-d DATADIR] [-q] [-f] [-k] PACKAGE_IDS """ # ---------------------------------------------------------------------- +from __future__ import print_function, division, unicode_literals """ @@ -165,7 +166,7 @@ from xml.etree import ElementTree try: TKINTER = True - from tkinter import ( + from six.moves.tkinter import ( Tk, Frame, Label, @@ -176,17 +177,20 @@ try: IntVar, TclError, ) - from tkinter.messagebox import showerror + from six.moves.tkinter_messagebox import showerror from nltk.draw.table import Table from nltk.draw.util import ShowText except ImportError: TKINTER = False TclError = ValueError -from urllib.request import urlopen -from urllib.error import HTTPError, URLError +from six import string_types, text_type +from six.moves import input +from six.moves.urllib.request import urlopen +from six.moves.urllib.error import HTTPError, URLError import nltk +from nltk.compat import python_2_unicode_compatible # urllib2 = nltk.internals.import_from_stdlib('urllib2') @@ -195,6 +199,8 @@ import nltk # Directory entry objects (from the data server's index file) ###################################################################### + +@python_2_unicode_compatible class Package(object): """ A directory entry for a downloadable package. 
These entries are @@ -209,15 +215,15 @@ class Package(object): id, url, name=None, - subdir="", + subdir='', size=None, unzipped_size=None, checksum=None, svn_revision=None, - copyright="Unknown", - contact="Unknown", - license="Unknown", - author="Unknown", + copyright='Unknown', + contact='Unknown', + license='Unknown', + author='Unknown', unzip=True, **kw ): @@ -260,7 +266,7 @@ class Package(object): self.author = author """Author of this package.""" - ext = os.path.splitext(url.split("/")[-1])[1] + ext = os.path.splitext(url.split('/')[-1])[1] self.filename = os.path.join(subdir, id + ext) """The filename that should be used for this package's file. It is formed by joining ``self.subdir`` with ``self.id``, and @@ -275,19 +281,20 @@ class Package(object): @staticmethod def fromxml(xml): - if isinstance(xml, str): + if isinstance(xml, string_types): xml = ElementTree.parse(xml) for key in xml.attrib: - xml.attrib[key] = str(xml.attrib[key]) + xml.attrib[key] = text_type(xml.attrib[key]) return Package(**xml.attrib) def __lt__(self, other): return self.id < other.id def __repr__(self): - return "" % self.id + return '' % self.id +@python_2_unicode_compatible class Collection(object): """ A directory entry for a collection of downloadable packages. @@ -315,18 +322,18 @@ class Collection(object): @staticmethod def fromxml(xml): - if isinstance(xml, str): + if isinstance(xml, string_types): xml = ElementTree.parse(xml) for key in xml.attrib: - xml.attrib[key] = str(xml.attrib[key]) - children = [child.get("ref") for child in xml.findall("item")] + xml.attrib[key] = text_type(xml.attrib[key]) + children = [child.get('ref') for child in xml.findall('item')] return Collection(children=children, **xml.attrib) def __lt__(self, other): return self.id < other.id def __repr__(self): - return "" % self.id + return '' % self.id ###################################################################### @@ -454,7 +461,7 @@ class Downloader(object): server index will be considered 'stale,' and will be re-downloaded.""" - DEFAULT_URL = "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/index.xml" + DEFAULT_URL = 'https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/index.xml' """The default URL for the NLTK data server's index. 
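# Package.fromxml() above coerces every XML attribute to text and forwards the lot
# as keyword arguments. A sketch with a hand-written element; the URL is invented
# and only id/url/subdir are shown out of the attributes the real index carries.
from xml.etree import ElementTree

elem = ElementTree.fromstring(
    '<package id="punkt" url="https://example.org/punkt.zip" subdir="tokenizers"/>')
attrs = {key: str(value) for key, value in elem.attrib.items()}
print(attrs)
# {'id': 'punkt', 'url': 'https://example.org/punkt.zip', 'subdir': 'tokenizers'}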
An alternative URL can be specified when creating a new ``Downloader`` object.""" @@ -463,16 +470,16 @@ class Downloader(object): # Status Constants # ///////////////////////////////////////////////////////////////// - INSTALLED = "installed" + INSTALLED = 'installed' """A status string indicating that a package or collection is installed and up-to-date.""" - NOT_INSTALLED = "not installed" + NOT_INSTALLED = 'not installed' """A status string indicating that a package or collection is not installed.""" - STALE = "out of date" + STALE = 'out of date' """A status string indicating that a package or collection is corrupt or out-of-date.""" - PARTIAL = "partial" + PARTIAL = 'partial' """A status string indicating that a collection is partially installed (i.e., only some of its packages are installed.)""" @@ -529,21 +536,21 @@ class Downloader(object): lines = 0 # for more_prompt if download_dir is None: download_dir = self._download_dir - print("Using default data directory (%s)" % download_dir) + print('Using default data directory (%s)' % download_dir) if header: - print("=" * (26 + len(self._url))) - print(" Data server index for <%s>" % self._url) - print("=" * (26 + len(self._url))) + print('=' * (26 + len(self._url))) + print(' Data server index for <%s>' % self._url) + print('=' * (26 + len(self._url))) lines += 3 # for more_prompt stale = partial = False categories = [] if show_packages: - categories.append("packages") + categories.append('packages') if show_collections: - categories.append("collections") + categories.append('collections') for category in categories: - print("%s:" % category.capitalize()) + print('%s:' % category.capitalize()) lines += 1 # for more_prompt for info in sorted(getattr(self, category)(), key=str): status = self.status(info, download_dir) @@ -554,28 +561,28 @@ class Downloader(object): if status == self.PARTIAL: partial = True prefix = { - self.INSTALLED: "*", - self.STALE: "-", - self.PARTIAL: "P", - self.NOT_INSTALLED: " ", + self.INSTALLED: '*', + self.STALE: '-', + self.PARTIAL: 'P', + self.NOT_INSTALLED: ' ', }[status] name = textwrap.fill( - "-" * 27 + (info.name or info.id), 75, subsequent_indent=27 * " " + '-' * 27 + (info.name or info.id), 75, subsequent_indent=27 * ' ' )[27:] - print(" [%s] %s %s" % (prefix, info.id.ljust(20, "."), name)) - lines += len(name.split("\n")) # for more_prompt + print(' [%s] %s %s' % (prefix, info.id.ljust(20, '.'), name)) + lines += len(name.split('\n')) # for more_prompt if more_prompt and lines > 20: user_input = input("Hit Enter to continue: ") - if user_input.lower() in ("x", "q"): + if user_input.lower() in ('x', 'q'): return lines = 0 print() - msg = "([*] marks installed packages" + msg = '([*] marks installed packages' if stale: - msg += "; [-] marks out-of-date or corrupt packages" + msg += '; [-] marks out-of-date or corrupt packages' if partial: - msg += "; [P] marks partially installed collections" - print(textwrap.fill(msg + ")", subsequent_indent=" ", width=76)) + msg += '; [P] marks partially installed collections' + print(textwrap.fill(msg + ')', subsequent_indent=' ', width=76)) def packages(self): self._update_index() @@ -583,11 +590,11 @@ class Downloader(object): def corpora(self): self._update_index() - return [pkg for (id, pkg) in self._packages.items() if pkg.subdir == "corpora"] + return [pkg for (id, pkg) in self._packages.items() if pkg.subdir == 'corpora'] def models(self): self._update_index() - return [pkg for (id, pkg) in self._packages.items() if pkg.subdir != "corpora"] + return [pkg 
for (id, pkg) in self._packages.items() if pkg.subdir != 'corpora'] def collections(self): self._update_index() @@ -598,7 +605,7 @@ class Downloader(object): # ///////////////////////////////////////////////////////////////// def _info_or_id(self, info_or_id): - if isinstance(info_or_id, str): + if isinstance(info_or_id, string_types): return self.info(info_or_id) else: return info_or_id @@ -628,7 +635,7 @@ class Downloader(object): try: info = self._info_or_id(info_or_id) except (IOError, ValueError) as e: - yield ErrorMessage(None, "Error loading %s: %s" % (info_or_id, e)) + yield ErrorMessage(None, 'Error loading %s: %s' % (info_or_id, e)) return # Handle collections. @@ -708,7 +715,8 @@ class Downloader(object): yield ProgressMessage(5) try: infile = urlopen(info.url) - with open(filepath, "wb") as outfile: + with open(filepath, 'wb') as outfile: + # print info.size num_blocks = max(1, info.size / (1024 * 16)) for block in itertools.count(): s = infile.read(1024 * 16) # 16k blocks. @@ -721,14 +729,14 @@ class Downloader(object): except IOError as e: yield ErrorMessage( info, - "Error downloading %r from <%s>:" "\n %s" % (info.id, info.url, e), + 'Error downloading %r from <%s>:' '\n %s' % (info.id, info.url, e), ) return yield FinishDownloadMessage(info) yield ProgressMessage(80) # If it's a zipfile, uncompress it. - if info.filename.endswith(".zip"): + if info.filename.endswith('.zip'): zipdir = os.path.join(download_dir, info.subdir) # Unzip if we're unzipping by default; *or* if it's already # been unzipped (presumably a previous version). @@ -748,7 +756,7 @@ class Downloader(object): download_dir=None, quiet=False, force=False, - prefix="[nltk_data] ", + prefix='[nltk_data] ', halt_on_error=True, raise_on_error=False, print_error_to=sys.stderr, @@ -767,12 +775,12 @@ class Downloader(object): else: # Define a helper function for displaying output: - def show(s, prefix2=""): + def show(s, prefix2=''): print_to( textwrap.fill( s, initial_indent=prefix + prefix2, - subsequent_indent=prefix + prefix2 + " " * 4, + subsequent_indent=prefix + prefix2 + ' ' * 4, ) ) @@ -788,7 +796,7 @@ class Downloader(object): if not quiet: print_to("Error installing package. Retry? [n/y/e]") choice = input().strip() - if choice in ["y", "Y"]: + if choice in ['y', 'Y']: if not self.download( msg.package.id, download_dir, @@ -799,40 +807,40 @@ class Downloader(object): raise_on_error, ): return False - elif choice in ["e", "E"]: + elif choice in ['e', 'E']: return False # All other messages if not quiet: # Collection downloading messages: if isinstance(msg, StartCollectionMessage): - show("Downloading collection %r" % msg.collection.id) - prefix += " | " + show('Downloading collection %r' % msg.collection.id) + prefix += ' | ' print_to(prefix) elif isinstance(msg, FinishCollectionMessage): print_to(prefix) prefix = prefix[:-4] if self._errors: show( - "Downloaded collection %r with errors" + 'Downloaded collection %r with errors' % msg.collection.id ) else: - show("Done downloading collection %s" % msg.collection.id) + show('Done downloading collection %s' % msg.collection.id) # Package downloading messages: elif isinstance(msg, StartPackageMessage): show( - "Downloading package %s to %s..." + 'Downloading package %s to %s...' % (msg.package.id, download_dir) ) elif isinstance(msg, UpToDateMessage): - show("Package %s is already up-to-date!" % msg.package.id, " ") + show('Package %s is already up-to-date!' 
% msg.package.id, ' ') # elif isinstance(msg, StaleMessage): # show('Package %s is out-of-date or corrupt' % # msg.package.id, ' ') elif isinstance(msg, StartUnzipMessage): - show("Unzipping %s." % msg.package.filename, " ") + show('Unzipping %s.' % msg.package.filename, ' ') # Data directory message: elif isinstance(msg, SelectDownloadDirMessage): @@ -903,7 +911,7 @@ class Downloader(object): # If it's a zipfile, and it's been at least partially # unzipped, then check if it's been fully unzipped. - if filepath.endswith(".zip"): + if filepath.endswith('.zip'): unzipdir = filepath[:-4] if not os.path.exists(unzipdir): return self.INSTALLED # but not unzipped -- ok! @@ -921,7 +929,7 @@ class Downloader(object): # Otherwise, everything looks good. return self.INSTALLED - def update(self, quiet=False, prefix="[nltk_data] "): + def update(self, quiet=False, prefix='[nltk_data] '): """ Re-download any packages whose status is STALE. """ @@ -956,12 +964,12 @@ class Downloader(object): self._index_timestamp = time.time() # Build a dictionary of packages. - packages = [Package.fromxml(p) for p in self._index.findall("packages/package")] + packages = [Package.fromxml(p) for p in self._index.findall('packages/package')] self._packages = dict((p.id, p) for p in packages) # Build a dictionary of collections. collections = [ - Collection.fromxml(c) for c in self._index.findall("collections/collection") + Collection.fromxml(c) for c in self._index.findall('collections/collection') ] self._collections = dict((c.id, c) for c in collections) @@ -974,7 +982,7 @@ class Downloader(object): collection.children[i] = self._collections[child_id] else: print( - "removing collection member with no package: {}".format( + 'removing collection member with no package: {}'.format( child_id ) ) @@ -1013,18 +1021,18 @@ class Downloader(object): return self._packages[id] if id in self._collections: return self._collections[id] - raise ValueError("Package %r not found in index" % id) + raise ValueError('Package %r not found in index' % id) def xmlinfo(self, id): """Return the XML info record for the given item""" self._update_index() - for package in self._index.findall("packages/package"): - if package.get("id") == id: + for package in self._index.findall('packages/package'): + if package.get('id') == id: return package - for collection in self._index.findall("collections/collection"): - if collection.get("id") == id: + for collection in self._index.findall('collections/collection'): + if collection.get('id') == id: return collection - raise ValueError("Package %r not found in index" % id) + raise ValueError('Package %r not found in index' % id) # ///////////////////////////////////////////////////////////////// # URL & Data Directory @@ -1065,7 +1073,7 @@ class Downloader(object): ``/usr/lib/nltk_data``, ``/usr/local/lib/nltk_data``, ``~/nltk_data``. """ # Check if we are on GAE where we cannot write into filesystem. - if "APPENGINE_RUNTIME" in os.environ: + if 'APPENGINE_RUNTIME' in os.environ: return # Check if we have sufficient permissions to install in a @@ -1075,17 +1083,17 @@ class Downloader(object): return nltkdir # On Windows, use %APPDATA% - if sys.platform == "win32" and "APPDATA" in os.environ: - homedir = os.environ["APPDATA"] + if sys.platform == 'win32' and 'APPDATA' in os.environ: + homedir = os.environ['APPDATA'] # Otherwise, install in the user's home directory. 
else: - homedir = os.path.expanduser("~/") - if homedir == "~/": + homedir = os.path.expanduser('~/') + if homedir == '~/': raise ValueError("Could not find a default download directory") # append "nltk_data" to the home directory - return os.path.join(homedir, "nltk_data") + return os.path.join(homedir, 'nltk_data') def _get_download_dir(self): """ @@ -1124,48 +1132,51 @@ class DownloaderShell(object): self._ds = dataserver def _simple_interactive_menu(self, *options): - print("-" * 75) - spc = (68 - sum(len(o) for o in options)) // (len(options) - 1) * " " - print(" " + spc.join(options)) - print("-" * 75) + print('-' * 75) + spc = (68 - sum(len(o) for o in options)) // (len(options) - 1) * ' ' + print(' ' + spc.join(options)) + # w = 76/len(options) + # fmt = ' ' + ('%-'+str(w)+'s')*(len(options)-1) + '%s' + # print fmt % options + print('-' * 75) def run(self): - print("NLTK Downloader") + print('NLTK Downloader') while True: self._simple_interactive_menu( - "d) Download", - "l) List", - " u) Update", - "c) Config", - "h) Help", - "q) Quit", + 'd) Download', + 'l) List', + ' u) Update', + 'c) Config', + 'h) Help', + 'q) Quit', ) - user_input = input("Downloader> ").strip() + user_input = input('Downloader> ').strip() if not user_input: print() continue command = user_input.lower().split()[0] args = user_input.split()[1:] try: - if command == "l": + if command == 'l': print() self._ds.list(self._ds.download_dir, header=False, more_prompt=True) - elif command == "h": + elif command == 'h': self._simple_interactive_help() - elif command == "c": + elif command == 'c': self._simple_interactive_config() - elif command in ("q", "x"): + elif command in ('q', 'x'): return - elif command == "d": + elif command == 'd': self._simple_interactive_download(args) - elif command == "u": + elif command == 'u': self._simple_interactive_update() else: - print("Command %r unrecognized" % user_input) + print('Command %r unrecognized' % user_input) except HTTPError as e: - print("Error reading from server: %s" % e) + print('Error reading from server: %s' % e) except URLError as e: - print("Error connecting to server: %s" % e.reason) + print('Error connecting to server: %s' % e.reason) # try checking if user_input is a package name, & # downloading it? 
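# default_download_dir() above ends by choosing a per-user location when no
# system-wide nltk_data directory is writable: %APPDATA%\nltk_data on Windows,
# ~/nltk_data elsewhere. That tail of the decision as a standalone sketch:
import os
import sys

def user_download_dir():
    if sys.platform == 'win32' and 'APPDATA' in os.environ:
        homedir = os.environ['APPDATA']
    else:
        homedir = os.path.expanduser('~/')
        if homedir == '~/':
            raise ValueError('Could not find a default download directory')
    return os.path.join(homedir, 'nltk_data')

print(user_download_dir())   # e.g. /home/<user>/nltk_data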
print() @@ -1174,15 +1185,15 @@ class DownloaderShell(object): if args: for arg in args: try: - self._ds.download(arg, prefix=" ") + self._ds.download(arg, prefix=' ') except (IOError, ValueError) as e: print(e) else: while True: print() - print("Download which package (l=list; x=cancel)?") - user_input = input(" Identifier> ") - if user_input.lower() == "l": + print('Download which package (l=list; x=cancel)?') + user_input = input(' Identifier> ') + if user_input.lower() == 'l': self._ds.list( self._ds.download_dir, header=False, @@ -1190,12 +1201,12 @@ class DownloaderShell(object): skip_installed=True, ) continue - elif user_input.lower() in ("x", "q", ""): + elif user_input.lower() in ('x', 'q', ''): return elif user_input: for id in user_input.split(): try: - self._ds.download(id, prefix=" ") + self._ds.download(id, prefix=' ') except (IOError, ValueError) as e: print(e) break @@ -1204,83 +1215,83 @@ class DownloaderShell(object): while True: stale_packages = [] stale = partial = False - for info in sorted(getattr(self._ds, "packages")(), key=str): + for info in sorted(getattr(self._ds, 'packages')(), key=str): if self._ds.status(info) == self._ds.STALE: stale_packages.append((info.id, info.name)) print() if stale_packages: - print("Will update following packages (o=ok; x=cancel)") + print('Will update following packages (o=ok; x=cancel)') for pid, pname in stale_packages: name = textwrap.fill( - "-" * 27 + (pname), 75, subsequent_indent=27 * " " + '-' * 27 + (pname), 75, subsequent_indent=27 * ' ' )[27:] - print(" [ ] %s %s" % (pid.ljust(20, "."), name)) + print(' [ ] %s %s' % (pid.ljust(20, '.'), name)) print() - user_input = input(" Identifier> ") - if user_input.lower() == "o": + user_input = input(' Identifier> ') + if user_input.lower() == 'o': for pid, pname in stale_packages: try: - self._ds.download(pid, prefix=" ") + self._ds.download(pid, prefix=' ') except (IOError, ValueError) as e: print(e) break - elif user_input.lower() in ("x", "q", ""): + elif user_input.lower() in ('x', 'q', ''): return else: - print("Nothing to update.") + print('Nothing to update.') return def _simple_interactive_help(self): print() - print("Commands:") + print('Commands:') print( - " d) Download a package or collection u) Update out of date packages" + ' d) Download a package or collection u) Update out of date packages' ) - print(" l) List packages & collections h) Help") - print(" c) View & Modify Configuration q) Quit") + print(' l) List packages & collections h) Help') + print(' c) View & Modify Configuration q) Quit') def _show_config(self): print() - print("Data Server:") - print(" - URL: <%s>" % self._ds.url) - print((" - %d Package Collections Available" % len(self._ds.collections()))) - print((" - %d Individual Packages Available" % len(self._ds.packages()))) + print('Data Server:') + print(' - URL: <%s>' % self._ds.url) + print((' - %d Package Collections Available' % len(self._ds.collections()))) + print((' - %d Individual Packages Available' % len(self._ds.packages()))) print() - print("Local Machine:") - print(" - Data directory: %s" % self._ds.download_dir) + print('Local Machine:') + print(' - Data directory: %s' % self._ds.download_dir) def _simple_interactive_config(self): self._show_config() while True: print() self._simple_interactive_menu( - "s) Show Config", "u) Set Server URL", "d) Set Data Dir", "m) Main Menu" + 's) Show Config', 'u) Set Server URL', 'd) Set Data Dir', 'm) Main Menu' ) - user_input = input("Config> ").strip().lower() - if user_input == "s": + user_input = 
input('Config> ').strip().lower() + if user_input == 's': self._show_config() - elif user_input == "d": - new_dl_dir = input(" New Directory> ").strip() - if new_dl_dir in ("", "x", "q", "X", "Q"): - print(" Cancelled!") + elif user_input == 'd': + new_dl_dir = input(' New Directory> ').strip() + if new_dl_dir in ('', 'x', 'q', 'X', 'Q'): + print(' Cancelled!') elif os.path.isdir(new_dl_dir): self._ds.download_dir = new_dl_dir else: - print(("Directory %r not found! Create it first." % new_dl_dir)) - elif user_input == "u": - new_url = input(" New URL> ").strip() - if new_url in ("", "x", "q", "X", "Q"): - print(" Cancelled!") + print(('Directory %r not found! Create it first.' % new_dl_dir)) + elif user_input == 'u': + new_url = input(' New URL> ').strip() + if new_url in ('', 'x', 'q', 'X', 'Q'): + print(' Cancelled!') else: - if not new_url.startswith(("http://", "https://")): - new_url = "http://" + new_url + if not new_url.startswith(('http://', 'https://')): + new_url = 'http://' + new_url try: self._ds.url = new_url except Exception as e: - print("Error reading <%r>:\n %s" % (new_url, e)) - elif user_input == "m": + print('Error reading <%r>:\n %s' % (new_url, e)) + elif user_input == 'm': break @@ -1295,36 +1306,36 @@ class DownloaderGUI(object): # ///////////////////////////////////////////////////////////////// COLUMNS = [ - "", - "Identifier", - "Name", - "Size", - "Status", - "Unzipped Size", - "Copyright", - "Contact", - "License", - "Author", - "Subdir", - "Checksum", + '', + 'Identifier', + 'Name', + 'Size', + 'Status', + 'Unzipped Size', + 'Copyright', + 'Contact', + 'License', + 'Author', + 'Subdir', + 'Checksum', ] """A list of the names of columns. This controls the order in which the columns will appear. If this is edited, then ``_package_to_columns()`` may need to be edited to match.""" - COLUMN_WEIGHTS = {"": 0, "Name": 5, "Size": 0, "Status": 0} + COLUMN_WEIGHTS = {'': 0, 'Name': 5, 'Size': 0, 'Status': 0} """A dictionary specifying how columns should be resized when the table is resized. Columns with weight 0 will not be resized at all; and columns with high weight will be resized more. Default weight (for columns not explicitly listed) is 1.""" COLUMN_WIDTHS = { - "": 1, - "Identifier": 20, - "Name": 45, - "Size": 10, - "Unzipped Size": 10, - "Status": 12, + '': 1, + 'Identifier': 20, + 'Name': 45, + 'Size': 10, + 'Unzipped Size': 10, + 'Status': 12, } """A dictionary specifying how wide each column should be, in characters. 
The default width (for columns not explicitly @@ -1334,7 +1345,7 @@ class DownloaderGUI(object): """The default width for columns that are not explicitly listed in ``COLUMN_WIDTHS``.""" - INITIAL_COLUMNS = ["", "Identifier", "Name", "Size", "Status"] + INITIAL_COLUMNS = ['', 'Identifier', 'Name', 'Size', 'Status'] """The set of columns that should be displayed by default.""" # Perform a few import-time sanity checks to make sure that the @@ -1350,25 +1361,25 @@ class DownloaderGUI(object): # Color Configuration # ///////////////////////////////////////////////////////////////// - _BACKDROP_COLOR = ("#000", "#ccc") + _BACKDROP_COLOR = ('#000', '#ccc') _ROW_COLOR = { - Downloader.INSTALLED: ("#afa", "#080"), - Downloader.PARTIAL: ("#ffa", "#880"), - Downloader.STALE: ("#faa", "#800"), - Downloader.NOT_INSTALLED: ("#fff", "#888"), + Downloader.INSTALLED: ('#afa', '#080'), + Downloader.PARTIAL: ('#ffa', '#880'), + Downloader.STALE: ('#faa', '#800'), + Downloader.NOT_INSTALLED: ('#fff', '#888'), } - _MARK_COLOR = ("#000", "#ccc") + _MARK_COLOR = ('#000', '#ccc') # _FRONT_TAB_COLOR = ('#ccf', '#008') # _BACK_TAB_COLOR = ('#88a', '#448') - _FRONT_TAB_COLOR = ("#fff", "#45c") - _BACK_TAB_COLOR = ("#aaa", "#67a") + _FRONT_TAB_COLOR = ('#fff', '#45c') + _BACK_TAB_COLOR = ('#aaa', '#67a') - _PROGRESS_COLOR = ("#f00", "#aaa") + _PROGRESS_COLOR = ('#f00', '#aaa') - _TAB_FONT = "helvetica -16 bold" + _TAB_FONT = 'helvetica -16 bold' # ///////////////////////////////////////////////////////////////// # Constructor @@ -1390,17 +1401,17 @@ class DownloaderGUI(object): # A message log. self._log_messages = [] self._log_indent = 0 - self._log("NLTK Downloader Started!") + self._log('NLTK Downloader Started!') # Create the main window. top = self.top = Tk() - top.geometry("+50+50") - top.title("NLTK Downloader") + top.geometry('+50+50') + top.title('NLTK Downloader') top.configure(background=self._BACKDROP_COLOR[1]) # Set up some bindings now, in case anything goes wrong. - top.bind("", self.destroy) - top.bind("", self.destroy) + top.bind('', self.destroy) + top.bind('', self.destroy) self._destroyed = False self._column_vars = {} @@ -1411,9 +1422,9 @@ class DownloaderGUI(object): try: self._fill_table() except HTTPError as e: - showerror("Error reading from server", e) + showerror('Error reading from server', e) except URLError as e: - showerror("Error connecting to server", e.reason) + showerror('Error connecting to server', e.reason) self._show_info() self._select_columns() @@ -1421,11 +1432,11 @@ class DownloaderGUI(object): # Make sure we get notified when we're destroyed, so we can # cancel any download in progress. 
- self._table.bind("", self._destroy) + self._table.bind('', self._destroy) def _log(self, msg): self._log_messages.append( - "%s %s%s" % (time.ctime(), " | " * self._log_indent, msg) + '%s %s%s' % (time.ctime(), ' | ' * self._log_indent, msg) ) # ///////////////////////////////////////////////////////////////// @@ -1434,35 +1445,35 @@ class DownloaderGUI(object): def _init_widgets(self): # Create the top-level frame structures - f1 = Frame(self.top, relief="raised", border=2, padx=8, pady=0) - f1.pack(sid="top", expand=True, fill="both") + f1 = Frame(self.top, relief='raised', border=2, padx=8, pady=0) + f1.pack(sid='top', expand=True, fill='both') f1.grid_rowconfigure(2, weight=1) f1.grid_columnconfigure(0, weight=1) Frame(f1, height=8).grid(column=0, row=0) # spacer tabframe = Frame(f1) - tabframe.grid(column=0, row=1, sticky="news") + tabframe.grid(column=0, row=1, sticky='news') tableframe = Frame(f1) - tableframe.grid(column=0, row=2, sticky="news") + tableframe.grid(column=0, row=2, sticky='news') buttonframe = Frame(f1) - buttonframe.grid(column=0, row=3, sticky="news") + buttonframe.grid(column=0, row=3, sticky='news') Frame(f1, height=8).grid(column=0, row=4) # spacer infoframe = Frame(f1) - infoframe.grid(column=0, row=5, sticky="news") + infoframe.grid(column=0, row=5, sticky='news') Frame(f1, height=8).grid(column=0, row=6) # spacer progressframe = Frame( self.top, padx=3, pady=3, background=self._BACKDROP_COLOR[1] ) - progressframe.pack(side="bottom", fill="x") - self.top["border"] = 0 - self.top["highlightthickness"] = 0 + progressframe.pack(side='bottom', fill='x') + self.top['border'] = 0 + self.top['highlightthickness'] = 0 # Create the tabs - self._tab_names = ["Collections", "Corpora", "Models", "All Packages"] + self._tab_names = ['Collections', 'Corpora', 'Models', 'All Packages'] self._tabs = {} for i, tab in enumerate(self._tab_names): label = Label(tabframe, text=tab, font=self._TAB_FONT) - label.pack(side="left", padx=((i + 1) % 2) * 10) - label.bind("", self._select_tab) + label.pack(side='left', padx=((i + 1) % 2) * 10) + label.bind('', self._select_tab) self._tabs[tab.lower()] = label # Create the table. 
@@ -1479,51 +1490,51 @@ class DownloaderGUI(object): for i, column in enumerate(self.COLUMNS): width = self.COLUMN_WIDTHS.get(column, self.DEFAULT_COLUMN_WIDTH) self._table.columnconfig(i, width=width) - self._table.pack(expand=True, fill="both") + self._table.pack(expand=True, fill='both') self._table.focus() - self._table.bind_to_listboxes("", self._download) - self._table.bind("", self._table_mark) - self._table.bind("", self._download) - self._table.bind("", self._prev_tab) - self._table.bind("", self._next_tab) - self._table.bind("", self._mark_all) + self._table.bind_to_listboxes('', self._download) + self._table.bind('', self._table_mark) + self._table.bind('', self._download) + self._table.bind('', self._prev_tab) + self._table.bind('', self._next_tab) + self._table.bind('', self._mark_all) # Create entry boxes for URL & download_dir infoframe.grid_columnconfigure(1, weight=1) info = [ - ("url", "Server Index:", self._set_url), - ("download_dir", "Download Directory:", self._set_download_dir), + ('url', 'Server Index:', self._set_url), + ('download_dir', 'Download Directory:', self._set_download_dir), ] self._info = {} for (i, (key, label, callback)) in enumerate(info): - Label(infoframe, text=label).grid(column=0, row=i, sticky="e") + Label(infoframe, text=label).grid(column=0, row=i, sticky='e') entry = Entry( - infoframe, font="courier", relief="groove", disabledforeground="black" + infoframe, font='courier', relief='groove', disabledforeground='black' ) self._info[key] = (entry, callback) - entry.bind("", self._info_save) - entry.bind("", lambda e, key=key: self._info_edit(key)) - entry.grid(column=1, row=i, sticky="ew") + entry.bind('', self._info_save) + entry.bind('', lambda e, key=key: self._info_edit(key)) + entry.grid(column=1, row=i, sticky='ew') # If the user edits url or download_dir, and then clicks outside # the entry box, then save their results. - self.top.bind("", self._info_save) + self.top.bind('', self._info_save) # Create Download & Refresh buttons. 
self._download_button = Button( - buttonframe, text="Download", command=self._download, width=8 + buttonframe, text='Download', command=self._download, width=8 ) - self._download_button.pack(side="left") + self._download_button.pack(side='left') self._refresh_button = Button( - buttonframe, text="Refresh", command=self._refresh, width=8 + buttonframe, text='Refresh', command=self._refresh, width=8 ) - self._refresh_button.pack(side="right") + self._refresh_button.pack(side='right') # Create Progress bar self._progresslabel = Label( progressframe, - text="", + text='', foreground=self._BACKDROP_COLOR[0], background=self._BACKDROP_COLOR[1], ) @@ -1532,38 +1543,38 @@ class DownloaderGUI(object): width=200, height=16, background=self._PROGRESS_COLOR[1], - relief="sunken", + relief='sunken', border=1, ) self._init_progressbar() - self._progressbar.pack(side="right") - self._progresslabel.pack(side="left") + self._progressbar.pack(side='right') + self._progresslabel.pack(side='left') def _init_menu(self): menubar = Menu(self.top) filemenu = Menu(menubar, tearoff=0) filemenu.add_command( - label="Download", underline=0, command=self._download, accelerator="Return" + label='Download', underline=0, command=self._download, accelerator='Return' ) filemenu.add_separator() filemenu.add_command( - label="Change Server Index", + label='Change Server Index', underline=7, - command=lambda: self._info_edit("url"), + command=lambda: self._info_edit('url'), ) filemenu.add_command( - label="Change Download Directory", + label='Change Download Directory', underline=0, - command=lambda: self._info_edit("download_dir"), + command=lambda: self._info_edit('download_dir'), ) filemenu.add_separator() - filemenu.add_command(label="Show Log", underline=5, command=self._show_log) + filemenu.add_command(label='Show Log', underline=5, command=self._show_log) filemenu.add_separator() filemenu.add_command( - label="Exit", underline=1, command=self.destroy, accelerator="Ctrl-x" + label='Exit', underline=1, command=self.destroy, accelerator='Ctrl-x' ) - menubar.add_cascade(label="File", underline=0, menu=filemenu) + menubar.add_cascade(label='File', underline=0, menu=filemenu) # Create a menu to control which columns of the table are # shown. 
n.b.: we never hide the first two columns (mark and @@ -1578,7 +1589,7 @@ class DownloaderGUI(object): viewmenu.add_checkbutton( label=column, underline=0, variable=var, command=self._select_columns ) - menubar.add_cascade(label="View", underline=0, menu=viewmenu) + menubar.add_cascade(label='View', underline=0, menu=viewmenu) # Create a sort menu # [xx] this should be selectbuttons; and it should include @@ -1586,25 +1597,25 @@ class DownloaderGUI(object): sortmenu = Menu(menubar, tearoff=0) for column in self._table.column_names[1:]: sortmenu.add_command( - label="Sort by %s" % column, - command=(lambda c=column: self._table.sort_by(c, "ascending")), + label='Sort by %s' % column, + command=(lambda c=column: self._table.sort_by(c, 'ascending')), ) sortmenu.add_separator() # sortmenu.add_command(label='Descending Sort:') for column in self._table.column_names[1:]: sortmenu.add_command( - label="Reverse sort by %s" % column, - command=(lambda c=column: self._table.sort_by(c, "descending")), + label='Reverse sort by %s' % column, + command=(lambda c=column: self._table.sort_by(c, 'descending')), ) - menubar.add_cascade(label="Sort", underline=0, menu=sortmenu) + menubar.add_cascade(label='Sort', underline=0, menu=sortmenu) helpmenu = Menu(menubar, tearoff=0) - helpmenu.add_command(label="About", underline=0, command=self.about) + helpmenu.add_command(label='About', underline=0, command=self.about) helpmenu.add_command( - label="Instructions", underline=0, command=self.help, accelerator="F1" + label='Instructions', underline=0, command=self.help, accelerator='F1' ) - menubar.add_cascade(label="Help", underline=0, menu=helpmenu) - self.top.bind("", self.help) + menubar.add_cascade(label='Help', underline=0, menu=helpmenu) + self.top.bind('', self.help) self.top.config(menu=menubar) @@ -1620,46 +1631,46 @@ class DownloaderGUI(object): try: self._fill_table() except HTTPError as e: - showerror("Error reading from server", e) + showerror('Error reading from server', e) except URLError as e: - showerror("Error connecting to server", e.reason) + showerror('Error connecting to server', e.reason) self._table.select(0) def _info_edit(self, info_key): self._info_save() # just in case. 
(entry, callback) = self._info[info_key] - entry["state"] = "normal" - entry["relief"] = "sunken" + entry['state'] = 'normal' + entry['relief'] = 'sunken' entry.focus() def _info_save(self, e=None): focus = self._table for entry, callback in self._info.values(): - if entry["state"] == "disabled": + if entry['state'] == 'disabled': continue - if e is not None and e.widget is entry and e.keysym != "Return": + if e is not None and e.widget is entry and e.keysym != 'Return': focus = entry else: - entry["state"] = "disabled" - entry["relief"] = "groove" + entry['state'] = 'disabled' + entry['relief'] = 'groove' callback(entry.get()) focus.focus() def _table_reprfunc(self, row, col, val): - if self._table.column_names[col].endswith("Size"): - if isinstance(val, str): - return " %s" % val + if self._table.column_names[col].endswith('Size'): + if isinstance(val, string_types): + return ' %s' % val elif val < 1024 ** 2: - return " %.1f KB" % (val / 1024.0 ** 1) + return ' %.1f KB' % (val / 1024.0 ** 1) elif val < 1024 ** 3: - return " %.1f MB" % (val / 1024.0 ** 2) + return ' %.1f MB' % (val / 1024.0 ** 2) else: - return " %.1f GB" % (val / 1024.0 ** 3) + return ' %.1f GB' % (val / 1024.0 ** 3) - if col in (0, ""): + if col in (0, ''): return str(val) else: - return " %s" % val + return ' %s' % val def _set_url(self, url): if url == self._ds.url: @@ -1668,7 +1679,7 @@ class DownloaderGUI(object): self._ds.url = url self._fill_table() except IOError as e: - showerror("Error Setting Server Index", str(e)) + showerror('Error Setting Server Index', str(e)) self._show_info() def _set_download_dir(self, download_dir): @@ -1681,20 +1692,20 @@ class DownloaderGUI(object): try: self._fill_table() except HTTPError as e: - showerror("Error reading from server", e) + showerror('Error reading from server', e) except URLError as e: - showerror("Error connecting to server", e.reason) + showerror('Error connecting to server', e.reason) self._show_info() def _show_info(self): - print("showing info", self._ds.url) + print('showing info', self._ds.url) for entry, cb in self._info.values(): - entry["state"] = "normal" - entry.delete(0, "end") - self._info["url"][0].insert(0, self._ds.url) - self._info["download_dir"][0].insert(0, self._ds.download_dir) + entry['state'] = 'normal' + entry.delete(0, 'end') + self._info['url'][0].insert(0, self._ds.url) + self._info['download_dir'][0].insert(0, self._ds.download_dir) for entry, cb in self._info.values(): - entry["state"] = "disabled" + entry['state'] = 'disabled' def _prev_tab(self, *e): for i, tab in enumerate(self._tab_names): @@ -1703,9 +1714,9 @@ class DownloaderGUI(object): try: return self._fill_table() except HTTPError as e: - showerror("Error reading from server", e) + showerror('Error reading from server', e) except URLError as e: - showerror("Error connecting to server", e.reason) + showerror('Error connecting to server', e.reason) def _next_tab(self, *e): for i, tab in enumerate(self._tab_names): @@ -1714,36 +1725,36 @@ class DownloaderGUI(object): try: return self._fill_table() except HTTPError as e: - showerror("Error reading from server", e) + showerror('Error reading from server', e) except URLError as e: - showerror("Error connecting to server", e.reason) + showerror('Error connecting to server', e.reason) def _select_tab(self, event): - self._tab = event.widget["text"].lower() + self._tab = event.widget['text'].lower() try: self._fill_table() except HTTPError as e: - showerror("Error reading from server", e) + showerror('Error reading from server', e) 
except URLError as e: - showerror("Error connecting to server", e.reason) + showerror('Error connecting to server', e.reason) - _tab = "collections" + _tab = 'collections' # _tab = 'corpora' _rows = None def _fill_table(self): selected_row = self._table.selected_row() self._table.clear() - if self._tab == "all packages": + if self._tab == 'all packages': items = self._ds.packages() - elif self._tab == "corpora": + elif self._tab == 'corpora': items = self._ds.corpora() - elif self._tab == "models": + elif self._tab == 'models': items = self._ds.models() - elif self._tab == "collections": + elif self._tab == 'collections': items = self._ds.collections() else: - assert 0, "bad tab value %r" % self._tab + assert 0, 'bad tab value %r' % self._tab rows = [self._package_to_columns(item) for item in items] self._table.extend(rows) @@ -1760,7 +1771,7 @@ class DownloaderGUI(object): background=self._BACK_TAB_COLOR[1], ) - self._table.sort_by("Identifier", order="ascending") + self._table.sort_by('Identifier', order='ascending') self._color_table() self._table.select(selected_row) @@ -1774,8 +1785,8 @@ class DownloaderGUI(object): def _update_table_status(self): for row_num in range(len(self._table)): - status = self._ds.status(self._table[row_num, "Identifier"]) - self._table[row_num, "Status"] = status + status = self._ds.status(self._table[row_num, 'Identifier']) + self._table[row_num, 'Status'] = status self._color_table() def _download(self, *e): @@ -1785,13 +1796,13 @@ class DownloaderGUI(object): return self._download_threaded(*e) marked = [ - self._table[row, "Identifier"] + self._table[row, 'Identifier'] for row in range(len(self._table)) - if self._table[row, 0] != "" + if self._table[row, 0] != '' ] selection = self._table.selected_row() if not marked and selection is not None: - marked = [self._table[selection, "Identifier"]] + marked = [self._table[selection, 'Identifier']] download_iter = self._ds.incr_download(marked, self._ds.download_dir) self._log_indent = 0 @@ -1806,11 +1817,11 @@ class DownloaderGUI(object): # self._fill_table(sort=False) self._update_table_status() afterid = self.top.after(10, self._show_progress, 0) - self._afterid["_download_cb"] = afterid + self._afterid['_download_cb'] = afterid return def show(s): - self._progresslabel["text"] = s + self._progresslabel['text'] = s self._log(s) if isinstance(msg, ProgressMessage): @@ -1822,38 +1833,38 @@ class DownloaderGUI(object): self._show_progress(None) return # halt progress. elif isinstance(msg, StartCollectionMessage): - show("Downloading collection %s" % msg.collection.id) + show('Downloading collection %s' % msg.collection.id) self._log_indent += 1 elif isinstance(msg, StartPackageMessage): - show("Downloading package %s" % msg.package.id) + show('Downloading package %s' % msg.package.id) elif isinstance(msg, UpToDateMessage): - show("Package %s is up-to-date!" % msg.package.id) + show('Package %s is up-to-date!' % msg.package.id) # elif isinstance(msg, StaleMessage): # show('Package %s is out-of-date or corrupt' % msg.package.id) elif isinstance(msg, FinishDownloadMessage): - show("Finished downloading %r." % msg.package.id) + show('Finished downloading %r.' % msg.package.id) elif isinstance(msg, StartUnzipMessage): - show("Unzipping %s" % msg.package.filename) + show('Unzipping %s' % msg.package.filename) elif isinstance(msg, FinishCollectionMessage): self._log_indent -= 1 - show("Finished downloading collection %r." % msg.collection.id) + show('Finished downloading collection %r.' 
% msg.collection.id) self._clear_mark(msg.collection.id) elif isinstance(msg, FinishPackageMessage): self._clear_mark(msg.package.id) afterid = self.top.after(self._DL_DELAY, self._download_cb, download_iter, ids) - self._afterid["_download_cb"] = afterid + self._afterid['_download_cb'] = afterid def _select(self, id): for row in range(len(self._table)): - if self._table[row, "Identifier"] == id: + if self._table[row, 'Identifier'] == id: self._table.select(row) return def _color_table(self): # Color rows according to status. for row in range(len(self._table)): - bg, sbg = self._ROW_COLOR[self._table[row, "Status"]] - fg, sfg = ("black", "white") + bg, sbg = self._ROW_COLOR[self._table[row, 'Status']] + fg, sfg = ('black', 'white') self._table.rowconfig( row, foreground=fg, @@ -1868,25 +1879,25 @@ class DownloaderGUI(object): def _clear_mark(self, id): for row in range(len(self._table)): - if self._table[row, "Identifier"] == id: - self._table[row, 0] = "" + if self._table[row, 'Identifier'] == id: + self._table[row, 0] = '' def _mark_all(self, *e): for row in range(len(self._table)): - self._table[row, 0] = "X" + self._table[row, 0] = 'X' def _table_mark(self, *e): selection = self._table.selected_row() if selection >= 0: - if self._table[selection][0] != "": - self._table[selection, 0] = "" + if self._table[selection][0] != '': + self._table[selection, 0] = '' else: - self._table[selection, 0] = "X" + self._table[selection, 0] = 'X' self._table.select(delta=1) def _show_log(self): - text = "\n".join(self._log_messages) - ShowText(self.top, "NLTK Downloader Log", text) + text = '\n'.join(self._log_messages) + ShowText(self.top, 'NLTK Downloader Log', text) def _package_to_columns(self, pkg): """ @@ -1896,14 +1907,14 @@ class DownloaderGUI(object): row = [] for column_index, column_name in enumerate(self.COLUMNS): if column_index == 0: # Mark: - row.append("") - elif column_name == "Identifier": + row.append('') + elif column_name == 'Identifier': row.append(pkg.id) - elif column_name == "Status": + elif column_name == 'Status': row.append(self._ds.status(pkg)) else: - attr = column_name.lower().replace(" ", "_") - row.append(getattr(pkg, attr, "n/a")) + attr = column_name.lower().replace(' ', '_') + row.append(getattr(pkg, attr, 'n/a')) return row # ///////////////////////////////////////////////////////////////// @@ -1971,19 +1982,19 @@ class DownloaderGUI(object): try: ShowText( self.top, - "Help: NLTK Dowloader", + 'Help: NLTK Dowloader', self.HELP.strip(), width=75, - font="fixed", + font='fixed', ) except: - ShowText(self.top, "Help: NLTK Downloader", self.HELP.strip(), width=75) + ShowText(self.top, 'Help: NLTK Downloader', self.HELP.strip(), width=75) def about(self, *e): ABOUT = "NLTK Downloader\n" + "Written by Edward Loper" - TITLE = "About: NLTK Downloader" + TITLE = 'About: NLTK Downloader' try: - from tkinter.messagebox import Message + from six.moves.tkinter_messagebox import Message Message(message=ABOUT, title=TITLE).show() except ImportError: @@ -1997,47 +2008,47 @@ class DownloaderGUI(object): def _init_progressbar(self): c = self._progressbar - width, height = int(c["width"]), int(c["height"]) - for i in range(0, (int(c["width"]) * 2) // self._gradient_width): + width, height = int(c['width']), int(c['height']) + for i in range(0, (int(c['width']) * 2) // self._gradient_width): c.create_line( i * self._gradient_width + 20, -20, i * self._gradient_width - height - 20, height + 20, width=self._gradient_width, - fill="#%02x0000" % (80 + abs(i % 6 - 3) * 12), + 
fill='#%02x0000' % (80 + abs(i % 6 - 3) * 12), ) - c.addtag_all("gradient") - c.itemconfig("gradient", state="hidden") + c.addtag_all('gradient') + c.itemconfig('gradient', state='hidden') # This is used to display progress c.addtag_withtag( - "redbox", c.create_rectangle(0, 0, 0, 0, fill=self._PROGRESS_COLOR[0]) + 'redbox', c.create_rectangle(0, 0, 0, 0, fill=self._PROGRESS_COLOR[0]) ) def _show_progress(self, percent): c = self._progressbar if percent is None: - c.coords("redbox", 0, 0, 0, 0) - c.itemconfig("gradient", state="hidden") + c.coords('redbox', 0, 0, 0, 0) + c.itemconfig('gradient', state='hidden') else: - width, height = int(c["width"]), int(c["height"]) + width, height = int(c['width']), int(c['height']) x = percent * int(width) // 100 + 1 - c.coords("redbox", 0, 0, x, height + 1) + c.coords('redbox', 0, 0, x, height + 1) def _progress_alive(self): c = self._progressbar if not self._downloading: - c.itemconfig("gradient", state="hidden") + c.itemconfig('gradient', state='hidden') else: - c.itemconfig("gradient", state="normal") - x1, y1, x2, y2 = c.bbox("gradient") + c.itemconfig('gradient', state='normal') + x1, y1, x2, y2 = c.bbox('gradient') if x1 <= -100: - c.move("gradient", (self._gradient_width * 6) - 4, 0) + c.move('gradient', (self._gradient_width * 6) - 4, 0) else: - c.move("gradient", -4, 0) + c.move('gradient', -4, 0) afterid = self.top.after(200, self._progress_alive) - self._afterid["_progress_alive"] = afterid + self._afterid['_progress_alive'] = afterid # ///////////////////////////////////////////////////////////////// # Threaded downloader @@ -2051,16 +2062,16 @@ class DownloaderGUI(object): return # Change the 'download' button to an 'abort' button. - self._download_button["text"] = "Cancel" + self._download_button['text'] = 'Cancel' marked = [ - self._table[row, "Identifier"] + self._table[row, 'Identifier'] for row in range(len(self._table)) - if self._table[row, 0] != "" + if self._table[row, 0] != '' ] selection = self._table.selected_row() if not marked and selection is not None: - marked = [self._table[selection, "Identifier"]] + marked = [self._table[selection, 'Identifier']] # Create a new data server object for the download operation, # just in case the user modifies our data server during the @@ -2090,7 +2101,7 @@ class DownloaderGUI(object): def _abort_download(self): if self._downloading: self._download_lock.acquire() - self._download_abort_queue.append("abort") + self._download_abort_queue.append('abort') self._download_lock.release() class _DownloadThread(threading.Thread): @@ -2108,19 +2119,19 @@ class DownloaderGUI(object): self.message_queue.append(msg) # Check if we've been told to kill ourselves: if self.abort: - self.message_queue.append("aborted") + self.message_queue.append('aborted') self.lock.release() return self.lock.release() self.lock.acquire() - self.message_queue.append("finished") + self.message_queue.append('finished') self.lock.release() _MONITOR_QUEUE_DELAY = 100 def _monitor_message_queue(self): def show(s): - self._progresslabel["text"] = s + self._progresslabel['text'] = s self._log(s) # Try to acquire the lock; if it's busy, then just try again later. @@ -2129,20 +2140,20 @@ class DownloaderGUI(object): for msg in self._download_msg_queue: # Done downloading? 
- if msg == "finished" or msg == "aborted": + if msg == 'finished' or msg == 'aborted': # self._fill_table(sort=False) self._update_table_status() self._downloading = False - self._download_button["text"] = "Download" + self._download_button['text'] = 'Download' del self._download_msg_queue[:] del self._download_abort_queue[:] self._download_lock.release() - if msg == "aborted": - show("Download aborted!") + if msg == 'aborted': + show('Download aborted!') self._show_progress(None) else: afterid = self.top.after(100, self._show_progress, None) - self._afterid["_monitor_message_queue"] = afterid + self._afterid['_monitor_message_queue'] = afterid return # All other messages @@ -2156,25 +2167,25 @@ class DownloaderGUI(object): self._downloading = False return # halt progress. elif isinstance(msg, StartCollectionMessage): - show("Downloading collection %r" % msg.collection.id) + show('Downloading collection %r' % msg.collection.id) self._log_indent += 1 elif isinstance(msg, StartPackageMessage): self._ds.clear_status_cache(msg.package.id) - show("Downloading package %r" % msg.package.id) + show('Downloading package %r' % msg.package.id) elif isinstance(msg, UpToDateMessage): - show("Package %s is up-to-date!" % msg.package.id) + show('Package %s is up-to-date!' % msg.package.id) # elif isinstance(msg, StaleMessage): # show('Package %s is out-of-date or corrupt; updating it' % # msg.package.id) elif isinstance(msg, FinishDownloadMessage): - show("Finished downloading %r." % msg.package.id) + show('Finished downloading %r.' % msg.package.id) elif isinstance(msg, StartUnzipMessage): - show("Unzipping %s" % msg.package.filename) + show('Unzipping %s' % msg.package.filename) elif isinstance(msg, FinishUnzipMessage): - show("Finished installing %s" % msg.package.id) + show('Finished installing %s' % msg.package.id) elif isinstance(msg, FinishCollectionMessage): self._log_indent -= 1 - show("Finished downloading collection %r." % msg.collection.id) + show('Finished downloading collection %r.' % msg.collection.id) self._clear_mark(msg.collection.id) elif isinstance(msg, FinishPackageMessage): self._update_table_status() @@ -2184,7 +2195,7 @@ class DownloaderGUI(object): # waiting for a good point to abort it, so we don't end up # with a partially unzipped package or anything like that). if self._download_abort_queue: - self._progresslabel["text"] = "Aborting download..." + self._progresslabel['text'] = 'Aborting download...' # Clear the message queue and then release the lock del self._download_msg_queue[:] @@ -2192,7 +2203,7 @@ class DownloaderGUI(object): # Check the queue again after MONITOR_QUEUE_DELAY msec. afterid = self.top.after(self._MONITOR_QUEUE_DELAY, self._monitor_message_queue) - self._afterid["_monitor_message_queue"] = afterid + self._afterid['_monitor_message_queue'] = afterid ###################################################################### @@ -2206,8 +2217,8 @@ def md5_hexdigest(file): Calculate and return the MD5 checksum for a given file. ``file`` may either be a filename or an open stream. 
""" - if isinstance(file, str): - with open(file, "rb") as infile: + if isinstance(file, string_types): + with open(file, 'rb') as infile: return _md5_hexdigest(infile) return _md5_hexdigest(file) @@ -2237,20 +2248,54 @@ def unzip(filename, root, verbose=True): def _unzip_iter(filename, root, verbose=True): if verbose: - sys.stdout.write("Unzipping %s" % os.path.split(filename)[1]) + sys.stdout.write('Unzipping %s' % os.path.split(filename)[1]) sys.stdout.flush() try: zf = zipfile.ZipFile(filename) except zipfile.error as e: - yield ErrorMessage(filename, "Error with downloaded zip file") + yield ErrorMessage(filename, 'Error with downloaded zip file') return except Exception as e: yield ErrorMessage(filename, e) return - zf.extractall(root) + # Get lists of directories & files + namelist = zf.namelist() + dirlist = set() + for x in namelist: + if x.endswith('/'): + dirlist.add(x) + else: + dirlist.add(x.rsplit('/', 1)[0] + '/') + filelist = [x for x in namelist if not x.endswith('/')] + + # Create the target directory if it doesn't exist + if not os.path.exists(root): + os.mkdir(root) + + # Create the directory structure + for dirname in sorted(dirlist): + pieces = dirname[:-1].split('/') + for i in range(len(pieces)): + dirpath = os.path.join(root, *pieces[: i + 1]) + if not os.path.exists(dirpath): + os.mkdir(dirpath) + + # Extract files. + for i, filename in enumerate(filelist): + filepath = os.path.join(root, *filename.split('/')) + + try: + with open(filepath, 'wb') as dstfile, zf.open(filename) as srcfile: + shutil.copyfileobj(srcfile, dstfile) + except Exception as e: + yield ErrorMessage(filename, e) + return + if verbose and (i * 10 / len(filelist) > (i - 1) * 10 / len(filelist)): + sys.stdout.write('.') + sys.stdout.flush() if verbose: print() @@ -2293,39 +2338,39 @@ def build_index(root, base_url): """ # Find all packages. packages = [] - for pkg_xml, zf, subdir in _find_packages(os.path.join(root, "packages")): + for pkg_xml, zf, subdir in _find_packages(os.path.join(root, 'packages')): zipstat = os.stat(zf.filename) - url = "%s/%s/%s" % (base_url, subdir, os.path.split(zf.filename)[1]) + url = '%s/%s/%s' % (base_url, subdir, os.path.split(zf.filename)[1]) unzipped_size = sum(zf_info.file_size for zf_info in zf.infolist()) # Fill in several fields of the package xml with calculated values. - pkg_xml.set("unzipped_size", "%s" % unzipped_size) - pkg_xml.set("size", "%s" % zipstat.st_size) - pkg_xml.set("checksum", "%s" % md5_hexdigest(zf.filename)) - pkg_xml.set("subdir", subdir) + pkg_xml.set('unzipped_size', '%s' % unzipped_size) + pkg_xml.set('size', '%s' % zipstat.st_size) + pkg_xml.set('checksum', '%s' % md5_hexdigest(zf.filename)) + pkg_xml.set('subdir', subdir) # pkg_xml.set('svn_revision', _svn_revision(zf.filename)) - if not pkg_xml.get("url"): - pkg_xml.set("url", url) + if not pkg_xml.get('url'): + pkg_xml.set('url', url) # Record the package. 
packages.append(pkg_xml) # Find all collections - collections = list(_find_collections(os.path.join(root, "collections"))) + collections = list(_find_collections(os.path.join(root, 'collections'))) # Check that all UIDs are unique uids = set() for item in packages + collections: - if item.get("id") in uids: - raise ValueError("Duplicate UID: %s" % item.get("id")) - uids.add(item.get("id")) + if item.get('id') in uids: + raise ValueError('Duplicate UID: %s' % item.get('id')) + uids.add(item.get('id')) # Put it all together - top_elt = ElementTree.Element("nltk_data") - top_elt.append(ElementTree.Element("packages")) + top_elt = ElementTree.Element('nltk_data') + top_elt.append(ElementTree.Element('packages')) for package in packages: top_elt[0].append(package) - top_elt.append(ElementTree.Element("collections")) + top_elt.append(ElementTree.Element('collections')) for collection in collections: top_elt[1].append(collection) @@ -2333,7 +2378,7 @@ def build_index(root, base_url): return top_elt -def _indent_xml(xml, prefix=""): +def _indent_xml(xml, prefix=''): """ Helper for ``build_index()``: Given an XML ``ElementTree``, modify it (and its descendents) ``text`` and ``tail`` attributes to generate @@ -2341,12 +2386,12 @@ def _indent_xml(xml, prefix=""): spaces with respect to its parent. """ if len(xml) > 0: - xml.text = (xml.text or "").strip() + "\n" + prefix + " " + xml.text = (xml.text or '').strip() + '\n' + prefix + ' ' for child in xml: - _indent_xml(child, prefix + " ") + _indent_xml(child, prefix + ' ') for child in xml[:-1]: - child.tail = (child.tail or "").strip() + "\n" + prefix + " " - xml[-1].tail = (xml[-1].tail or "").strip() + "\n" + prefix + child.tail = (child.tail or '').strip() + '\n' + prefix + ' ' + xml[-1].tail = (xml[-1].tail or '').strip() + '\n' + prefix def _check_package(pkg_xml, zipfilename, zf): @@ -2356,16 +2401,16 @@ def _check_package(pkg_xml, zipfilename, zf): """ # The filename must patch the id given in the XML file. uid = os.path.splitext(os.path.split(zipfilename)[1])[0] - if pkg_xml.get("id") != uid: + if pkg_xml.get('id') != uid: raise ValueError( - "package identifier mismatch (%s vs %s)" % (pkg_xml.get("id"), uid) + 'package identifier mismatch (%s vs %s)' % (pkg_xml.get('id'), uid) ) # Zip file must expand to a subdir whose name matches uid. - if sum((name != uid and not name.startswith(uid + "/")) for name in zf.namelist()): + if sum((name != uid and not name.startswith(uid + '/')) for name in zf.namelist()): raise ValueError( - "Zipfile %s.zip does not expand to a single " - "subdirectory %s/" % (uid, uid) + 'Zipfile %s.zip does not expand to a single ' + 'subdirectory %s/' % (uid, uid) ) @@ -2376,14 +2421,14 @@ def _svn_revision(filename): number for a given file (by using ``subprocess`` to run ``svn``). 
""" p = subprocess.Popen( - ["svn", "status", "-v", filename], + ['svn', 'status', '-v', filename], stdout=subprocess.PIPE, stderr=subprocess.PIPE, ) (stdout, stderr) = p.communicate() if p.returncode != 0 or stderr or not stdout: raise ValueError( - "Error determining svn_revision for %s: %s" + 'Error determining svn_revision for %s: %s' % (os.path.split(filename)[1], textwrap.fill(stderr)) ) return stdout.split()[2] @@ -2397,7 +2442,7 @@ def _find_collections(root): packages = [] for dirname, subdirs, files in os.walk(root): for filename in files: - if filename.endswith(".xml"): + if filename.endswith('.xml'): xmlfile = os.path.join(dirname, filename) yield ElementTree.parse(xmlfile).getroot() @@ -2417,43 +2462,43 @@ def _find_packages(root): # Find all packages. packages = [] for dirname, subdirs, files in os.walk(root): - relpath = "/".join(_path_from(root, dirname)) + relpath = '/'.join(_path_from(root, dirname)) for filename in files: - if filename.endswith(".xml"): + if filename.endswith('.xml'): xmlfilename = os.path.join(dirname, filename) - zipfilename = xmlfilename[:-4] + ".zip" + zipfilename = xmlfilename[:-4] + '.zip' try: zf = zipfile.ZipFile(zipfilename) except Exception as e: - raise ValueError("Error reading file %r!\n%s" % (zipfilename, e)) + raise ValueError('Error reading file %r!\n%s' % (zipfilename, e)) try: pkg_xml = ElementTree.parse(xmlfilename).getroot() except Exception as e: - raise ValueError("Error reading file %r!\n%s" % (xmlfilename, e)) + raise ValueError('Error reading file %r!\n%s' % (xmlfilename, e)) # Check that the UID matches the filename uid = os.path.split(xmlfilename[:-4])[1] - if pkg_xml.get("id") != uid: + if pkg_xml.get('id') != uid: raise ValueError( - "package identifier mismatch (%s " - "vs %s)" % (pkg_xml.get("id"), uid) + 'package identifier mismatch (%s ' + 'vs %s)' % (pkg_xml.get('id'), uid) ) # Check that the zipfile expands to a subdir whose # name matches the uid. 
if sum( - (name != uid and not name.startswith(uid + "/")) + (name != uid and not name.startswith(uid + '/')) for name in zf.namelist() ): raise ValueError( - "Zipfile %s.zip does not expand to a " - "single subdirectory %s/" % (uid, uid) + 'Zipfile %s.zip does not expand to a ' + 'single subdirectory %s/' % (uid, uid) ) yield pkg_xml, zf, relpath # Don't recurse into svn subdirectories: try: - subdirs.remove(".svn") + subdirs.remove('.svn') except ValueError: pass @@ -2481,7 +2526,7 @@ def update(): _downloader.update() -if __name__ == "__main__": +if __name__ == '__main__': from optparse import OptionParser parser = OptionParser() @@ -2520,7 +2565,7 @@ if __name__ == "__main__": "-u", "--url", dest="server_index_url", - default=os.environ.get("NLTK_DOWNLOAD_URL"), + default=os.environ.get('NLTK_DOWNLOAD_URL'), help="download server index url", ) diff --git a/nlp_resource_data/nltk/draw/__init__.py b/nlp_resource_data/nltk/draw/__init__.py index 8e90fd1..f5c6a6e 100644 --- a/nlp_resource_data/nltk/draw/__init__.py +++ b/nlp_resource_data/nltk/draw/__init__.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: graphical representations package # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # Steven Bird # URL: @@ -8,7 +8,7 @@ # Import Tkinter-based modules if Tkinter is installed try: - import tkinter + from six.moves import tkinter except ImportError: import warnings diff --git a/nlp_resource_data/nltk/draw/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/draw/__pycache__/__init__.cpython-37.pyc index f6793124be2de1768d289001f8e9a3d18b302248..2beb7b00346e3d632da7ec0ff693a0bd2ef659dc 100644 GIT binary patch delta 422 zcmYk2y-ve06oqXkjgvG^(^4TJu^=Q+2ilFP>Hv?>0k{h)vWKX4iky_6sS6t{RbK!` zM%Z|j4$SZhO!$U@+R|s=bC2yKe+FNHI1wTyZp``e@_i|G7Ei9Y&m@sl6GMSgaz=^+ zpC~qs?J*@dU6*MOJmo$6(tB~93sQ{EQTKJj1P~Yw+=LL;-iD4$46$iK(0m7t^ z*1bTtOa@uK=elk7JJ4}OD5D1gTat}QFaE}WJ-X)AG%qS;-#$+1oGaSrIJ6QGDc)N|RE<|O1*Eh2pRa)kh)gwDvcy42TM8g5nLRZtiw3FL@WtFl4 zb~qwLLHdqJ% delta 356 zcmYjLy-EW?5T4!pOD=mqF)1{NjUWU}4txL+Z7hWp>NIj;7oxX&k>x~ErLnM;!(Oqn z^eudb6k@5bVCC*rnSpO+zHk11{4YN`j-n8Z?M%7_p9qB3$J>{dX0t}j7SGH6e+bYCRb+DrtG8JLeXr7E! 
zf=QG6GF4sdl0NW&w;ljDIGMn({cE55_&gyaW|ARY`D)JZq|ml}2OXR6cbd)SN@O?r zVm80cb&-wheL6`}JJDMORdHQbLf6l9v`Y=A9Sg1jj7wP+W3Kc2aixXOo+0aR+V5eq mrsPv|Fszv~7}=KKS#-%6GM diff --git a/nlp_resource_data/nltk/draw/__pycache__/cfg.cpython-37.pyc b/nlp_resource_data/nltk/draw/__pycache__/cfg.cpython-37.pyc index 9a77630ddbb8204fca5cf51e7a5d026aa679b103..376fd861a6ac654fe1ab3d4e8c6cfc4c1dfaa3f7 100644 GIT binary patch delta 5638 zcmb7Id2n4-8P9oJUS9U3X`7~L)23 zi9*UyWD!9)AR;0lg5V&6PZn`Q#e(Bh8Bo!y&LI5ZfH(>|G8Aq7e&5a3XMv$J{oU_; z=R4}0i8ot&f15ji_@j65T9GRKIRRag17xkkQLL%af`ka&gW zJmSsYu`b6bGKz^&%#2#2#3((Pqb)GXjB@(c8Pkjk`YtrPZ04b}~slcMbV$94Thz4Z6Ivc30!VZ#Q5m9bLPdH{$m*#-%R?C9j+h>5A zyYEo@*3L`z$tFlI2CM{3E}qyyR?@^C`frB_!Xn}e=gP{=OTnk}#4L4VvCLmU{KUV` zojWJ%GWjJ1B2-8WS*)r{*N6gjp!6nDul`b6TRFKoYZlh^L@MFv`eUlL?6lad-YQ!a z!x{2oz$Jhnfg93oCng=)4o;XkiIi;;XFCQ#Z@f809&!e)C8=PM$wR7xa=3=CMqZg8(@n~oN0i>2bGr3H4H z)_^hgwJd+nmBS2NCx-}}S{)iG!%-jmKyCrA4Zxo9%dMc9yBSm!!ANdxbMva^=4RT1 z(;3<7C8m%yM7u##%KWs{{L(Z?s81DC9j)6(Gf|%_OxXbW^VN5Q@!_~-&QNz&IV&+m z3N3|Lv$x8(wAmE{S&Pm@)>{eNA&Uk!&Q{xIZxW5_R>0@Rz=w)~A zxPq5qUx!zdRMrV@zD~}INu8$buFMT{+l1&=FVsxi1GC*ArIR^CmuJ9AIPy4l`V9Dm zdMp*QhDS_x11ZFLWnum#Aj^yq;&o?M&U;pT0%n{Mc_X1kE%CmX?1+XuV?s7*5$|(D zhH06ODd9n`J8mv)iy6jwr*PtAoiapAdNYS=T@m4A^DWsZtJQ|O+I+5G9LK9ve_d7A zgiGY-X$`61)q`ZriT8O1T#e%=#m+y;Et)XjN%xzn%hhk|T1BlYT6pz%mA+%)#^rt3 zTS(*!>CVpK{N=r~b_~*0Gv$}mBlRoQruvm@Z}8TBq%A0p$IQEEi_ZOQD-M=x3EZGg zWqh05NwgM*J5eUa5=F8)d>e1Wf-eBh*G}F-yxTH+8fwM5F~x2r4jH8&%(1^f*xkbM zBw;T-roI*3upH|~+b(U}e{pArzV{-j)ZE6#=)@sz z0EsQXh?BF_{4DYA%gDx+V)Q5<6Y+M;)3@y1+j#(+-AH_@u9xiG$6I5Q$tZKmqu>w3 zq&YP;yqvg)q_s#)%JgwBQzlntDROs!M4wiTi~p9u#go?n1YiR}M>JO=SLL+;4%Ky_ zt_OS?07LLpgGSzHVX8DR+rHrS8YYn!GbfhJ6Qek81d0DSBF=~!Y13D7(8AtPcM!w% zk$rbzhHnNR{1ps~2*&f`Q(*8L81#<52aNMAb<1?}m2K)MZ-a41)iedfA5>k_6)X5z z$~8=CWYTyfCXC76BCL-I<@nUgO$QqJ&3Xumy#FG8-9qYMv)%r6XvIZW0|oPBAz%l= zxGiv^WP>RIy*vf;^^E2@$C~^aA|_iy(6X+KQ}VwheSG+ zy+2;zQdP3dYUKC$!wke5Ua1~f_V~0KqNt2Kj88yLr;ofyrI%OKAz~loBRu&v>Y?Rp zM{6NDU#`O1R={ckx(2$Dj_bOd0oED-r^Ed`iEEIsO!-yt?*-fm_%h&jz{ddSmbe*_ zyFlT+@~$>YzL({<=Ft1iC4}=@-i;Y{ABPT($Zl3(Ja;TpoXFu?gb9?i+tuSMwurTv zcUBw|qldsv0C2~sENuqOtp}QrXbC;nz0~U^W0FdtV-7ho9}*V>jsS8AXv$8EnDQ7H zY)uFh-h6uRxCu?ti{C>9plb@RSE@HxH8jtAdWaT9^xm3N{x~&cqrM6K+QH4wU_)vp>BBv% z*Q8$1N-7uANQ{0C9Z$=Cdmt7!WxKN0&3S)wo+A09nFrTZiNywkrI#51Pe2h=l z5bfp}GS=H0>!C~ajQacf%HE%X8yYi>AA|2;%^iSe3A945rPGfm40{hA}SwePYlrk9w8fP~VEW=Z0lE?WEF%sfD;%u-yO|YG?r(9m!6-pV10O zo!$^zF*WhNLEJ1!w`{^&I$g1T)WBOaJsZnK^Uopu48cgaearr~oi$tbM@!f?@1eV# zrnIm<*n>DzAFS@*RAWuH(J+aB9=Jg()?-?HuOvH`jlY8CsYvtP3*cBcfQ{q{;=AIg z8}Mu$b05s`ROfs65)E(9v}_&}qllb51wiEFa|D!IzXbg&0HP?L2jH%_p*}Ow+vh~f z<&VMp0fCm+M=T3$`4cc71T+JF4Vbj46vL$e4kE%nDktM!PGc?5(~};Oh@XU(;`)YLG~duN`Yv0>Q2K|0p_D5y!*d*RlnH^7s_@s}lWnSiu zohQUxz8gaFnG-fjKFJsFsa4nQTA6?@T42v$!Xe`^;9vI90{Iqb?|4wK(TyCD>7@5( zDew8jJIsG+@#?-GLr!~kM6u_dF&`}6UBCQY2(Ui?0kx4p3zPY^OTgzHpRpZyOECNc00< zXYf5xe8q6xUk_tG-!q&e$cpDfs25NPKrbMFNT5;T%l>rA8kYNLNUtIdqeWiq*F)-U zz&`kyOsW;i9a+@O--b9J=!22!Gik<=7X2VzPk5w1Sl7^e-K6N;qAMum?vL25NaM!9^2=Nh{(p zJyge+GLtKCdDhIJld#mAowF`ur+kxU-B7ycD76RNn|Zh4Jz~qV0NCO6o|DEh#mN|6 apQN7-mbo!PvUUSOpg!y?5URUt{{H}q&-9=G delta 5602 zcmb7I32a=|74@6VN|lG0U>K8m0Xv=oX;K`nF$1xkSyP}EjcOI4-nuMjCnR8&H0De7WM(|hjsmLUZZ zDd)d;-+k}h?cICF4?ZA1_?8H7FDwk@(7*XTUHZS$XTx*m{e@;`j+SEz(_;%w(AR5w z;$F=g_i1N$FB}zE^4W#)uojNb&}PJow4!*iR?Kw2T@o+VN_iZxXU5C4viK}*7SD6- z+3`8rocLUAZoFJ8k5_0F@k*^yZqUD-{aWR|KHOEQ`$ z_XH|PJGnrOcvsGj&~U7^p+6b78jc#n{mDT?4m7M#PkCF#0X62mP__`vl>{T+s@fDl z%5rtlx3n;fm3agapR7}1|J(|kCK>8!BW_L81{ca}2rj65O8-522u!)rWIS#p 
zOj(6lUfLM2dXv$il)R4S5h1tINAtG~4%)+-zuQV#X)VxdB%+qB`PwXdP<{%0Avq;7 ztRy`Ij#p0(u)HL^ffMK}afFkrM-5d}7M>45<);Bx+C_E}?bn%2WggEi7{c+VV` zKx4;mNtvINnpavTG4-m-ilepNv=Z?+!jg-@f2H`2KRO(>ty${PigYuUNTB6lYjAgY zRx~rS-2L#o{%Gxpxu3DNqLEVle zti(`UTB>MXgV?FI&0BV;5v=^$G#W+nTyqc5Sj!)jNmEY`4cb=95iu4~^U3}c%~C9@ zY{Kpjo!ZC+itfr=?V9=G=l-AvATVYUY0E?f5C4tP}Kn2JKi{4&kIzpcm>!!mx)& zEVcs)#DR4b>$d{3)Hp#jEwgjs%i;*s7!!FTfu$>>{f6v_1YBhTDUu@E?*vTCw$he_ z1v&FLxzNop%`0x<$jQ3p7;Wjz#A+N7;%M_M-y$njdu>fVXE1i;E$XJ)impkM$gj{I z62Z*~aU&h=cNMq|*H5usd6N^KR6m^@uo5?`w`$jlYBj&^%tX$9q;5-7FU}Sc0YkF0 zaX5ha?5q?0^wunSuX=uQv+7*jyx|M(-cNJ{`SF-_j*jR&$hzWUxt_rB>y*fM$lWy7 zf-ooY#CV`c7KfkXSFu3>u9QyRLA1Lv#*!Mbaa^#oL?NZ_1T*{oDsFcQCxV2n^tgH< zvbhQSJ0kfUMy}_*eU$QiC_b*RVZE!nbH}x<2lsS#=-oTzovNX}J~DZU<3nJ}ui@q_ zF+WeVZ)a|+Zx*Bdd`-wbF;8#l?(RHdV7U`$lW&(K%0(}M0*cN4+!kbYms3cn1# z_#2oO5=_L!FbG@){qEJ!Kg;A!iB7gkSvu0&P&BO?8+^i1%?&44b6v_ctyEejQ6(mo z$vz^ek275s>dy^_>W|=>?+|>%CN6J&)!&$Ee-P}Pbg#z<>Pc=ccHsy&8M=>;6=I(q z1XGsJju0(REm?WrVYZ>SHWea2v3I@<_MHGeLc<6)mwd&v7ayN2!{!}ow5hBX9{VU8q2@18&opfqtp;bcY{uR- zfVBkl40JOY)pa=wq;&uehX;5O&md-7@|&RF54Z<#FW@e~wSa2?=tkro0P2IYj@OQVS@9(RXCp@+97p#UF*kkLVk@n%hhY^PIe)dBwxK9BP68{+0dbiY+F%E z&D>Q`KM&UL6F7PN+|fa~{dTu{bp0Z?S-qN+Kcqc3sCU+%=;_4~t$-~Aq}im~D6+jo za%SjhX`ut47C3RtNV(1kPEbF}7#Z4cstX&2wmpZ#se7akx2bMldQ~kcL6qX&kbXQV z2U3GZ)ROzu%^TTMVpQ-az=eJhb zQ;joJ;$H-g-!^(I+buv|iZoaGn`xcWGQT|^u62AkNS-9RBaS*g=&A+A&6oACUL~fx zGcDVO#3(!`e+Gc(BJj)}l5|7csT4n605szig>_~_bbOlbR%WX|ik_*(WZSBh@ z_mbW=4jUz)e8Ek`)Nu)-zOGL0y1kJX-T-F-fZHWpCNB|?tEQG_fVxpN?_N~Tt-^WC z4-q)RWSJ+ffxwR6q+e{S{-{pv-YUk_OS_*A@J*js%3x-!?G~|s--eKU=cJC3Z}LQY zs@1*MH1l6IG~fQASelfFVEkh~QXv0|v3os6(9sF?%4D3LUM$h)mQU1S{_BdH_c`Bj z*k7WB$PCv!t_{HXH^94qzXRUO$x&^4XRTlX-vi7yK7K5F$(X^GJbj;=wsq(wZ zuYH7AoItYYC^ZJ$l}UaFW7zot05ZFM=9FSQf;*hxra1jfu&pgHX$tAXw>aogw{}(k E5BVACo&W#< diff --git a/nlp_resource_data/nltk/draw/__pycache__/dispersion.cpython-37.pyc b/nlp_resource_data/nltk/draw/__pycache__/dispersion.cpython-37.pyc index ee174f39c7863526c65c252f58e2074832f27f3c..58927ade4b7bfb5f0937bd9d706ccd00f17d5566 100644 GIT binary patch delta 196 zcmbQnyPTKTiI~eKL@}iBr7)+kv@k|7rZT0lW-}GJrLd*2 z=P*aH0BMdK)?BtIHb#aN&J?Z`?i8MMmMHdA##FWi>?ynp8B^JtnSgTa8?!o?+3fU7 zi;MIpv#`u!yft|~i!@`@TN&gC@sh mGd4x8+=9fCD3-jOlI+QTY*qq1Kp7ThE&~om4n`g(9!3BlR4bPN delta 205 zcmZ3^JB^puiI@op^YU>5Sy&&bbB)z3<- z%*ZcDEXvl`o5V7UF>3N`7HP&?lee=(07(H>C7_5gkd&Gn#@aLaF{`VxpC*%A5hKtQ vnrye&TVJ?aZO@sqhjwSCTgDMK^`WqakXw@C1y8Orjo7Q)Rsxb&6=F=+AKl*Pk&PH#Rl~UGy*SKV_Xe)i|?|p_ha!>Pi?1Gi9XB zsA&Lqr`%CDJv}K;+8gzzeNkW9AN8jL(LlN`T4%U!b|t(C-w{{BXM68;M}ze4PXy>a zVAs=oL!vGbJmQKr+D(c2epBqX8}D_IWcTf^aKq@P;LiSJCTEMeryg6^5H?gGksM4q z$!zBQ8@GAZDNj!}lM_%>1$(l>?iJa5CLs(O=u)0cHe-kVqLJRBQyn{Hcjw|+k+8*3 zGLh>O4KnU+H|mRzcz@(I>ZD!&Q{SrAydzrWZ4D9Qh&W<;iJF7DZ@4O|t~6 zX?15)`E&g%GH`_MlCHdo##x1z^`y{j;9N zJ|#~~e`P6zh;0BL0pK9m3T!(85iuz3-44ckuo{sVdMj0@pqpfoh)SR&O>T6ZRvww+gt`KxA`txJhqET5RC zvBZ;##qE^yv*OpAzhEq4CaiXXiRw~GhthZ0cHWkc&bmK@r6_;Kj*$xZ{J%j2+NHHDahco9RjR=7k;hvNoztWQR(5R#9v`FR2D1tZ?62 zeC_&&jVpMsFyp-rmD}lMnIi172!vL3DDeJet_~4>>=59ojK*DlxDfeh{ zKs%KE?n`DITja{}5j@I6W^$C9HY~Pc)I*q4<05g8D2Ix{mKTjqEQz*acdwlbo62li z%4~#sdy>M*DNia!8OaMr;e0pPi74sLrUxP?rF*GW&;kT9p3<*C4{;^HSX+sEXzUd1 zV-n(XZKE(^1C4@~nM)~fHwONefbvmf=54J!iLQLm8}(r#Icm6yZ!PULdU?j_G!phi z>v}~pVa2Js@C?esD3UN7)ENM3{mQeM5$%I|nVID7jzP;TjOaqHoU;)~A zGk_t>#Y}+GuB<2aigt`~hOYs}cH~)0m5yfxam^xBz0*h8rla;eoY6!?E^l$qieDJK z&rsEi&`8;lx5(`sSBF@CIG>&PvK;M*blnB2lE)szrxO4V0Dzpl%Q%nnckrtwY*kh+ zYw6A6rQ&-X&l@+h)p%N66GV-`17)dtIV@>vMN)Yq`N^|uer&9N2(%}m!Nkg4BqmfA zqiy26DMM>x%h{kk5TXsLJTcMhC|~~&o;5}lApCFn_}bk@+v!NN-nLFbjne>Zys8By 
zjs1^Y8;Q4L5*6T@h}Hd2&Nc zPdb0nlY+8dUbiu!89z8_Mo@<23mbbhBX;P?EfbVi$iLaNcLOhTN~JZhVwvu~teBjX zvm|APd}mX?p5B`D^q{<2K77-D&A4yUjG(+mwr%cP#d>?Wt14~WVyt(RF<2YwXy@n4 zS2xGkGIyybOJ@h|Ty}CRevBmCARBL{{3Lo?@qq6)6TYk}!ZjP4qqck4QP?TkG< zAR^~v$DJn^Rz#M!j7Py-8P!`g_Wmd2@9$i#PqCNj5}uITqYuo*PE0aC3vo{p7!7uA z%;$31%#vl1@C?x+|2I1O3beJ#XY~j=uLySYO1bE+B?YWZd8~9Qt~>*=?tbBe7??`A zsW)Y>MuVmEK|Cf@z;d!8NA0lodIXeS9;0@)Cz(oFxmY}9hnsW*u@T}pNDcwJi9obY zpEgKZRK9-K%w?PLr3(PjYnB4af0duL9L}aIQjdUzmE1vt(B0&C2`C4p5 zYmRI!&wc47gYwt%p7=q{IO*<#`;_0vW!;A~BdWsWW(4Jb7T@Y#Xs&=A$cJCRL#M7X z_6uYoWe=~K4}EMd&Uz%HUvB7WZ9Y$!cn07g07U4!j%VfZp82iJ2MH=bXA17cIc~?u zWszY2Q}O3LdyTcLA>=Cn&l4yGWE6+IQZxSozKwOB^4Y^VJCkUXLw)b9VqIE5#i~_h zl`c2N5g;&YvwSLfHnNAXZRB+3Jm^wRs5N;_prw83lUxNf{ymd=2U$Iwryx)Q1 z%OEJZkOhz9!hLeef%ayoD83HR3Q$$BQ|>)5qnWM1eq~ea1`XTyj68Xuc_#WHB9*}D`jUaOBB0u(~MRo|0k>#wkkVryl{eDhUKyRL0u-J zARjkwcn|n!F+6y{Xy?W9RS(bc^hI-wb9W#RJSXJKLx)@0HOJ_c$I5!)drh8DFX3`M zCJzpeXrAMf;Q`Snit|Ti8XY_lcO85f*0#omn5vmbHRul2PFR$eX`AB3H{bwBCsg;@SXtSiNB(>s5tw7Q#U9$k35!e}}Qx=-8j^f#emYD^PvE88LHTMywnl?DNo}?@A z%|c`aJ4|=YtQ|-AVInMVJokZ}XRB^Jo^Cr;Tb{VJh{EL4h{NS?GOm(cr?wPOV#3DG zu(_=(Y<6{p7mB}w#J>mlIKU?WZUjK{EY<+T0U`i-0Q8aCMh^iy3Ggw1e*pLtz&QZk zMPCHA8sMu03gR0;1;Dofz5~E}>?eUm0l45C1IE>x!vfym(!-@idfBJ;?sm}G3A4&_Zpur{_gZ2j3vsSzz-?e5sHmj^p$RS>0&s0(M7J9OT&3GBK^=A zI`5ZmOlk&ou6-My%+2wcs zJKq0;z^G^biWMt}r&3zN(}f>jBLG+i^;-k9$+Lr^^_f(zzok1X2Iz;}Ux4J7An~D& z+X?Z{^2FIyZESybOf}`;BKpZXmdvEG*@19SdHMKOk|FlT)nf<%F9k delta 8076 zcmbVR36K=md7kdM_Y~4{gAB+BhGU0gfnC84h-0y&2oWNcM1!`Oqjz`Mnd#;A>;*Q9 z0_Y^j;)z8_j-(=1RW2hb9V?EpqbOEnt1Or7#7>lsT`K1)IaR81B`#HBC$9Y8|K`{o zEz4H6YXAQGz5jau_1^3G``7fVuj!3D8yo9<^ml7&zv(>FIKMANFBzZV%V-%rrx_YO z{W(4FH~jg45y%IPU_NAo@?j&a`S$uUflTm>FB7!iTujZPlOlX}*m9TfSKRGtb}s`p@qPY&QZucA+Suq}HqJv4zzu?7>1tXf)7e z1PXS+iiJf3y&1C{D`%yPDO+SLF`Ug5`$WAw9&FR<+*gC&3u+Phh58@fyeHmg=dJkB zSw$3Tx_8Y|pvwqMEA>&uFxJ$*92S;nm9>*ienL_HxM^ei zTt)dAet03}wrSN|P|SOqH*G4pNHL4$O4RjNBo*iD&4~_X(V4}InghvR%gG+Mrq%nb z;$C^v){PUm#qBrJdQtI>-*imVWn|WV%`XqmYSjYr@mV{x1@hfl_suD}i-XH?-9S7$ z+}4{P)hScH+Ol@_RvPeHOu=TFX*)NVFPPav#u^cAkRQjqKbdmGQrS3rfz}~c&hAW6MVwvn%yyZ6b46;x8da|iv8%_K|%EcCSiors*C_W2q zx{6%G=up-gHl4JvbGevbtrb}|WyDbCzmkbtUsFo<6tkGau+N|sM5>Y1p%qzFvk7)Y(YE#EP(nD5A&d7S+Rh!F$q#O)B;wsW>jQ^ zx}yV&-Q==U`dExGvbbpF9k0$QCPmR1&Qf@cjMOsOKL_ z=ENSnhD$-IRSPj|p!Q}$e9=YP9IZub&=wJtN0z_00rO=i_Y*F10F!O^>P^$JiZIkP zcevfRuh+D={Njq)+hCIy74~qc{wn586)UfS2-9|$V|t(sq6zaZWa`Gslzwfi6HLd4e9|764|c@QIT&b{>g^^bGm-Q4+C( zhCs{ArLy=u2EIVR3sJTXTFQ75U71k(_i-XQtNGlI*Y#?>tPv?23mDN}kNu>y z3owepjMb^C0JZ0*s2O@@g#Jf8%~UOW8D_L$PwT6-{}jz!aIbB6Rr`%*LibS>qwb7) z)<%g(4ipDzL~J5Z?#>+T$)51)`?49Uv=6=PzR}jEjm-i}P+$Sdc`d+Z1Iz*7rlSd^ zI4IU*jH|vK82gd6mbxEn1#!(Hyn3gPYEPx>RRjaY;|sdk%^zwlFq1mrRy<09+#?US zFKnJ1#P7;S+Y?<6fvOa-CkPjh13U)6a{LPCQJzQgcum;8%%W|o4dgAiuH%}vm%YYn zgDO5X0`q05YB?-vRz*^IbNQazu=Ray$CDs_ioi71O_0-EmZO~FcyeK9awB^*1tH3# zULYxY9WT^BjAxBk1qeTuuXY}$F4vG)pfa@FVm(Ge)K)#-;L(z5B=dh)-+;*bHz7zy^R$fcpS&K=vX7!XBXQb5uNt zF?2N5z_$Uzv5QZDe9=+Is{+Q30Sp4z0N6?e_9Af#;H;0-3%<@(I*~Ys8FUHpGyn=& zok37)0=2^xBaqNy2>=pJAZY~BfDe>>fE4hufTP3_dF*rP`P^5k0KHL!3Cxinf9?q_ z><-`cQgbWjsq9Z+5(gMhOV>p2m(gv#R>2w>5Q#6z-3OmuRS{X<3Z4aXWmIj!*we4b z|2=qzy0qS4_rD?!8&5644ofnB58_@SaJ+8Y&cR~QF05Ujh|LwNW%$tITXdT6LS~Pk zyNTd^h*ZiKgsi{6N+)hhYCJ^{G04d ze_k=7{AZ>Sl>hF2oL;4Gh94+~e}IQ7T~9kIP=!=IylQUC>@Kc)6k|Z%+Y?{#6~e?H z18{SK2=%4osvPgREzW$9;6>=Jz-MgE+eva+B-lT3KkDh$I=4W`8vuVspp?W0u5zVA z`qwZwIe1>k8Yx?$9;MqU$Z4 z*VOfPwR?Mh$X|aA5;TB_Tynaz9UPORC%%f=Qv6uz6!CjlE;`n>ANGlF0IUJ1S_i|t zS*DX5!u)q|bFVP?-91Q)v-KJr@R`o~%x5S$e;Otlf1cxP01c)r=fNRPhq 
zuc-}UO*3Z|Ow)D`6kD`7lm9+e3m=xFIaQ9qFN5;p;0e_`9|ZZ7$;2DO8F$mrF|Cak zyAtAA#~)45!G4^)9D%?+A-&;~aSqT~Fiy-e;5EfVCkdZ#Ts}53s(AQFaU&iOJ@3Xx z=WFe(CUgP9E(hO?zg1mz*PgiM*WQ*tJNeMA zx1sm1mEL}Ocws?bQt2}p+3^nMECTytjVi%;^?!lk*8wP;yYYd_{A4lWPiW#p_gAMr zt7`@-y94vnR5nL{ipu`AUyh&Nc!0HY=Ry%qC|s0lxvSPsDDdjH6mnfvIF}&2^B;2I znV0UEOV~alVDT?Nlsb!fT0aSWi3U7PsD{StWA>M6*e`!^X3Im3gb4>A3eZG=me_|! z=~DVW#vw;_10_@NC@&TQy=JsUlkcu->XXTd4 ztd$lez6E=K0KiUES16;N>I)wjY2R_gJ97QGxu5>^0%g$PcAxu}HfE@WqEup0723lP zSfn`<6Mq8%e+$5ly9HP!tg3|AnQ!`hx^I(v<@|bmjFajh zD0zVy!n~$C9ejDDFW!5Eu!ZV^Xr0PB-iwaYYk8Y_1XHYybC|c8K-$XHHX}Y=JP7}` z5Qi_^q|K8Dp1Eu6ZOGE@kLmlmVtQ9sY?T;?#7h9r0bB;aahng>q8*qCuod7K0FIK% zM^6HK3g8^TmjHej-~|AbqWE)wEdXC9FebhUv;goHz+VCIj`}<>UOBgyvv}oR%{hQ* z2s9dQB*F!TLJ-{U$jcKt+k_5QEr%D0q)V5VFLY}!y8m_I6K$;#&fs?wdz7+&KF!j{ zTl(f}Pxwa5KWz$fBK@?v@Zxs*8SK!-_&tzG!35&0PX1|(BU3Ff#K?%`li5Pfwg+N$UXZVFB^lCx zY0Xkjw3{K57p;lE1CGS@0vRiB56JzO?p^sWc#oJSM42XSMwdbLE=I$}{%oOW38B-Q x{Krf4cX%N?b(DUU7FdV?tI9wqm=HMD5!sfvGlMxRQAd-`tpozP-1^+2{{t3WY7PJZ diff --git a/nlp_resource_data/nltk/draw/__pycache__/tree.cpython-37.pyc b/nlp_resource_data/nltk/draw/__pycache__/tree.cpython-37.pyc index d62d51d636d6a6f43fe6dd244154b4de58de5c85..12360229b6f5e7f947b9be78ab68f43a17768bef 100644 GIT binary patch delta 4146 zcmaKv4RBP|6~}Y4o6V9mfrNw*f^G!)cnO#d;UkbBNeGw-CWL^(Vho$jBfH6dgtJQ$ zXfaZ&RZtncP(cA>@uLXR;yo!MXCF@6p5iDiS{J1v!$>)46Q+D9eY<0Z z8mF1kHT2 zAyxp{moW-brO}`9Yaz<_y)jaxR-tDCP%huea*1lWEoY>>l(m0OH9C1XOG&1qRS)Vh zomVhzosmu|hr(C|%|?@(S(7)hX$+IZmRQ0vBlsz%~*s zS9rS7m-~UZg{QI+%-1qZTTl~nU;WnKbl}w8iwBdv6 z^|@x{!3|UNm?AKAcbkl_Tvds!91~a3eVBaBn)#}ingNaoF3@V=+6B?;1zG?f&_P*Ya6ubpg?kS= zRscbx_ttLVXQSAx!F|4tQ8vX!^D*-VM%yty1qcJ3Km=IJ)wOownHlUbXNA2+4G{rR zV3mS4P~CA~r(m|RXOdIgF&A2u0E(Fkfcd}zpcYsR+y$_wyddn-GN1{#S6-MrQmm7w zmX9_@YnejS$ijlBOZlWYpuom)X4F>mc@;6b9|IeK2Y_DSo4~i^e+tIPv4y#*JixcY z23b~^D>fL*3%4iDD}~ku1ud1W(I#}n?G>W=0C0s-)iiA#`g)qjxhpGPEH)dD6yNEb zQlf@7B#(EyOWkhwfKck@Bdc0IH~uZ8bl96_{C9SlNWEe`qq^*KA$A($=YAy8b}1cg z-~>O*%Vf@*X-7@SzN%4jN5$i=@50PZ1=G>x4X)FpW=cyS+}i9527Dc~%Sf-t62+I5 z==;zd#u}~UZ6jV`U_+mWsTY73d9Atysd8=QTRG35YoEg4&RdFdVbx#8^4Jfq8N-a1 z*+@l&Lwe_*kVREVnb)yVUVR!vIoCJw3QQPv3;ZH?E7nM)OjxdfY_qm%qx@*$%3h>I zc{dfKNWb-t;FZy`tP7UQ<}nOC4sZcI0qJP~SB~ij>fSCrN*oi4izKiecoIOe=qUx$ z?hmx;v>NS31rP03RJAx6Jt#=4gdT;&b@U3PA1Rp0G1B#@-p03wb+_XRy&Mk|*$mg| zZbQ@r#P~hX7>FLXj*6Ab-f6u->i&LIZN5M=9 z2Eux?FA|Ip@9RKEcc3-qr|EK2-OSn8Q1DwEI7;{+LaXUKZ0URC&^&*->9>BSVr~aVGzB&+k2$~Pe{D#r4BM`6lTD`8= z5YyJC$K*W?t~ooPlMnFa&Tg?sxLOBq1omji>Xq_9!&Y&ZEL-}-q9H+Z_2#PirakJ9 zbkiF!e;D?x?+lM!Rb`Z}7gNm0YOE0}>Y<#@w~bX3FljnEy`+a@*2fcnTkVBqzVLqv ziO(88fBZ;cN&cEr&m-^utMavFd#xiZ_?Fae^edaXV4xf50oDU4z$Bm#cn)|8*bf{4 z4g!aO9|AbBgmcBaOe2t4>sg9HI)=wbY?1$3UL>{{1$S2_ieedDp^bPQqdx&&1CGiU zR!njH9O7~L$%<8FZ$c~s8Wl`OGZ(Ar#Ba5|QCf|$EdY+N<>fI*i)3hJ`54QTYMY6^ zHS{h7yI&7>QHODSWx43ZCRmS0oszU<`aL>U16Ytg0pi)dTQvv2E^u>DI}ij`0=OvX z&%j&2&wwE{^+(mW$(E219i;^0{#8$jykaG7aLkWUaB7cfUl<7Ls)%@>zmV>xi8)`W zaqF<0Qj#aBG2{&f>4Xe5xwX@(M?JL4U;=%J(Nu4%daoj(kT={)zd`qJfs^vxrg7q9 z+23@B_|z!AuS)d3gMOrhegS*{#7|NPO?XB>R_MPO5@(AOWv7qP!P~<*LRcz&C_z)` zmf6pt61T{CiD$!s1khP|sd?i>cupr322bPP&=J>V)sx0Et35(|D8Kej6X&GXqG?@F z>jt=Zxn$u9eF~ffV3p3H{_@^d;&P<^8%l5rXQ4CAIm?;pbkN)KP|L0~jMF&6H3$s<=&UpFI=9g zbpcj5Tb!s8IV14kg!jNZxlDyqhsfu{r$8Oz(-&bH?e8O z5#WUIk>@mUYTk!sEU&&YIHF{9I26p3H53U&V-yItU2)j|fN{Q;jzKyE^a1-45_%IT z6(XPA&5%+Q%p@4;q+#gbE1S1_r1gdK8G1JNqPQSaBM*=Jw`!^zt2yKigqxe|v|PSqJsHtJ&u#d_ pEqa^WA8DV2PiA6Iw_t)J($cQ`V#Gyx9%HaWU8<6EWJBld{{uypn-u^6 delta 4095 zcmaKv4^Y&{6~|fr947=61mXDO5&V<6j7}13nsg>JnMvRG{eV-n z;`rh1+ugVCz1@AkW!HJJ`@Bf1PD)CMqMw;9_cSLSOBzl{U0Ku-?AD7qJp&!V9*^uQ 
zT3{SXI1?k>Y9M*ob_b1>3)S?g$3(gD=F~PZSPD%EkPQgH1xy31@sJh(ML;pIh@d4) zC*(AO+wKeN-QIRREN4Sk1}p()0ZV~A;8tKJkPoOMDUPfPNR>eP1ZF_0GR~%)7Gl{m zFHRSU%b}SAEK{$hImHS!;Zn7k4pFAsGV^TQS5JNE>Z!~li|R0xQ?!<(cKVN> zDo!1(ip6C;fGD+SYms_m$Z<~G%G1~Y=4vTsj9(APT#UBnsh!zQ(Vz}y7o@UZ_h4AE zWur#DpIt8M4M)yYkM~Y&I@9tD}AFKcA5%47|>M9&Bs@*n8gWW;Cxi4((RJ z1GE5MU>?w__T*>g!iFgZWG7UUVr?`|=5G=8?9kQw5T)5=1IBZ_ybpP3vQz92Qp)~l zQ>-}eh2mR4ml2xXFFM#Lc4>TbZ(x*7vC%xtyopgC`Z=m@NI@U~^w5!-uJpMdEo6r| zMeH>Wj{G(t0XIP#JFF33=V7+-=xuhZW>^JVGL2ibR3U1Vv*2;M5yuO1v+hIR4qy=YF0dW=j=EBiCAO>h!dx5Q-~m-s z=nwcZi;%EcHmK+qDoXvm#VM2r=pIoTrV%$P2HK;FseDk`oO4;ed(*VyNkP}lbO zSuR(Z%jFsq%3ORkspWOy-zu35qs7K&OO}bmosg~djIw3tgxF_fRQy#WJx2QRoBAX# zW2;<5HK3yMIp>3zvyWif`aJ$#J!B@d_yVoXUcb-VB_A`QtI|aA#31<;Mo!@otDajkVus5{`$lO0-OcF;rqIWGL%$hZKI z?rqTi0N4ZYS^Oa+b_12gwE1;UpB|DNSrYnw1SsH90O^uHA(*ipzE)jM6c4u?AuUxL zRQ0P)&n`51OOHbO1;LCDOI;7??N;HKsFV{iVIZ5~BRXPyQR5S{o*?ab22gM)R%F1m zTOSZA)4M1an+7>{4m;(lVEKE;3AjPnktig`{0JQ2aB;3UT!{_fwA?I2x^j!7BnQfu z{1_%322dTxJeI$Lg7Xpy@(JUSHKQ@}htPFD@BqO~@cROKvp47uO5W2kMZd2#+#yR< zZ{4E7NiTp!G)+sjc9b`fW4ZSDU$v8w-5tiFd05oe?Fo43d&3p<$57`3cA%V~*{sc} zfMWKYAV{*W!cPt&! zki1b$A;Z4@F0ru=gOj=ZSjPk=O&dMz^gviHhdq7~{S*>c`_~}x8RTytf1$9X#Jh*S zjeI1Ispr4>q;;bO*GuhYztUwd4DdS3<%U{vkjIM4Pwh`(2V-q@IU z9O6=7ohsfmBa3UsZnVyP32+`Ik&zV zYe~}L<=>&0C`9rfXhxE|0fWX~5}oMs0bRgG0Hs2n1AYVi7D%7K3`p-0hGL?kLTDMX zai7R`k+j{mA}sy$x4C0i`TRPa8Q#_p)a<6Y86RLC-;IBfsQ(6>R>zz2#XIV3Q;jfhEQUlZa$X|Yu-*(8)W^;D&4uUk zG)#;=p)W!a8OxbFZS39Z7UFGn$x|XOsoWM#>&37>fX^zQPEj6dy;*zJ=JcLC)Cp|`;*WwFA?a5{F6VKjb4ZN#OU>Y6m93*8AFBA)7Ddn zywDyYgTBw`@y+p$D?TTM+vW><=+Pigs;}B|oTIQD$@0r+z6AUUIHeZ1uS{WM|DG^L zf5h!+e@$Gm{OH&$UQweRwR1{n0=f68^$WFtjC96}g$ppP2;j{Zkcxq^eLn!XfZ(==^l)=n-xih!jd#1= z5Moq~bXW4R@*xMD6t}HK2Kxhdut}=+oC&_#96#se6ii}WdR%ZQ@les^>yIE%rB{F z+MAoZJ-$G5bDiekVrs^#L&47c0Zd^35SM8Swsh*=usi|vC%~8L^6fdQzGun*0N4kF AfdBvi diff --git a/nlp_resource_data/nltk/draw/__pycache__/util.cpython-37.pyc b/nlp_resource_data/nltk/draw/__pycache__/util.cpython-37.pyc index 9aee59193648f7a7e963b5c23f6e6739e7edba5d..e6cdb222dd3bb27f8fad01d0cf8292d6411e9d34 100644 GIT binary patch delta 14043 zcmb_j34B!Lwa+)1nM@WC0wg9RVIU+h36O-HK*ClQMI$>psKaFDCK)oBiQi0s1T|Pd zTU!@;#jEtW_PN%rUNxvie~M3~wrcgY+S`h)wY5~++Sa=8YU%l(JCjL<8J7C^{c`48 z&wkFg+>g6HvA+3+Klf9?>z< zJ(8Pj!BHIr?gB2`gM}Suy3Z6DicPWX6w5OFS$@Z^48KDu+L^gixQnSJ+n+-%IZE-) zO!sK2=lb)go~MkadI{Bs_=i$`DEF6A{S5ywst;32slSZs!~MPa)R4~&W2j+-eZbG@AEqx~gRFX8%ls+ao9s9whP3aXFs zJE`u32<}R%k6q~>M-Ah+VFERj`^Qs#Jl7{uy~1Bf^-8Y0s6N3zk?IqbA%vjH@A6me z%5YaJll;}~mg|MQSE<&2DB5hpgfYbH_j@{&sMimB1Rw1`smN8?9V7E`BTZEg;4a|!99io|de)S)z! 
diff --git a/nlp_resource_data/nltk/draw/cfg.py b/nlp_resource_data/nltk/draw/cfg.py index 9cab511..3afb3e4 100644 --- a/nlp_resource_data/nltk/draw/cfg.py +++ b/nlp_resource_data/nltk/draw/cfg.py @@ -1,6 +1,6
@@ # Natural Language Toolkit: CFG visualization # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # URL: # For license information, see LICENSE.TXT @@ -48,7 +48,8 @@ Visualization tools for CFGs. import re -from tkinter import ( +from six import string_types +from six.moves.tkinter import ( Button, Canvas, Entry, @@ -78,24 +79,24 @@ from nltk.draw.util import ( class ProductionList(ColorizedList): - ARROW = SymbolWidget.SYMBOLS["rightarrow"] + ARROW = SymbolWidget.SYMBOLS['rightarrow'] def _init_colortags(self, textwidget, options): - textwidget.tag_config("terminal", foreground="#006000") - textwidget.tag_config("arrow", font="symbol", underline="0") + textwidget.tag_config('terminal', foreground='#006000') + textwidget.tag_config('arrow', font='symbol', underline='0') textwidget.tag_config( - "nonterminal", foreground="blue", font=("helvetica", -12, "bold") + 'nonterminal', foreground='blue', font=('helvetica', -12, 'bold') ) def _item_repr(self, item): contents = [] - contents.append(("%s\t" % item.lhs(), "nonterminal")) - contents.append((self.ARROW, "arrow")) + contents.append(('%s\t' % item.lhs(), 'nonterminal')) + contents.append((self.ARROW, 'arrow')) for elt in item.rhs(): if isinstance(elt, Nonterminal): - contents.append((" %s" % elt.symbol(), "nonterminal")) + contents.append((' %s' % elt.symbol(), 'nonterminal')) else: - contents.append((" %r" % elt, "terminal")) + contents.append((' %r' % elt, 'terminal')) return contents @@ -160,7 +161,7 @@ class CFGEditor(object): # Regular expressions used by _analyze_line. Precompile them, so # we can process the text faster. - ARROW = SymbolWidget.SYMBOLS["rightarrow"] + ARROW = SymbolWidget.SYMBOLS['rightarrow'] _LHS_RE = re.compile(r"(^\s*\w+\s*)(->|(" + ARROW + "))") _ARROW_RE = re.compile("\s*(->|(" + ARROW + "))\s*") _PRODUCTION_RE = re.compile( @@ -171,14 +172,14 @@ class CFGEditor(object): + r"((\w+|'[\w ]*'|\"[\w ]*\"|\|)\s*)*$" # arrow ) # RHS _TOKEN_RE = re.compile("\\w+|->|'[\\w ]+'|\"[\\w ]+\"|(" + ARROW + ")") - _BOLD = ("helvetica", -12, "bold") + _BOLD = ('helvetica', -12, 'bold') def __init__(self, parent, cfg=None, set_cfg_callback=None): self._parent = parent if cfg is not None: self._cfg = cfg else: - self._cfg = CFG(Nonterminal("S"), []) + self._cfg = CFG(Nonterminal('S'), []) self._set_cfg_callback = set_cfg_callback self._highlight_matching_nonterminals = 1 @@ -188,97 +189,97 @@ class CFGEditor(object): self._init_bindings() self._init_startframe() - self._startframe.pack(side="top", fill="x", expand=0) + self._startframe.pack(side='top', fill='x', expand=0) self._init_prodframe() - self._prodframe.pack(side="top", fill="both", expand=1) + self._prodframe.pack(side='top', fill='both', expand=1) self._init_buttons() - self._buttonframe.pack(side="bottom", fill="x", expand=0) + self._buttonframe.pack(side='bottom', fill='x', expand=0) self._textwidget.focus() def _init_startframe(self): frame = self._startframe = Frame(self._top) self._start = Entry(frame) - self._start.pack(side="right") - Label(frame, text="Start Symbol:").pack(side="right") - Label(frame, text="Productions:").pack(side="left") + self._start.pack(side='right') + Label(frame, text='Start Symbol:').pack(side='right') + Label(frame, text='Productions:').pack(side='left') self._start.insert(0, self._cfg.start().symbol()) def _init_buttons(self): frame = self._buttonframe = Frame(self._top) - Button(frame, text="Ok", command=self._ok, underline=0, takefocus=0).pack( - side="left" + Button(frame, 
text='Ok', command=self._ok, underline=0, takefocus=0).pack( + side='left' ) - Button(frame, text="Apply", command=self._apply, underline=0, takefocus=0).pack( - side="left" + Button(frame, text='Apply', command=self._apply, underline=0, takefocus=0).pack( + side='left' ) - Button(frame, text="Reset", command=self._reset, underline=0, takefocus=0).pack( - side="left" + Button(frame, text='Reset', command=self._reset, underline=0, takefocus=0).pack( + side='left' ) Button( - frame, text="Cancel", command=self._cancel, underline=0, takefocus=0 - ).pack(side="left") - Button(frame, text="Help", command=self._help, underline=0, takefocus=0).pack( - side="right" + frame, text='Cancel', command=self._cancel, underline=0, takefocus=0 + ).pack(side='left') + Button(frame, text='Help', command=self._help, underline=0, takefocus=0).pack( + side='right' ) def _init_bindings(self): - self._top.title("CFG Editor") - self._top.bind("", self._cancel) - self._top.bind("", self._cancel) - self._top.bind("", self._cancel) + self._top.title('CFG Editor') + self._top.bind('', self._cancel) + self._top.bind('', self._cancel) + self._top.bind('', self._cancel) # self._top.bind('', self._cancel) - self._top.bind("", self._cancel) - self._top.bind("", self._cancel) + self._top.bind('', self._cancel) + self._top.bind('', self._cancel) # self._top.bind('', self._cancel) - self._top.bind("", self._cancel) - - self._top.bind("", self._ok) - self._top.bind("", self._ok) - self._top.bind("", self._apply) - self._top.bind("", self._apply) - self._top.bind("", self._reset) - self._top.bind("", self._reset) - self._top.bind("", self._help) - self._top.bind("", self._help) - self._top.bind("", self._help) + self._top.bind('', self._cancel) + + self._top.bind('', self._ok) + self._top.bind('', self._ok) + self._top.bind('', self._apply) + self._top.bind('', self._apply) + self._top.bind('', self._reset) + self._top.bind('', self._reset) + self._top.bind('', self._help) + self._top.bind('', self._help) + self._top.bind('', self._help) def _init_prodframe(self): self._prodframe = Frame(self._top) # Create the basic Text widget & scrollbar. self._textwidget = Text( - self._prodframe, background="#e0e0e0", exportselection=1 + self._prodframe, background='#e0e0e0', exportselection=1 ) - self._textscroll = Scrollbar(self._prodframe, takefocus=0, orient="vertical") + self._textscroll = Scrollbar(self._prodframe, takefocus=0, orient='vertical') self._textwidget.config(yscrollcommand=self._textscroll.set) self._textscroll.config(command=self._textwidget.yview) - self._textscroll.pack(side="right", fill="y") - self._textwidget.pack(expand=1, fill="both", side="left") + self._textscroll.pack(side='right', fill='y') + self._textwidget.pack(expand=1, fill='both', side='left') # Initialize the colorization tags. Each nonterminal gets its # own tag, so they aren't listed here. - self._textwidget.tag_config("terminal", foreground="#006000") - self._textwidget.tag_config("arrow", font="symbol") - self._textwidget.tag_config("error", background="red") + self._textwidget.tag_config('terminal', foreground='#006000') + self._textwidget.tag_config('arrow', font='symbol') + self._textwidget.tag_config('error', background='red') # Keep track of what line they're on. We use that to remember # to re-analyze a line whenever they leave it. self._linenum = 0 # Expand "->" to an arrow. - self._top.bind(">", self._replace_arrows) + self._top.bind('>', self._replace_arrows) # Re-colorize lines when appropriate. 
- self._top.bind("<>", self._analyze) - self._top.bind("", self._check_analyze) - self._top.bind("", self._check_analyze) + self._top.bind('<>', self._analyze) + self._top.bind('', self._check_analyze) + self._top.bind('', self._check_analyze) # Tab cycles focus. (why doesn't this work??) def cycle(e, textwidget=self._textwidget): textwidget.tk_focusNext().focus() - self._textwidget.bind("", cycle) + self._textwidget.bind('', cycle) prod_tuples = [(p.lhs(), [p.rhs()]) for p in self._cfg.productions()] for i in range(len(prod_tuples) - 1, 0, -1): @@ -294,16 +295,16 @@ class CFGEditor(object): for lhs, rhss in prod_tuples: print(lhs, rhss) - s = "%s ->" % lhs + s = '%s ->' % lhs for rhs in rhss: for elt in rhs: if isinstance(elt, Nonterminal): - s += " %s" % elt + s += ' %s' % elt else: - s += " %r" % elt - s += " |" - s = s[:-2] + "\n" - self._textwidget.insert("end", s) + s += ' %r' % elt + s += ' |' + s = s[:-2] + '\n' + self._textwidget.insert('end', s) self._analyze() @@ -337,10 +338,10 @@ class CFGEditor(object): Remove all tags (except ``arrow`` and ``sel``) from the given line of the text widget used for editing the productions. """ - start = "%d.0" % linenum - end = "%d.end" % linenum + start = '%d.0' % linenum + end = '%d.end' % linenum for tag in self._textwidget.tag_names(): - if tag not in ("arrow", "sel"): + if tag not in ('arrow', 'sel'): self._textwidget.tag_remove(tag, start, end) def _check_analyze(self, *e): @@ -349,7 +350,7 @@ class CFGEditor(object): all colorization from the line we moved to, and re-colorize the line that we moved from. """ - linenum = int(self._textwidget.index("insert").split(".")[0]) + linenum = int(self._textwidget.index('insert').split('.')[0]) if linenum != self._linenum: self._clear_tags(linenum) self._analyze_line(self._linenum) @@ -361,21 +362,21 @@ class CFGEditor(object): symbol font). This searches the whole buffer, but is fast enough to be done anytime they press '>'. """ - arrow = "1.0" + arrow = '1.0' while True: - arrow = self._textwidget.search("->", arrow, "end+1char") - if arrow == "": + arrow = self._textwidget.search('->', arrow, 'end+1char') + if arrow == '': break - self._textwidget.delete(arrow, arrow + "+2char") - self._textwidget.insert(arrow, self.ARROW, "arrow") - self._textwidget.insert(arrow, "\t") + self._textwidget.delete(arrow, arrow + '+2char') + self._textwidget.insert(arrow, self.ARROW, 'arrow') + self._textwidget.insert(arrow, '\t') - arrow = "1.0" + arrow = '1.0' while True: - arrow = self._textwidget.search(self.ARROW, arrow + "+1char", "end+1char") - if arrow == "": + arrow = self._textwidget.search(self.ARROW, arrow + '+1char', 'end+1char') + if arrow == '': break - self._textwidget.tag_add("arrow", arrow, arrow + "+1char") + self._textwidget.tag_add('arrow', arrow, arrow + '+1char') def _analyze_token(self, match, linenum): """ @@ -386,34 +387,34 @@ class CFGEditor(object): """ # What type of token is it? if match.group()[0] in "'\"": - tag = "terminal" - elif match.group() in ("->", self.ARROW): - tag = "arrow" + tag = 'terminal' + elif match.group() in ('->', self.ARROW): + tag = 'arrow' else: # If it's a nonterminal, then set up new bindings, so we # can highlight all instances of that nonterminal when we # put the mouse over it. 
- tag = "nonterminal_" + match.group() + tag = 'nonterminal_' + match.group() if tag not in self._textwidget.tag_names(): self._init_nonterminal_tag(tag) - start = "%d.%d" % (linenum, match.start()) - end = "%d.%d" % (linenum, match.end()) + start = '%d.%d' % (linenum, match.start()) + end = '%d.%d' % (linenum, match.end()) self._textwidget.tag_add(tag, start, end) - def _init_nonterminal_tag(self, tag, foreground="blue"): + def _init_nonterminal_tag(self, tag, foreground='blue'): self._textwidget.tag_config(tag, foreground=foreground, font=CFGEditor._BOLD) if not self._highlight_matching_nonterminals: return def enter(e, textwidget=self._textwidget, tag=tag): - textwidget.tag_config(tag, background="#80ff80") + textwidget.tag_config(tag, background='#80ff80') def leave(e, textwidget=self._textwidget, tag=tag): - textwidget.tag_config(tag, background="") + textwidget.tag_config(tag, background='') - self._textwidget.tag_bind(tag, "", enter) - self._textwidget.tag_bind(tag, "", leave) + self._textwidget.tag_bind(tag, '', enter) + self._textwidget.tag_bind(tag, '', leave) def _analyze_line(self, linenum): """ @@ -423,7 +424,7 @@ class CFGEditor(object): self._clear_tags(linenum) # Get the line line's text string. - line = self._textwidget.get(repr(linenum) + ".0", repr(linenum) + ".end") + line = self._textwidget.get(repr(linenum) + '.0', repr(linenum) + '.end') # If it's a valid production, then colorize each token. if CFGEditor._PRODUCTION_RE.match(line): @@ -431,10 +432,10 @@ class CFGEditor(object): # and call analyze_token on each token. def analyze_token(match, self=self, linenum=linenum): self._analyze_token(match, linenum) - return "" + return '' CFGEditor._TOKEN_RE.sub(analyze_token, line) - elif line.strip() != "": + elif line.strip() != '': # It's invalid; show the user where the error is. self._mark_error(linenum, line) @@ -445,29 +446,29 @@ class CFGEditor(object): arrowmatch = CFGEditor._ARROW_RE.search(line) if not arrowmatch: # If there's no arrow at all, highlight the whole line. - start = "%d.0" % linenum - end = "%d.end" % linenum + start = '%d.0' % linenum + end = '%d.end' % linenum elif not CFGEditor._LHS_RE.match(line): # Otherwise, if the LHS is bad, highlight it. - start = "%d.0" % linenum - end = "%d.%d" % (linenum, arrowmatch.start()) + start = '%d.0' % linenum + end = '%d.%d' % (linenum, arrowmatch.start()) else: # Otherwise, highlight the RHS. - start = "%d.%d" % (linenum, arrowmatch.end()) - end = "%d.end" % linenum + start = '%d.%d' % (linenum, arrowmatch.end()) + end = '%d.end' % linenum # If we're highlighting 0 chars, highlight the whole line. - if self._textwidget.compare(start, "==", end): - start = "%d.0" % linenum - end = "%d.end" % linenum - self._textwidget.tag_add("error", start, end) + if self._textwidget.compare(start, '==', end): + start = '%d.0' % linenum + end = '%d.end' % linenum + self._textwidget.tag_add('error', start, end) def _analyze(self, *e): """ Replace ``->`` with arrows, and colorize the entire buffer. """ self._replace_arrows() - numlines = int(self._textwidget.index("end").split(".")[0]) + numlines = int(self._textwidget.index('end').split('.')[0]) for linenum in range(1, numlines + 1): # line numbers start at 1. self._analyze_line(linenum) @@ -479,15 +480,15 @@ class CFGEditor(object): productions = [] # Get the text, normalize it, and split it into lines. 
- text = self._textwidget.get("1.0", "end") - text = re.sub(self.ARROW, "->", text) - text = re.sub("\t", " ", text) - lines = text.split("\n") + text = self._textwidget.get('1.0', 'end') + text = re.sub(self.ARROW, '->', text) + text = re.sub('\t', ' ', text) + lines = text.split('\n') # Convert each line to a CFG production for line in lines: line = line.strip() - if line == "": + if line == '': continue productions += _read_cfg_production(line) # if line.strip() == '': continue @@ -526,9 +527,9 @@ class CFGEditor(object): self._set_cfg_callback(cfg) def _reset(self, *e): - self._textwidget.delete("1.0", "end") + self._textwidget.delete('1.0', 'end') for production in self._cfg.productions(): - self._textwidget.insert("end", "%s\n" % production) + self._textwidget.insert('end', '%s\n' % production) self._analyze() if self._set_cfg_callback is not None: self._set_cfg_callback(self._cfg) @@ -545,15 +546,15 @@ class CFGEditor(object): try: ShowText( self._parent, - "Help: Chart Parser Demo", + 'Help: Chart Parser Demo', (_CFGEditor_HELP).strip(), width=75, - font="fixed", + font='fixed', ) except: ShowText( self._parent, - "Help: Chart Parser Demo", + 'Help: Chart Parser Demo', (_CFGEditor_HELP).strip(), width=75, ) @@ -571,7 +572,7 @@ class CFGDemo(object): # Set up the main window. self._top = Tk() - self._top.title("Context Free Grammar Demo") + self._top.title('Context Free Grammar Demo') # Base font size self._size = IntVar(self._top) @@ -582,7 +583,7 @@ class CFGDemo(object): # Create the basic frames frame1 = Frame(self._top) - frame1.pack(side="left", fill="y", expand=0) + frame1.pack(side='left', fill='y', expand=0) self._init_menubar(self._top) self._init_buttons(self._top) self._init_grammar(frame1) @@ -594,7 +595,7 @@ class CFGDemo(object): # ////////////////////////////////////////////////// def _init_bindings(self, top): - top.bind("", self.destroy) + top.bind('', self.destroy) def _init_menubar(self, parent): pass @@ -604,19 +605,19 @@ class CFGDemo(object): def _init_grammar(self, parent): self._prodlist = ProductionList(parent, self._grammar, width=20) - self._prodlist.pack(side="top", fill="both", expand=1) + self._prodlist.pack(side='top', fill='both', expand=1) self._prodlist.focus() - self._prodlist.add_callback("select", self._selectprod_cb) - self._prodlist.add_callback("move", self._selectprod_cb) + self._prodlist.add_callback('select', self._selectprod_cb) + self._prodlist.add_callback('move', self._selectprod_cb) def _init_treelet(self, parent): - self._treelet_canvas = Canvas(parent, background="white") - self._treelet_canvas.pack(side="bottom", fill="x") + self._treelet_canvas = Canvas(parent, background='white') + self._treelet_canvas.pack(side='bottom', fill='x') self._treelet = None def _init_workspace(self, parent): - self._workspace = CanvasFrame(parent, background="white") - self._workspace.pack(side="right", fill="both", expand=1) + self._workspace = CanvasFrame(parent, background='white') + self._workspace.pack(side='right', fill='both', expand=1) self._tree = None self.reset_workspace() @@ -627,8 +628,8 @@ class CFGDemo(object): def reset_workspace(self): c = self._workspace.canvas() fontsize = int(self._size.get()) - node_font = ("helvetica", -(fontsize + 4), "bold") - leaf_font = ("helvetica", -(fontsize + 2)) + node_font = ('helvetica', -(fontsize + 4), 'bold') + leaf_font = ('helvetica', -(fontsize + 2)) # Remove the old tree if self._tree is not None: @@ -644,7 +645,7 @@ class CFGDemo(object): leaves.append(TextWidget(c, word, font=leaf_font, 
draggable=1)) # Put it all together into one tree - self._tree = TreeSegmentWidget(c, rootnode, leaves, color="white") + self._tree = TreeSegmentWidget(c, rootnode, leaves, color='white') # Add it to the workspace. self._workspace.add_widget(self._tree) @@ -663,7 +664,7 @@ class CFGDemo(object): if tree is None: tree = self._tree for i in range(len(tree.subtrees()) - len(prod.rhs())): - if tree["color", i] == "white": + if tree['color', i] == 'white': self._markproduction # FIXME: Is this necessary at all? for j, node in enumerate(prod.rhs()): @@ -675,7 +676,7 @@ class CFGDemo(object): ): pass # matching nonterminal elif ( - isinstance(node, str) + isinstance(node, string_types) and isinstance(widget, TextWidget) and node == widget.text() ): @@ -684,7 +685,7 @@ class CFGDemo(object): break else: # Everything matched! - print("MATCH AT", i) + print('MATCH AT', i) # ////////////////////////////////////////////////// # Grammar @@ -706,16 +707,16 @@ class CFGDemo(object): # Draw the tree in the treelet area. fontsize = int(self._size.get()) - node_font = ("helvetica", -(fontsize + 4), "bold") - leaf_font = ("helvetica", -(fontsize + 2)) + node_font = ('helvetica', -(fontsize + 4), 'bold') + leaf_font = ('helvetica', -(fontsize + 2)) self._treelet = tree_to_treesegment( canvas, tree, node_font=node_font, leaf_font=leaf_font ) - self._treelet["draggable"] = 1 + self._treelet['draggable'] = 1 # Center the treelet. (x1, y1, x2, y2) = self._treelet.bbox() - w, h = int(canvas["width"]), int(canvas["height"]) + w, h = int(canvas['width']), int(canvas['height']) self._treelet.move((w - x1 - x2) / 2, (h - y1 - y2) / 2) # Mark the places where we can add it to the workspace. @@ -731,7 +732,7 @@ class CFGDemo(object): def demo2(): from nltk import Nonterminal, Production, CFG - nonterminals = "S VP NP PP P N Name V Det" + nonterminals = 'S VP NP PP P N Name V Det' (S, VP, NP, PP, P, N, Name, V, Det) = [Nonterminal(s) for s in nonterminals.split()] productions = ( # Syntactic Productions @@ -743,23 +744,23 @@ def demo2(): Production(VP, [V, NP]), Production(PP, [P, NP]), Production(PP, []), - Production(PP, ["up", "over", NP]), + Production(PP, ['up', 'over', NP]), # Lexical Productions - Production(NP, ["I"]), - Production(Det, ["the"]), - Production(Det, ["a"]), - Production(N, ["man"]), - Production(V, ["saw"]), - Production(P, ["in"]), - Production(P, ["with"]), - Production(N, ["park"]), - Production(N, ["dog"]), - Production(N, ["statue"]), - Production(Det, ["my"]), + Production(NP, ['I']), + Production(Det, ['the']), + Production(Det, ['a']), + Production(N, ['man']), + Production(V, ['saw']), + Production(P, ['in']), + Production(P, ['with']), + Production(N, ['park']), + Production(N, ['dog']), + Production(N, ['statue']), + Production(Det, ['my']), ) grammar = CFG(S, productions) - text = "I saw a man in the park".split() + text = 'I saw a man in the park'.split() d = CFGDemo(grammar, text) d.mainloop() @@ -772,7 +773,7 @@ def demo2(): def demo(): from nltk import Nonterminal, CFG - nonterminals = "S VP NP PP P N Name V Det" + nonterminals = 'S VP NP PP P N Name V Det' (S, VP, NP, PP, P, N, Name, V, Det) = [Nonterminal(s) for s in nonterminals.split()] grammar = CFG.fromstring( @@ -804,8 +805,8 @@ def demo(): top = Tk() editor = CFGEditor(top, grammar, cb) - Label(top, text="\nTesting CFG Editor\n").pack() - Button(top, text="Quit", command=top.destroy).pack() + Label(top, text='\nTesting CFG Editor\n').pack() + Button(top, text='Quit', command=top.destroy).pack() top.mainloop() @@ -813,7 
+814,7 @@ def demo3(): from nltk import Production (S, VP, NP, PP, P, N, Name, V, Det) = nonterminals( - "S, VP, NP, PP, P, N, Name, V, Det" + 'S, VP, NP, PP, P, N, Name, V, Det' ) productions = ( @@ -826,19 +827,19 @@ def demo3(): Production(VP, [V, NP]), Production(PP, [P, NP]), Production(PP, []), - Production(PP, ["up", "over", NP]), + Production(PP, ['up', 'over', NP]), # Lexical Productions - Production(NP, ["I"]), - Production(Det, ["the"]), - Production(Det, ["a"]), - Production(N, ["man"]), - Production(V, ["saw"]), - Production(P, ["in"]), - Production(P, ["with"]), - Production(N, ["park"]), - Production(N, ["dog"]), - Production(N, ["statue"]), - Production(Det, ["my"]), + Production(NP, ['I']), + Production(Det, ['the']), + Production(Det, ['a']), + Production(N, ['man']), + Production(V, ['saw']), + Production(P, ['in']), + Production(P, ['with']), + Production(N, ['park']), + Production(N, ['dog']), + Production(N, ['statue']), + Production(Det, ['my']), ) t = Tk() @@ -846,15 +847,15 @@ def demo3(): def destroy(e, t=t): t.destroy() - t.bind("q", destroy) + t.bind('q', destroy) p = ProductionList(t, productions) - p.pack(expand=1, fill="both") - p.add_callback("select", p.markonly) - p.add_callback("move", p.markonly) + p.pack(expand=1, fill='both') + p.add_callback('select', p.markonly) + p.add_callback('move', p.markonly) p.focus() p.mark(productions[2]) p.mark(productions[8]) -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/draw/dispersion.py b/nlp_resource_data/nltk/draw/dispersion.py index d0717af..40b2a9a 100644 --- a/nlp_resource_data/nltk/draw/dispersion.py +++ b/nlp_resource_data/nltk/draw/dispersion.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Dispersion Plots # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Steven Bird # URL: # For license information, see LICENSE.TXT @@ -26,8 +26,8 @@ def dispersion_plot(text, words, ignore_case=False, title="Lexical Dispersion Pl from matplotlib import pylab except ImportError: raise ValueError( - "The plot function requires matplotlib to be installed." - "See http://matplotlib.org/" + 'The plot function requires matplotlib to be installed.' + 'See http://matplotlib.org/' ) text = list(text) @@ -58,8 +58,9 @@ def dispersion_plot(text, words, ignore_case=False, title="Lexical Dispersion Pl pylab.show() -if __name__ == "__main__": +if __name__ == '__main__': + import nltk.compat from nltk.corpus import gutenberg - words = ["Elinor", "Marianne", "Edward", "Willoughby"] - dispersion_plot(gutenberg.words("austen-sense.txt"), words) + words = ['Elinor', 'Marianne', 'Edward', 'Willoughby'] + dispersion_plot(gutenberg.words('austen-sense.txt'), words) diff --git a/nlp_resource_data/nltk/draw/table.py b/nlp_resource_data/nltk/draw/table.py index 7ca4a2d..aea70b4 100644 --- a/nlp_resource_data/nltk/draw/table.py +++ b/nlp_resource_data/nltk/draw/table.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Table widget # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # URL: # For license information, see LICENSE.TXT @@ -9,9 +9,12 @@ Tkinter widgets for displaying multi-column listboxes and tables. 
""" +from __future__ import division + + import operator -from tkinter import Frame, Label, Listbox, Scrollbar, Tk +from six.moves.tkinter import Frame, Label, Listbox, Scrollbar, Tk ###################################################################### @@ -35,15 +38,15 @@ class MultiListbox(Frame): # ///////////////////////////////////////////////////////////////// #: Default configuration values for the frame. - FRAME_CONFIG = dict(background="#888", takefocus=True, highlightthickness=1) + FRAME_CONFIG = dict(background='#888', takefocus=True, highlightthickness=1) #: Default configurations for the column labels. LABEL_CONFIG = dict( borderwidth=1, - relief="raised", - font="helvetica -16 bold", - background="#444", - foreground="white", + relief='raised', + font='helvetica -16 bold', + background='#444', + foreground='white', ) #: Default configuration for the column listboxes. @@ -52,8 +55,8 @@ class MultiListbox(Frame): selectborderwidth=0, highlightthickness=0, exportselection=False, - selectbackground="#888", - activestyle="none", + selectbackground='#888', + activestyle='none', takefocus=False, ) @@ -100,7 +103,7 @@ class MultiListbox(Frame): if column_weights is None: column_weights = [1] * len(columns) elif len(column_weights) != len(columns): - raise ValueError("Expected one column_weight for each column") + raise ValueError('Expected one column_weight for each column') self._column_weights = column_weights # Configure our widgets. @@ -113,40 +116,40 @@ class MultiListbox(Frame): if include_labels: l = Label(self, text=label, **self.LABEL_CONFIG) self._labels.append(l) - l.grid(column=i, row=0, sticky="news", padx=0, pady=0) + l.grid(column=i, row=0, sticky='news', padx=0, pady=0) l.column_index = i # Create a listbox for the column lb = Listbox(self, **self.LISTBOX_CONFIG) self._listboxes.append(lb) - lb.grid(column=i, row=1, sticky="news", padx=0, pady=0) + lb.grid(column=i, row=1, sticky='news', padx=0, pady=0) lb.column_index = i # Clicking or dragging selects: - lb.bind("", self._select) - lb.bind("", self._select) + lb.bind('', self._select) + lb.bind('', self._select) # Scroll whell scrolls: - lb.bind("", lambda e: self._scroll(-1)) - lb.bind("", lambda e: self._scroll(+1)) - lb.bind("", lambda e: self._scroll(e.delta)) + lb.bind('', lambda e: self._scroll(-1)) + lb.bind('', lambda e: self._scroll(+1)) + lb.bind('', lambda e: self._scroll(e.delta)) # Button 2 can be used to scan: - lb.bind("", lambda e: self.scan_mark(e.x, e.y)) - lb.bind("", lambda e: self.scan_dragto(e.x, e.y)) + lb.bind('', lambda e: self.scan_mark(e.x, e.y)) + lb.bind('', lambda e: self.scan_dragto(e.x, e.y)) # Dragging outside the window has no effect (diable # the default listbox behavior, which scrolls): - lb.bind("", lambda e: "break") + lb.bind('', lambda e: 'break') # Columns can be resized by dragging them: - l.bind("", self._resize_column) + l.bind('', self._resize_column) # Columns can be resized by dragging them. 
(This binding is # used if they click on the grid between columns:) - self.bind("", self._resize_column) + self.bind('', self._resize_column) # Set up key bindings for the widget: - self.bind("", lambda e: self.select(delta=-1)) - self.bind("", lambda e: self.select(delta=1)) - self.bind("", lambda e: self.select(delta=-self._pagesize())) - self.bind("", lambda e: self.select(delta=self._pagesize())) + self.bind('', lambda e: self.select(delta=-1)) + self.bind('', lambda e: self.select(delta=1)) + self.bind('', lambda e: self.select(delta=-self._pagesize())) + self.bind('', lambda e: self.select(delta=self._pagesize())) # Configuration customizations self.configure(cnf, **kw) @@ -164,7 +167,7 @@ class MultiListbox(Frame): """ # If we're already waiting for a button release, then ignore # the new button press. - if event.widget.bind(""): + if event.widget.bind(''): return False # Decide which column (if any) to resize. @@ -180,9 +183,9 @@ class MultiListbox(Frame): # Bind callbacks that are used to resize it. if self._resize_column_index is not None: - event.widget.bind("", self._resize_column_motion_cb) + event.widget.bind('', self._resize_column_motion_cb) event.widget.bind( - "" % event.num, self._resize_column_buttonrelease_cb + '' % event.num, self._resize_column_buttonrelease_cb ) return True else: @@ -190,16 +193,16 @@ class MultiListbox(Frame): def _resize_column_motion_cb(self, event): lb = self._listboxes[self._resize_column_index] - charwidth = lb.winfo_width() / lb["width"] + charwidth = lb.winfo_width() / lb['width'] x1 = event.x + event.widget.winfo_x() x2 = lb.winfo_x() + lb.winfo_width() - lb["width"] = max(3, lb["width"] + (x1 - x2) // charwidth) + lb['width'] = max(3, lb['width'] + (x1 - x2) // charwidth) def _resize_column_buttonrelease_cb(self, event): - event.widget.unbind("" % event.num) - event.widget.unbind("") + event.widget.unbind('' % event.num) + event.widget.unbind('') # ///////////////////////////////////////////////////////////////// # Properties @@ -243,19 +246,19 @@ class MultiListbox(Frame): def _select(self, e): i = e.widget.nearest(e.y) - self.selection_clear(0, "end") + self.selection_clear(0, 'end') self.selection_set(i) self.activate(i) self.focus() def _scroll(self, delta): for lb in self._listboxes: - lb.yview_scroll(delta, "unit") - return "break" + lb.yview_scroll(delta, 'unit') + return 'break' def _pagesize(self): """:return: The number of rows that makes up one page""" - return int(self.index("@0,1000000")) - int(self.index("@0,0")) + return int(self.index('@0,1000000')) - int(self.index('@0,0')) # ///////////////////////////////////////////////////////////////// # Row selection @@ -273,7 +276,7 @@ class MultiListbox(Frame): selected index, to ensure that it is visible. """ if (index is not None) and (delta is not None): - raise ValueError("specify index or delta, but not both") + raise ValueError('specify index or delta, but not both') # If delta was given, then calculate index. if delta is not None: @@ -283,7 +286,7 @@ class MultiListbox(Frame): index = int(self.curselection()[0]) + delta # Clear all selected rows. 
- self.selection_clear(0, "end") + self.selection_clear(0, 'end') # Select the specified index if index is not None: @@ -308,10 +311,10 @@ class MultiListbox(Frame): """ cnf = dict(list(cnf.items()) + list(kw.items())) for (key, val) in list(cnf.items()): - if key.startswith("label_") or key.startswith("label-"): + if key.startswith('label_') or key.startswith('label-'): for label in self._labels: label.configure({key[6:]: val}) - elif key.startswith("listbox_") or key.startswith("listbox-"): + elif key.startswith('listbox_') or key.startswith('listbox-'): for listbox in self._listboxes: listbox.configure({key[8:]: val}) else: @@ -344,12 +347,12 @@ class MultiListbox(Frame): cnf = dict(list(cnf.items()) + list(kw.items())) for (key, val) in list(cnf.items()): if key in ( - "background", - "bg", - "foreground", - "fg", - "selectbackground", - "selectforeground", + 'background', + 'bg', + 'foreground', + 'fg', + 'selectbackground', + 'selectforeground', ): for i in range(lb.size()): lb.itemconfigure(i, {key: val}) @@ -380,8 +383,8 @@ class MultiListbox(Frame): for elt in rows: if len(elt) != len(self._column_names): raise ValueError( - "rows should be tuples whose length " - "is equal to the number of columns" + 'rows should be tuples whose length ' + 'is equal to the number of columns' ) for (lb, elts) in zip(self._listboxes, list(zip(*rows))): lb.insert(index, *elts) @@ -435,10 +438,10 @@ class MultiListbox(Frame): weight = self._column_weights[col_index] if self._labels: self._labels[col_index].grid( - column=col_index, row=0, sticky="news", padx=0, pady=0 + column=col_index, row=0, sticky='news', padx=0, pady=0 ) self._listboxes[col_index].grid( - column=col_index, row=1, sticky="news", padx=0, pady=0 + column=col_index, row=1, sticky='news', padx=0, pady=0 ) self.grid_columnconfigure(col_index, weight=weight) @@ -677,22 +680,22 @@ class Table(object): # Create our multi-list box. self._mlb = MultiListbox(self._frame, column_names, column_weights, cnf, **kw) - self._mlb.pack(side="left", expand=True, fill="both") + self._mlb.pack(side='left', expand=True, fill='both') # Optional scrollbar if scrollbar: - sb = Scrollbar(self._frame, orient="vertical", command=self._mlb.yview) - self._mlb.listboxes[0]["yscrollcommand"] = sb.set + sb = Scrollbar(self._frame, orient='vertical', command=self._mlb.yview) + self._mlb.listboxes[0]['yscrollcommand'] = sb.set # for listbox in self._mlb.listboxes: # listbox['yscrollcommand'] = sb.set - sb.pack(side="right", fill="y") + sb.pack(side='right', fill='y') self._scrollbar = sb # Set up sorting self._sortkey = None if click_to_sort: for i, l in enumerate(self._mlb.column_labels): - l.bind("", self._sort) + l.bind('', self._sort) # Fill in our multi-list box. self._fill_table() @@ -804,7 +807,7 @@ class Table(object): Delete all rows in this table. """ self._rows = [] - self._mlb.delete(0, "end") + self._mlb.delete(0, 'end') if self._DEBUG: self._check_table_vs_mlb() @@ -818,7 +821,7 @@ class Table(object): ``i``th row and the ``j``th column. """ if isinstance(index, slice): - raise ValueError("Slicing not supported") + raise ValueError('Slicing not supported') elif isinstance(index, tuple) and len(index) == 2: return self._rows[index[0]][self.column_index(index[1])] else: @@ -839,7 +842,7 @@ class Table(object): ``val``. 
""" if isinstance(index, slice): - raise ValueError("Slicing not supported") + raise ValueError('Slicing not supported') # table[i,j] = val elif isinstance(index, tuple) and len(index) == 2: @@ -868,9 +871,9 @@ class Table(object): Delete the ``row_index``th row from this table. """ if isinstance(row_index, slice): - raise ValueError("Slicing not supported") + raise ValueError('Slicing not supported') if isinstance(row_index, tuple) and len(row_index) == 2: - raise ValueError("Cannot delete a single cell!") + raise ValueError('Cannot delete a single cell!') del self._rows[row_index] self._mlb.delete(row_index) if self._DEBUG: @@ -889,7 +892,7 @@ class Table(object): """ if len(rowvalue) != self._num_columns: raise ValueError( - "Row %r has %d columns; expected %d" + 'Row %r has %d columns; expected %d' % (rowvalue, len(rowvalue), self._num_columns) ) @@ -947,7 +950,7 @@ class Table(object): # Sorting # ///////////////////////////////////////////////////////////////// - def sort_by(self, column_index, order="toggle"): + def sort_by(self, column_index, order='toggle'): """ Sort the rows in this table, using the specified column's values as a sort key. @@ -966,7 +969,7 @@ class Table(object): then reverse the rows; otherwise sort in ascending order. """ - if order not in ("ascending", "descending", "toggle"): + if order not in ('ascending', 'descending', 'toggle'): raise ValueError( 'sort_by(): order should be "ascending", ' '"descending", or "toggle".' ) @@ -974,11 +977,11 @@ class Table(object): config_cookie = self._save_config_info(index_by_id=True) # Sort the rows. - if order == "toggle" and column_index == self._sortkey: + if order == 'toggle' and column_index == self._sortkey: self._rows.reverse() else: self._rows.sort( - key=operator.itemgetter(column_index), reverse=(order == "descending") + key=operator.itemgetter(column_index), reverse=(order == 'descending') ) self._sortkey = column_index @@ -996,12 +999,12 @@ class Table(object): # If they click on the far-left of far-right of a column's # label, then resize rather than sorting. if self._mlb._resize_column(event): - return "continue" + return 'continue' # Otherwise, sort. else: self.sort_by(column_index) - return "continue" + return 'continue' # ///////////////////////////////////////////////////////////////// # { Table Drawing Helpers @@ -1016,20 +1019,20 @@ class Table(object): selection will also be lost -- i.e., no row will be selected after this call completes. """ - self._mlb.delete(0, "end") + self._mlb.delete(0, 'end') for i, row in enumerate(self._rows): if self._reprfunc is not None: row = [self._reprfunc(i, j, v) for (j, v) in enumerate(row)] - self._mlb.insert("end", row) + self._mlb.insert('end', row) def _get_itemconfig(self, r, c): return dict( (k, self._mlb.itemconfig(r, c, k)[-1]) for k in ( - "foreground", - "selectforeground", - "background", - "selectbackground", + 'foreground', + 'selectforeground', + 'background', + 'selectbackground', ) ) @@ -1083,7 +1086,7 @@ class Table(object): # Clear the selection. 
if selection is None: - self._mlb.selection_clear(0, "end") + self._mlb.selection_clear(0, 'end') # Restore selection & color config if index_by_id: @@ -1135,46 +1138,46 @@ class Table(object): # update this to use new WordNet API def demo(): root = Tk() - root.bind("", lambda e: root.destroy()) + root.bind('', lambda e: root.destroy()) table = Table( root, - "Word Synset Hypernym Hyponym".split(), + 'Word Synset Hypernym Hyponym'.split(), column_weights=[0, 1, 1, 1], - reprfunc=(lambda i, j, s: " %s" % s), + reprfunc=(lambda i, j, s: ' %s' % s), ) - table.pack(expand=True, fill="both") + table.pack(expand=True, fill='both') from nltk.corpus import wordnet from nltk.corpus import brown for word, pos in sorted(set(brown.tagged_words()[:500])): - if pos[0] != "N": + if pos[0] != 'N': continue word = word.lower() for synset in wordnet.synsets(word): try: hyper_def = synset.hypernyms()[0].definition() except: - hyper_def = "*none*" + hyper_def = '*none*' try: hypo_def = synset.hypernyms()[0].definition() except: - hypo_def = "*none*" + hypo_def = '*none*' table.append([word, synset.definition(), hyper_def, hypo_def]) - table.columnconfig("Word", background="#afa") - table.columnconfig("Synset", background="#efe") - table.columnconfig("Hypernym", background="#fee") - table.columnconfig("Hyponym", background="#ffe") + table.columnconfig('Word', background='#afa') + table.columnconfig('Synset', background='#efe') + table.columnconfig('Hypernym', background='#fee') + table.columnconfig('Hyponym', background='#ffe') for row in range(len(table)): - for column in ("Hypernym", "Hyponym"): - if table[row, column] == "*none*": + for column in ('Hypernym', 'Hyponym'): + if table[row, column] == '*none*': table.itemconfig( - row, column, foreground="#666", selectforeground="#666" + row, column, foreground='#666', selectforeground='#666' ) root.mainloop() -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/draw/tree.py b/nlp_resource_data/nltk/draw/tree.py index 33bfb9a..8124f5e 100644 --- a/nlp_resource_data/nltk/draw/tree.py +++ b/nlp_resource_data/nltk/draw/tree.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Graphical Representations for Trees # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # URL: # For license information, see LICENSE.TXT @@ -9,7 +9,7 @@ Graphically display a Tree. """ -from tkinter import IntVar, Menu, Tk +from six.moves.tkinter import IntVar, Menu, Tk from nltk.util import in_idle from nltk.tree import Tree @@ -77,9 +77,9 @@ class TreeSegmentWidget(CanvasWidget): self._ordered = False # Create canvas objects. 
- self._lines = [canvas.create_line(0, 0, 0, 0, fill="#006060") for c in subtrees] + self._lines = [canvas.create_line(0, 0, 0, 0, fill='#006060') for c in subtrees] self._polygon = canvas.create_polygon( - 0, 0, fill="", state="hidden", outline="#006060" + 0, 0, fill='', state='hidden', outline='#006060' ) # Register child widgets (label + subtrees) @@ -94,68 +94,68 @@ class TreeSegmentWidget(CanvasWidget): def __setitem__(self, attr, value): canvas = self.canvas() - if attr == "roof": + if attr == 'roof': self._roof = value if self._roof: for l in self._lines: - canvas.itemconfig(l, state="hidden") - canvas.itemconfig(self._polygon, state="normal") + canvas.itemconfig(l, state='hidden') + canvas.itemconfig(self._polygon, state='normal') else: for l in self._lines: - canvas.itemconfig(l, state="normal") - canvas.itemconfig(self._polygon, state="hidden") - elif attr == "orientation": - if value == "horizontal": + canvas.itemconfig(l, state='normal') + canvas.itemconfig(self._polygon, state='hidden') + elif attr == 'orientation': + if value == 'horizontal': self._horizontal = 1 - elif value == "vertical": + elif value == 'vertical': self._horizontal = 0 else: - raise ValueError("orientation must be horizontal or vertical") - elif attr == "color": + raise ValueError('orientation must be horizontal or vertical') + elif attr == 'color': for l in self._lines: canvas.itemconfig(l, fill=value) canvas.itemconfig(self._polygon, outline=value) - elif isinstance(attr, tuple) and attr[0] == "color": + elif isinstance(attr, tuple) and attr[0] == 'color': # Set the color of an individual line. l = self._lines[int(attr[1])] canvas.itemconfig(l, fill=value) - elif attr == "fill": + elif attr == 'fill': canvas.itemconfig(self._polygon, fill=value) - elif attr == "width": + elif attr == 'width': canvas.itemconfig(self._polygon, {attr: value}) for l in self._lines: canvas.itemconfig(l, {attr: value}) - elif attr in ("xspace", "yspace"): - if attr == "xspace": + elif attr in ('xspace', 'yspace'): + if attr == 'xspace': self._xspace = value - elif attr == "yspace": + elif attr == 'yspace': self._yspace = value self.update(self._label) - elif attr == "ordered": + elif attr == 'ordered': self._ordered = value else: CanvasWidget.__setitem__(self, attr, value) def __getitem__(self, attr): - if attr == "roof": + if attr == 'roof': return self._roof - elif attr == "width": + elif attr == 'width': return self.canvas().itemcget(self._polygon, attr) - elif attr == "color": - return self.canvas().itemcget(self._polygon, "outline") - elif isinstance(attr, tuple) and attr[0] == "color": + elif attr == 'color': + return self.canvas().itemcget(self._polygon, 'outline') + elif isinstance(attr, tuple) and attr[0] == 'color': l = self._lines[int(attr[1])] - return self.canvas().itemcget(l, "fill") - elif attr == "xspace": + return self.canvas().itemcget(l, 'fill') + elif attr == 'xspace': return self._xspace - elif attr == "yspace": + elif attr == 'yspace': return self._yspace - elif attr == "orientation": + elif attr == 'orientation': if self._horizontal: - return "horizontal" + return 'horizontal' else: - return "vertical" - elif attr == "ordered": + return 'vertical' + elif attr == 'ordered': return self._ordered else: return CanvasWidget.__getitem__(self, attr) @@ -196,7 +196,7 @@ class TreeSegmentWidget(CanvasWidget): canvas = self.canvas() self._subtrees.insert(index, child) self._add_child_widget(child) - self._lines.append(canvas.create_line(0, 0, 0, 0, fill="#006060")) + self._lines.append(canvas.create_line(0, 0, 0, 
0, fill='#006060')) self.update(self._label) # but.. lines??? @@ -416,7 +416,7 @@ class TreeSegmentWidget(CanvasWidget): self._managing = False def __repr__(self): - return "[TreeSeg %s: %s]" % (self._label, self._subtrees) + return '[TreeSeg %s: %s]' % (self._label, self._subtrees) def _tree_to_treeseg( @@ -479,16 +479,16 @@ def tree_to_treesegment( loc_attribs = {} for (key, value) in list(attribs.items()): - if key[:5] == "tree_": + if key[:5] == 'tree_': tree_attribs[key[5:]] = value - elif key[:5] == "node_": + elif key[:5] == 'node_': node_attribs[key[5:]] = value - elif key[:5] == "leaf_": + elif key[:5] == 'leaf_': leaf_attribs[key[5:]] = value - elif key[:4] == "loc_": + elif key[:4] == 'loc_': loc_attribs[key[4:]] = value else: - raise ValueError("Bad attribute: %s" % key) + raise ValueError('Bad attribute: %s' % key) return _tree_to_treeseg( canvas, t, @@ -558,15 +558,15 @@ class TreeWidget(CanvasWidget): # Attributes. self._nodeattribs = {} self._leafattribs = {} - self._locattribs = {"color": "#008000"} - self._line_color = "#008080" + self._locattribs = {'color': '#008000'} + self._line_color = '#008080' self._line_width = 1 - self._roof_color = "#008080" - self._roof_fill = "#c0c0c0" + self._roof_color = '#008080' + self._roof_fill = '#c0c0c0' self._shapeable = False self._xspace = 10 self._yspace = 10 - self._orientation = "vertical" + self._orientation = 'vertical' self._ordered = False # Build trees. @@ -712,90 +712,90 @@ class TreeWidget(CanvasWidget): return leaf def __setitem__(self, attr, value): - if attr[:5] == "node_": + if attr[:5] == 'node_': for node in self._nodes: node[attr[5:]] = value - elif attr[:5] == "leaf_": + elif attr[:5] == 'leaf_': for leaf in self._leaves: leaf[attr[5:]] = value - elif attr == "line_color": + elif attr == 'line_color': self._line_color = value for tseg in list(self._expanded_trees.values()): - tseg["color"] = value - elif attr == "line_width": + tseg['color'] = value + elif attr == 'line_width': self._line_width = value for tseg in list(self._expanded_trees.values()): - tseg["width"] = value + tseg['width'] = value for tseg in list(self._collapsed_trees.values()): - tseg["width"] = value - elif attr == "roof_color": + tseg['width'] = value + elif attr == 'roof_color': self._roof_color = value for tseg in list(self._collapsed_trees.values()): - tseg["color"] = value - elif attr == "roof_fill": + tseg['color'] = value + elif attr == 'roof_fill': self._roof_fill = value for tseg in list(self._collapsed_trees.values()): - tseg["fill"] = value - elif attr == "shapeable": + tseg['fill'] = value + elif attr == 'shapeable': self._shapeable = value for tseg in list(self._expanded_trees.values()): - tseg["draggable"] = value + tseg['draggable'] = value for tseg in list(self._collapsed_trees.values()): - tseg["draggable"] = value + tseg['draggable'] = value for leaf in self._leaves: - leaf["draggable"] = value - elif attr == "xspace": + leaf['draggable'] = value + elif attr == 'xspace': self._xspace = value for tseg in list(self._expanded_trees.values()): - tseg["xspace"] = value + tseg['xspace'] = value for tseg in list(self._collapsed_trees.values()): - tseg["xspace"] = value + tseg['xspace'] = value self.manage() - elif attr == "yspace": + elif attr == 'yspace': self._yspace = value for tseg in list(self._expanded_trees.values()): - tseg["yspace"] = value + tseg['yspace'] = value for tseg in list(self._collapsed_trees.values()): - tseg["yspace"] = value + tseg['yspace'] = value self.manage() - elif attr == "orientation": + elif attr == 
'orientation': self._orientation = value for tseg in list(self._expanded_trees.values()): - tseg["orientation"] = value + tseg['orientation'] = value for tseg in list(self._collapsed_trees.values()): - tseg["orientation"] = value + tseg['orientation'] = value self.manage() - elif attr == "ordered": + elif attr == 'ordered': self._ordered = value for tseg in list(self._expanded_trees.values()): - tseg["ordered"] = value + tseg['ordered'] = value for tseg in list(self._collapsed_trees.values()): - tseg["ordered"] = value + tseg['ordered'] = value else: CanvasWidget.__setitem__(self, attr, value) def __getitem__(self, attr): - if attr[:5] == "node_": + if attr[:5] == 'node_': return self._nodeattribs.get(attr[5:], None) - elif attr[:5] == "leaf_": + elif attr[:5] == 'leaf_': return self._leafattribs.get(attr[5:], None) - elif attr[:4] == "loc_": + elif attr[:4] == 'loc_': return self._locattribs.get(attr[4:], None) - elif attr == "line_color": + elif attr == 'line_color': return self._line_color - elif attr == "line_width": + elif attr == 'line_width': return self._line_width - elif attr == "roof_color": + elif attr == 'roof_color': return self._roof_color - elif attr == "roof_fill": + elif attr == 'roof_fill': return self._roof_fill - elif attr == "shapeable": + elif attr == 'shapeable': return self._shapeable - elif attr == "xspace": + elif attr == 'xspace': return self._xspace - elif attr == "yspace": + elif attr == 'yspace': return self._yspace - elif attr == "orientation": + elif attr == 'orientation': return self._orientation else: return CanvasWidget.__getitem__(self, attr) @@ -818,7 +818,7 @@ class TreeWidget(CanvasWidget): Collapse/expand a tree. """ old_treeseg = treeseg - if old_treeseg["roof"]: + if old_treeseg['roof']: new_treeseg = self._expanded_trees[self._keys[old_treeseg]] else: new_treeseg = self._collapsed_trees[self._keys[old_treeseg]] @@ -857,18 +857,18 @@ class TreeView(object): self._trees = trees self._top = Tk() - self._top.title("NLTK") - self._top.bind("", self.destroy) - self._top.bind("", self.destroy) + self._top.title('NLTK') + self._top.bind('', self.destroy) + self._top.bind('', self.destroy) cf = self._cframe = CanvasFrame(self._top) - self._top.bind("", self._cframe.print_to_file) + self._top.bind('', self._cframe.print_to_file) # Size is variable. self._size = IntVar(self._top) self._size.set(12) - bold = ("helvetica", -self._size.get(), "bold") - helv = ("helvetica", -self._size.get()) + bold = ('helvetica', -self._size.get(), 'bold') + helv = ('helvetica', -self._size.get()) # Lay the trees out in a square. 
self._width = int(ceil(sqrt(len(trees)))) @@ -878,11 +878,11 @@ class TreeView(object): cf.canvas(), trees[i], node_font=bold, - leaf_color="#008040", - node_color="#004080", - roof_color="#004040", - roof_fill="white", - line_color="#004040", + leaf_color='#008040', + node_color='#004080', + roof_color='#004040', + roof_fill='white', + line_color='#004040', draggable=1, leaf_font=helv, ) @@ -891,7 +891,7 @@ class TreeView(object): cf.add_widget(widget, 0, 0) self._layout() - self._cframe.pack(expand=1, fill="both") + self._cframe.pack(expand=1, fill='both') self._init_menubar() def _layout(self): @@ -912,72 +912,72 @@ class TreeView(object): filemenu = Menu(menubar, tearoff=0) filemenu.add_command( - label="Print to Postscript", + label='Print to Postscript', underline=0, command=self._cframe.print_to_file, - accelerator="Ctrl-p", + accelerator='Ctrl-p', ) filemenu.add_command( - label="Exit", underline=1, command=self.destroy, accelerator="Ctrl-x" + label='Exit', underline=1, command=self.destroy, accelerator='Ctrl-x' ) - menubar.add_cascade(label="File", underline=0, menu=filemenu) + menubar.add_cascade(label='File', underline=0, menu=filemenu) zoommenu = Menu(menubar, tearoff=0) zoommenu.add_radiobutton( - label="Tiny", + label='Tiny', variable=self._size, underline=0, value=10, command=self.resize, ) zoommenu.add_radiobutton( - label="Small", + label='Small', variable=self._size, underline=0, value=12, command=self.resize, ) zoommenu.add_radiobutton( - label="Medium", + label='Medium', variable=self._size, underline=0, value=14, command=self.resize, ) zoommenu.add_radiobutton( - label="Large", + label='Large', variable=self._size, underline=0, value=28, command=self.resize, ) zoommenu.add_radiobutton( - label="Huge", + label='Huge', variable=self._size, underline=0, value=50, command=self.resize, ) - menubar.add_cascade(label="Zoom", underline=0, menu=zoommenu) + menubar.add_cascade(label='Zoom', underline=0, menu=zoommenu) self._top.config(menu=menubar) def resize(self, *e): - bold = ("helvetica", -self._size.get(), "bold") - helv = ("helvetica", -self._size.get()) + bold = ('helvetica', -self._size.get(), 'bold') + helv = ('helvetica', -self._size.get()) xspace = self._size.get() yspace = self._size.get() for widget in self._widgets: - widget["node_font"] = bold - widget["leaf_font"] = helv - widget["xspace"] = xspace - widget["yspace"] = yspace + widget['node_font'] = bold + widget['leaf_font'] = helv + widget['xspace'] = xspace + widget['yspace'] = yspace if self._size.get() < 20: - widget["line_width"] = 1 + widget['line_width'] = 1 elif self._size.get() < 30: - widget["line_width"] = 2 + widget['line_width'] = 2 else: - widget["line_width"] = 3 + widget['line_width'] = 3 self._layout() def destroy(self, *e): @@ -1018,45 +1018,45 @@ def demo(): import random def fill(cw): - cw["fill"] = "#%06d" % random.randint(0, 999999) + cw['fill'] = '#%06d' % random.randint(0, 999999) cf = CanvasFrame(width=550, height=450, closeenough=2) t = Tree.fromstring( - """ + ''' (S (NP the very big cat) - (VP (Adv sorta) (V saw) (NP (Det the) (N dog))))""" + (VP (Adv sorta) (V saw) (NP (Det the) (N dog))))''' ) tc = TreeWidget( cf.canvas(), t, draggable=1, - node_font=("helvetica", -14, "bold"), - leaf_font=("helvetica", -12, "italic"), - roof_fill="white", - roof_color="black", - leaf_color="green4", - node_color="blue2", + node_font=('helvetica', -14, 'bold'), + leaf_font=('helvetica', -12, 'italic'), + roof_fill='white', + roof_color='black', + leaf_color='green4', + node_color='blue2', ) 
cf.add_widget(tc, 10, 10) def boxit(canvas, text): - big = ("helvetica", -16, "bold") - return BoxWidget(canvas, TextWidget(canvas, text, font=big), fill="green") + big = ('helvetica', -16, 'bold') + return BoxWidget(canvas, TextWidget(canvas, text, font=big), fill='green') def ovalit(canvas, text): - return OvalWidget(canvas, TextWidget(canvas, text), fill="cyan") + return OvalWidget(canvas, TextWidget(canvas, text), fill='cyan') - treetok = Tree.fromstring("(S (NP this tree) (VP (V is) (AdjP shapeable)))") + treetok = Tree.fromstring('(S (NP this tree) (VP (V is) (AdjP shapeable)))') tc2 = TreeWidget(cf.canvas(), treetok, boxit, ovalit, shapeable=1) def color(node): - node["color"] = "#%04d00" % random.randint(0, 9999) + node['color'] = '#%04d00' % random.randint(0, 9999) def color2(treeseg): - treeseg.label()["fill"] = "#%06d" % random.randint(0, 9999) - treeseg.label().child()["color"] = "white" + treeseg.label()['fill'] = '#%06d' % random.randint(0, 9999) + treeseg.label().child()['color'] = 'white' tc.bind_click_trees(tc.toggle_collapsed) tc2.bind_click_trees(tc2.toggle_collapsed) @@ -1068,29 +1068,29 @@ def demo(): cf.add_widget(paren, tc.bbox()[2] + 10, 10) tree3 = Tree.fromstring( - """ + ''' (S (NP this tree) (AUX was) - (VP (V built) (PP (P with) (NP (N tree_to_treesegment)))))""" + (VP (V built) (PP (P with) (NP (N tree_to_treesegment)))))''' ) tc3 = tree_to_treesegment( - cf.canvas(), tree3, tree_color="green4", tree_xspace=2, tree_width=2 + cf.canvas(), tree3, tree_color='green4', tree_xspace=2, tree_width=2 ) - tc3["draggable"] = 1 + tc3['draggable'] = 1 cf.add_widget(tc3, 10, tc.bbox()[3] + 10) def orientswitch(treewidget): - if treewidget["orientation"] == "horizontal": - treewidget.expanded_tree(1, 1).subtrees()[0].set_text("vertical") - treewidget.collapsed_tree(1, 1).subtrees()[0].set_text("vertical") - treewidget.collapsed_tree(1).subtrees()[1].set_text("vertical") - treewidget.collapsed_tree().subtrees()[3].set_text("vertical") - treewidget["orientation"] = "vertical" + if treewidget['orientation'] == 'horizontal': + treewidget.expanded_tree(1, 1).subtrees()[0].set_text('vertical') + treewidget.collapsed_tree(1, 1).subtrees()[0].set_text('vertical') + treewidget.collapsed_tree(1).subtrees()[1].set_text('vertical') + treewidget.collapsed_tree().subtrees()[3].set_text('vertical') + treewidget['orientation'] = 'vertical' else: - treewidget.expanded_tree(1, 1).subtrees()[0].set_text("horizontal") - treewidget.collapsed_tree(1, 1).subtrees()[0].set_text("horizontal") - treewidget.collapsed_tree(1).subtrees()[1].set_text("horizontal") - treewidget.collapsed_tree().subtrees()[3].set_text("horizontal") - treewidget["orientation"] = "horizontal" + treewidget.expanded_tree(1, 1).subtrees()[0].set_text('horizontal') + treewidget.collapsed_tree(1, 1).subtrees()[0].set_text('horizontal') + treewidget.collapsed_tree(1).subtrees()[1].set_text('horizontal') + treewidget.collapsed_tree().subtrees()[3].set_text('horizontal') + treewidget['orientation'] = 'horizontal' text = """ Try clicking, right clicking, and dragging @@ -1102,19 +1102,19 @@ constructors for the nodes & leaves (BoxWidget and OvalWidget). 
The bottom-left tree is built from tree_to_treesegment.""" twidget = TextWidget(cf.canvas(), text.strip()) - textbox = BoxWidget(cf.canvas(), twidget, fill="white", draggable=1) + textbox = BoxWidget(cf.canvas(), twidget, fill='white', draggable=1) cf.add_widget(textbox, tc3.bbox()[2] + 10, tc2.bbox()[3] + 10) - tree4 = Tree.fromstring("(S (NP this tree) (VP (V is) (Adj horizontal)))") + tree4 = Tree.fromstring('(S (NP this tree) (VP (V is) (Adj horizontal)))') tc4 = TreeWidget( cf.canvas(), tree4, draggable=1, - line_color="brown2", - roof_color="brown2", - node_font=("helvetica", -12, "bold"), - node_color="brown4", - orientation="horizontal", + line_color='brown2', + roof_color='brown2', + node_font=('helvetica', -12, 'bold'), + node_color='brown4', + orientation='horizontal', ) tc4.manage() cf.add_widget(tc4, tc3.bbox()[2] + 10, textbox.bbox()[3] + 10) @@ -1125,5 +1125,5 @@ built from tree_to_treesegment.""" cf.mainloop() -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/draw/util.py b/nlp_resource_data/nltk/draw/util.py index be2db58..9daebbb 100644 --- a/nlp_resource_data/nltk/draw/util.py +++ b/nlp_resource_data/nltk/draw/util.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Drawing utilities # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # URL: # For license information, see LICENSE.TXT @@ -34,7 +34,8 @@ homepage (http://www.ags.uni-sb.de/~konrad/clig.html). """ from abc import ABCMeta, abstractmethod -from tkinter import ( +from six import add_metaclass +from six.moves.tkinter import ( Button, Canvas, Entry, @@ -50,7 +51,7 @@ from tkinter import ( Widget, RAISED, ) -from tkinter.filedialog import asksaveasfilename +from six.moves.tkinter_tkfiledialog import asksaveasfilename from nltk.util import in_idle @@ -59,7 +60,8 @@ from nltk.util import in_idle ##////////////////////////////////////////////////////// -class CanvasWidget(metaclass=ABCMeta): +@add_metaclass(ABCMeta) +class CanvasWidget(object): """ A collection of graphical elements and bindings used to display a complex object on a Tkinter ``Canvas``. A canvas widget is @@ -188,17 +190,17 @@ class CanvasWidget(metaclass=ABCMeta): :param attribs: The new canvas widget's attributes. """ if self.__class__ == CanvasWidget: - raise TypeError("CanvasWidget is an abstract base class") + raise TypeError('CanvasWidget is an abstract base class') if not isinstance(canvas, Canvas): - raise TypeError("Expected a canvas!") + raise TypeError('Expected a canvas!') self.__canvas = canvas self.__parent = parent # If the subclass constructor called _add_child_widget, then # self.__children will already exist. - if not hasattr(self, "_CanvasWidget__children"): + if not hasattr(self, '_CanvasWidget__children'): self.__children = [] # Is this widget hidden? @@ -222,9 +224,9 @@ class CanvasWidget(metaclass=ABCMeta): # Register any new bindings for tag in self._tags(): - self.__canvas.tag_bind(tag, "", self.__press_cb) - self.__canvas.tag_bind(tag, "", self.__press_cb) - self.__canvas.tag_bind(tag, "", self.__press_cb) + self.__canvas.tag_bind(tag, '', self.__press_cb) + self.__canvas.tag_bind(tag, '', self.__press_cb) + self.__canvas.tag_bind(tag, '', self.__press_cb) ##////////////////////////////////////////////////////// ## Inherited methods. 
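# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the patch: the hunk above swaps the
# Python-3-only ``class CanvasWidget(metaclass=ABCMeta)`` form for six's
# ``add_metaclass`` decorator so the module also imports under Python 2.
# A minimal leaf widget written against the contract shown in this file
# (create the canvas items first, call CanvasWidget.__init__ last, and
# implement _tags()) could look like the hypothetical DotWidget below.

from nltk.draw.util import CanvasFrame, CanvasWidget

class DotWidget(CanvasWidget):
    """A draggable filled circle."""

    def __init__(self, canvas, radius=6, **attribs):
        # Create the canvas item before calling CanvasWidget.__init__,
        # which registers press/drag bindings over the tags in _tags().
        self._tag = canvas.create_oval(0, 0, 2 * radius, 2 * radius,
                                       fill='black')
        CanvasWidget.__init__(self, canvas, **attribs)

    def _tags(self):
        # Canvas item ids managed by this widget.
        return [self._tag]

cf = CanvasFrame(width=200, height=200)
cf.add_widget(DotWidget(cf.canvas(), radius=8, draggable=1), 50, 50)
cf.mainloop()
# ---------------------------------------------------------------------------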
@@ -242,7 +244,7 @@ class CanvasWidget(metaclass=ABCMeta): if self.__hidden: return (0, 0, 0, 0) if len(self.tags()) == 0: - raise ValueError("No tags") + raise ValueError('No tags') return self.__canvas.bbox(*self.tags()) def width(self): @@ -252,7 +254,7 @@ class CanvasWidget(metaclass=ABCMeta): :rtype: int """ if len(self.tags()) == 0: - raise ValueError("No tags") + raise ValueError('No tags') bbox = self.__canvas.bbox(*self.tags()) return bbox[2] - bbox[0] @@ -263,7 +265,7 @@ class CanvasWidget(metaclass=ABCMeta): :rtype: int """ if len(self.tags()) == 0: - raise ValueError("No tags") + raise ValueError('No tags') bbox = self.__canvas.bbox(*self.tags()) return bbox[3] - bbox[1] @@ -314,7 +316,7 @@ class CanvasWidget(metaclass=ABCMeta): if self.__parent: self.__parent.update(self) - def moveto(self, x, y, anchor="NW"): + def moveto(self, x, y, anchor='NW'): """ Move this canvas widget to the given location. In particular, shift the canvas widget such that the corner or side of the @@ -329,21 +331,21 @@ class CanvasWidget(metaclass=ABCMeta): corner; etc. """ x1, y1, x2, y2 = self.bbox() - if anchor == "NW": + if anchor == 'NW': self.move(x - x1, y - y1) - if anchor == "N": + if anchor == 'N': self.move(x - x1 / 2 - x2 / 2, y - y1) - if anchor == "NE": + if anchor == 'NE': self.move(x - x2, y - y1) - if anchor == "E": + if anchor == 'E': self.move(x - x2, y - y1 / 2 - y2 / 2) - if anchor == "SE": + if anchor == 'SE': self.move(x - x2, y - y2) - if anchor == "S": + if anchor == 'S': self.move(x - x1 / 2 - x2 / 2, y - y2) - if anchor == "SW": + if anchor == 'SW': self.move(x - x1, y - y2) - if anchor == "W": + if anchor == 'W': self.move(x - x1, y - y1 / 2 - y2 / 2) def destroy(self): @@ -365,9 +367,9 @@ class CanvasWidget(metaclass=ABCMeta): return for tag in self.tags(): - self.__canvas.tag_unbind(tag, "") - self.__canvas.tag_unbind(tag, "") - self.__canvas.tag_unbind(tag, "") + self.__canvas.tag_unbind(tag, '') + self.__canvas.tag_unbind(tag, '') + self.__canvas.tag_unbind(tag, '') self.__canvas.delete(*self.tags()) self.__canvas = None @@ -418,7 +420,7 @@ class CanvasWidget(metaclass=ABCMeta): :rtype: list of int """ if self.__canvas is None: - raise ValueError("Attempt to access a destroyed canvas widget") + raise ValueError('Attempt to access a destroyed canvas widget') tags = [] tags += self._tags() for child in self.__children: @@ -433,10 +435,10 @@ class CanvasWidget(metaclass=ABCMeta): :rtype: None """ - if attr == "draggable": + if attr == 'draggable': self.__draggable = value else: - raise ValueError("Unknown attribute %r" % attr) + raise ValueError('Unknown attribute %r' % attr) def __getitem__(self, attr): """ @@ -445,17 +447,17 @@ class CanvasWidget(metaclass=ABCMeta): canvas widget. :rtype: (any) """ - if attr == "draggable": + if attr == 'draggable': return self.__draggable else: - raise ValueError("Unknown attribute %r" % attr) + raise ValueError('Unknown attribute %r' % attr) def __repr__(self): """ :return: a string representation of this canvas widget. 
:rtype: str """ - return "<%s>" % self.__class__.__name__ + return '<%s>' % self.__class__.__name__ def hide(self): """ @@ -465,7 +467,7 @@ class CanvasWidget(metaclass=ABCMeta): """ self.__hidden = 1 for tag in self.tags(): - self.__canvas.itemconfig(tag, state="hidden") + self.__canvas.itemconfig(tag, state='hidden') def show(self): """ @@ -475,7 +477,7 @@ class CanvasWidget(metaclass=ABCMeta): """ self.__hidden = 0 for tag in self.tags(): - self.__canvas.itemconfig(tag, state="normal") + self.__canvas.itemconfig(tag, state='normal') def hidden(self): """ @@ -516,7 +518,7 @@ class CanvasWidget(metaclass=ABCMeta): will be called with this ``CanvasWidget`` as its argument. """ self.__draggable = 1 - self.__callbacks["drag"] = callback + self.__callbacks['drag'] = callback def unbind_click(self, button=1): """ @@ -537,7 +539,7 @@ class CanvasWidget(metaclass=ABCMeta): Remove a callback that was registered with ``bind_drag``. """ try: - del self.__callbacks["drag"] + del self.__callbacks['drag'] except: pass @@ -556,14 +558,14 @@ class CanvasWidget(metaclass=ABCMeta): # If we're already waiting for a button release, then ignore # this new button press. if ( - self.__canvas.bind("") - or self.__canvas.bind("") - or self.__canvas.bind("") + self.__canvas.bind('') + or self.__canvas.bind('') + or self.__canvas.bind('') ): return # Unbind motion (just in case; this shouldn't be necessary) - self.__canvas.unbind("") + self.__canvas.unbind('') # Record the button press event. self.__press = event @@ -573,13 +575,13 @@ class CanvasWidget(metaclass=ABCMeta): if event.num == 1: widget = self while widget is not None: - if widget["draggable"]: + if widget['draggable']: widget.__start_drag(event) break widget = widget.parent() # Set up the button release callback. - self.__canvas.bind("" % event.num, self.__release_cb) + self.__canvas.bind('' % event.num, self.__release_cb) def __start_drag(self, event): """ @@ -587,7 +589,7 @@ class CanvasWidget(metaclass=ABCMeta): - register a motion callback - record the drag coordinates """ - self.__canvas.bind("", self.__motion_cb) + self.__canvas.bind('', self.__motion_cb) self.__drag_x = event.x self.__drag_y = event.y @@ -609,8 +611,8 @@ class CanvasWidget(metaclass=ABCMeta): - call the appropriate handler. """ # Unbind the button release & motion callbacks. - self.__canvas.unbind("" % event.num) - self.__canvas.unbind("") + self.__canvas.unbind('' % event.num) + self.__canvas.unbind('') # Is it a click or a drag? if ( @@ -635,12 +637,12 @@ class CanvasWidget(metaclass=ABCMeta): call it. If no ancestors have a drag callback, do nothing. """ if self.__draggable: - if "drag" in self.__callbacks: - cb = self.__callbacks["drag"] + if 'drag' in self.__callbacks: + cb = self.__callbacks['drag'] try: cb(self) except: - print("Error in drag callback for %r" % self) + print('Error in drag callback for %r' % self) elif self.__parent is not None: self.__parent.__drag() @@ -655,7 +657,7 @@ class CanvasWidget(metaclass=ABCMeta): # try: cb(self) # except: - # print('Error in click callback for %r' % self) + # print 'Error in click callback for %r' % self # raise elif self.__parent is not None: self.__parent.__click(button) @@ -677,10 +679,10 @@ class CanvasWidget(metaclass=ABCMeta): have a parent. 
:type child: CanvasWidget """ - if not hasattr(self, "_CanvasWidget__children"): + if not hasattr(self, '_CanvasWidget__children'): self.__children = [] if child.__parent is not None: - raise ValueError("{} already has a parent".format(child)) + raise ValueError('{} already has a parent'.format(child)) child.__parent = self self.__children.append(child) @@ -768,19 +770,19 @@ class TextWidget(CanvasWidget): CanvasWidget.__init__(self, canvas, **attribs) def __setitem__(self, attr, value): - if attr in ("color", "font", "justify", "width"): - if attr == "color": - attr = "fill" + if attr in ('color', 'font', 'justify', 'width'): + if attr == 'color': + attr = 'fill' self.canvas().itemconfig(self._tag, {attr: value}) else: CanvasWidget.__setitem__(self, attr, value) def __getitem__(self, attr): - if attr == "width": + if attr == 'width': return int(self.canvas().itemcget(self._tag, attr)) - elif attr in ("color", "font", "justify"): - if attr == "color": - attr = "fill" + elif attr in ('color', 'font', 'justify'): + if attr == 'color': + attr = 'fill' return self.canvas().itemcget(self._tag, attr) else: return CanvasWidget.__getitem__(self, attr) @@ -793,7 +795,7 @@ class TextWidget(CanvasWidget): :return: The text displayed by this text widget. :rtype: str """ - return self.canvas().itemcget(self._tag, "TEXT") + return self.canvas().itemcget(self._tag, 'TEXT') def set_text(self, text): """ @@ -808,7 +810,7 @@ class TextWidget(CanvasWidget): self.parent().update(self) def __repr__(self): - return "[Text: %r]" % self._text + return '[Text: %r]' % self._text class SymbolWidget(TextWidget): @@ -830,24 +832,24 @@ class SymbolWidget(TextWidget): """ SYMBOLS = { - "neg": "\330", - "disj": "\332", - "conj": "\331", - "lambda": "\154", - "merge": "\304", - "forall": "\042", - "exists": "\044", - "subseteq": "\315", - "subset": "\314", - "notsubset": "\313", - "emptyset": "\306", - "imp": "\336", - "rightarrow": chr(222), #'\256', - "equal": "\75", - "notequal": "\271", - "intersection": "\307", - "union": "\310", - "epsilon": "e", + 'neg': '\330', + 'disj': '\332', + 'conj': '\331', + 'lambda': '\154', + 'merge': '\304', + 'forall': '\042', + 'exists': '\044', + 'subseteq': '\315', + 'subset': '\314', + 'notsubset': '\313', + 'emptyset': '\306', + 'imp': '\336', + 'rightarrow': chr(222), #'\256', + 'equal': '\75', + 'notequal': '\271', + 'intersection': '\307', + 'union': '\310', + 'epsilon': 'e', } def __init__(self, canvas, symbol, **attribs): @@ -860,8 +862,8 @@ class SymbolWidget(TextWidget): :param symbol: The name of the symbol to display. :param attribs: The new canvas widget's attributes. """ - attribs["font"] = "symbol" - TextWidget.__init__(self, canvas, "", **attribs) + attribs['font'] = 'symbol' + TextWidget.__init__(self, canvas, '', **attribs) self.set_symbol(symbol) def symbol(self): @@ -880,12 +882,12 @@ class SymbolWidget(TextWidget): :param symbol: The name of the symbol to display. 
""" if symbol not in SymbolWidget.SYMBOLS: - raise ValueError("Unknown symbol: %s" % symbol) + raise ValueError('Unknown symbol: %s' % symbol) self._symbol = symbol self.set_text(SymbolWidget.SYMBOLS[symbol]) def __repr__(self): - return "[Symbol: %r]" % self._symbol + return '[Symbol: %r]' % self._symbol @staticmethod def symbolsheet(size=20): @@ -899,24 +901,24 @@ class SymbolWidget(TextWidget): def destroy(e, top=top): top.destroy() - top.bind("q", destroy) - Button(top, text="Quit", command=top.destroy).pack(side="bottom") - text = Text(top, font=("helvetica", -size), width=20, height=30) - text.pack(side="left") + top.bind('q', destroy) + Button(top, text='Quit', command=top.destroy).pack(side='bottom') + text = Text(top, font=('helvetica', -size), width=20, height=30) + text.pack(side='left') sb = Scrollbar(top, command=text.yview) - text["yscrollcommand"] = sb.set - sb.pack(side="right", fill="y") - text.tag_config("symbol", font=("symbol", -size)) + text['yscrollcommand'] = sb.set + sb.pack(side='right', fill='y') + text.tag_config('symbol', font=('symbol', -size)) for i in range(256): if i in (0, 10): continue # null and newline for k, v in list(SymbolWidget.SYMBOLS.items()): if v == chr(i): - text.insert("end", "%-10s\t" % k) + text.insert('end', '%-10s\t' % k) break else: - text.insert("end", "%-10d \t" % i) - text.insert("end", "[%s]\n" % chr(i), "symbol") + text.insert('end', '%-10d \t' % i) + text.insert('end', '[%s]\n' % chr(i), 'symbol') top.mainloop() @@ -973,9 +975,9 @@ class AbstractContainerWidget(CanvasWidget): def __repr__(self): name = self.__class__.__name__ - if name[-6:] == "Widget": + if name[-6:] == 'Widget': name = name[:-6] - return "[%s: %r]" % (name, self._child) + return '[%s: %r]' % (name, self._child) class BoxWidget(AbstractContainerWidget): @@ -1009,26 +1011,26 @@ class BoxWidget(AbstractContainerWidget): AbstractContainerWidget.__init__(self, canvas, child, **attribs) def __setitem__(self, attr, value): - if attr == "margin": + if attr == 'margin': self._margin = value - elif attr in ("outline", "fill", "width"): + elif attr in ('outline', 'fill', 'width'): self.canvas().itemconfig(self._box, {attr: value}) else: CanvasWidget.__setitem__(self, attr, value) def __getitem__(self, attr): - if attr == "margin": + if attr == 'margin': return self._margin - elif attr == "width": + elif attr == 'width': return float(self.canvas().itemcget(self._box, attr)) - elif attr in ("outline", "fill", "width"): + elif attr in ('outline', 'fill', 'width'): return self.canvas().itemcget(self._box, attr) else: return CanvasWidget.__getitem__(self, attr) def _update(self, child): (x1, y1, x2, y2) = child.bbox() - margin = self._margin + self["width"] / 2 + margin = self._margin + self['width'] / 2 self.canvas().coords( self._box, x1 - margin, y1 - margin, x2 + margin, y2 + margin ) @@ -1065,8 +1067,8 @@ class OvalWidget(AbstractContainerWidget): self._child = child self._margin = 1 self._oval = canvas.create_oval(1, 1, 1, 1) - self._circle = attribs.pop("circle", False) - self._double = attribs.pop("double", False) + self._circle = attribs.pop('circle', False) + self._double = attribs.pop('double', False) if self._double: self._oval2 = canvas.create_oval(1, 1, 1, 1) else: @@ -1076,42 +1078,42 @@ class OvalWidget(AbstractContainerWidget): def __setitem__(self, attr, value): c = self.canvas() - if attr == "margin": + if attr == 'margin': self._margin = value - elif attr == "double": + elif attr == 'double': if value == True and self._oval2 is None: # Copy attributes & 
position from self._oval. x1, y1, x2, y2 = c.bbox(self._oval) - w = self["width"] * 2 + w = self['width'] * 2 self._oval2 = c.create_oval( x1 - w, y1 - w, x2 + w, y2 + w, - outline=c.itemcget(self._oval, "outline"), - width=c.itemcget(self._oval, "width"), + outline=c.itemcget(self._oval, 'outline'), + width=c.itemcget(self._oval, 'width'), ) c.tag_lower(self._oval2) if value == False and self._oval2 is not None: c.delete(self._oval2) self._oval2 = None - elif attr in ("outline", "fill", "width"): + elif attr in ('outline', 'fill', 'width'): c.itemconfig(self._oval, {attr: value}) - if self._oval2 is not None and attr != "fill": + if self._oval2 is not None and attr != 'fill': c.itemconfig(self._oval2, {attr: value}) - if self._oval2 is not None and attr != "fill": + if self._oval2 is not None and attr != 'fill': self.canvas().itemconfig(self._oval2, {attr: value}) else: CanvasWidget.__setitem__(self, attr, value) def __getitem__(self, attr): - if attr == "margin": + if attr == 'margin': return self._margin - elif attr == "double": + elif attr == 'double': return self._double is not None - elif attr == "width": + elif attr == 'width': return float(self.canvas().itemcget(self._oval, attr)) - elif attr in ("outline", "fill", "width"): + elif attr in ('outline', 'fill', 'width'): return self.canvas().itemcget(self._oval, attr) else: return CanvasWidget.__getitem__(self, attr) @@ -1181,25 +1183,25 @@ class ParenWidget(AbstractContainerWidget): :param attribs: The new canvas widget's attributes. """ self._child = child - self._oparen = canvas.create_arc(1, 1, 1, 1, style="arc", start=90, extent=180) - self._cparen = canvas.create_arc(1, 1, 1, 1, style="arc", start=-90, extent=180) + self._oparen = canvas.create_arc(1, 1, 1, 1, style='arc', start=90, extent=180) + self._cparen = canvas.create_arc(1, 1, 1, 1, style='arc', start=-90, extent=180) AbstractContainerWidget.__init__(self, canvas, child, **attribs) def __setitem__(self, attr, value): - if attr == "color": + if attr == 'color': self.canvas().itemconfig(self._oparen, outline=value) self.canvas().itemconfig(self._cparen, outline=value) - elif attr == "width": + elif attr == 'width': self.canvas().itemconfig(self._oparen, width=value) self.canvas().itemconfig(self._cparen, width=value) else: CanvasWidget.__setitem__(self, attr, value) def __getitem__(self, attr): - if attr == "color": - return self.canvas().itemcget(self._oparen, "outline") - elif attr == "width": - return self.canvas().itemcget(self._oparen, "width") + if attr == 'color': + return self.canvas().itemcget(self._oparen, 'outline') + elif attr == 'width': + return self.canvas().itemcget(self._oparen, 'width') else: return CanvasWidget.__getitem__(self, attr) @@ -1241,20 +1243,20 @@ class BracketWidget(AbstractContainerWidget): AbstractContainerWidget.__init__(self, canvas, child, **attribs) def __setitem__(self, attr, value): - if attr == "color": + if attr == 'color': self.canvas().itemconfig(self._obrack, fill=value) self.canvas().itemconfig(self._cbrack, fill=value) - elif attr == "width": + elif attr == 'width': self.canvas().itemconfig(self._obrack, width=value) self.canvas().itemconfig(self._cbrack, width=value) else: CanvasWidget.__setitem__(self, attr, value) def __getitem__(self, attr): - if attr == "color": - return self.canvas().itemcget(self._obrack, "outline") - elif attr == "width": - return self.canvas().itemcget(self._obrack, "width") + if attr == 'color': + return self.canvas().itemcget(self._obrack, 'outline') + elif attr == 'width': + return 
self.canvas().itemcget(self._obrack, 'width') else: return CanvasWidget.__getitem__(self, attr) @@ -1298,7 +1300,7 @@ class SequenceWidget(CanvasWidget): :type children: list(CanvasWidget) :param attribs: The new canvas widget's attributes. """ - self._align = "center" + self._align = 'center' self._space = 1 self._ordered = False self._children = list(children) @@ -1307,23 +1309,23 @@ class SequenceWidget(CanvasWidget): CanvasWidget.__init__(self, canvas, **attribs) def __setitem__(self, attr, value): - if attr == "align": - if value not in ("top", "bottom", "center"): - raise ValueError("Bad alignment: %r" % value) + if attr == 'align': + if value not in ('top', 'bottom', 'center'): + raise ValueError('Bad alignment: %r' % value) self._align = value - elif attr == "space": + elif attr == 'space': self._space = value - elif attr == "ordered": + elif attr == 'ordered': self._ordered = value else: CanvasWidget.__setitem__(self, attr, value) def __getitem__(self, attr): - if attr == "align": + if attr == 'align': return self._align - elif attr == "space": + elif attr == 'space': return self._space - elif attr == "ordered": + elif attr == 'ordered': return self._ordered else: return CanvasWidget.__getitem__(self, attr) @@ -1332,11 +1334,11 @@ class SequenceWidget(CanvasWidget): return [] def _yalign(self, top, bot): - if self._align == "top": + if self._align == 'top': return top - if self._align == "bottom": + if self._align == 'bottom': return bot - if self._align == "center": + if self._align == 'center': return (top + bot) / 2 def _update(self, child): @@ -1390,7 +1392,7 @@ class SequenceWidget(CanvasWidget): x -= x2 - x1 + self._space def __repr__(self): - return "[Sequence: " + repr(self._children)[1:-1] + "]" + return '[Sequence: ' + repr(self._children)[1:-1] + ']' # Provide an alias for the child_widgets() member. children = CanvasWidget.child_widgets @@ -1470,7 +1472,7 @@ class StackWidget(CanvasWidget): :type children: list(CanvasWidget) :param attribs: The new canvas widget's attributes. 
""" - self._align = "center" + self._align = 'center' self._space = 1 self._ordered = False self._children = list(children) @@ -1479,23 +1481,23 @@ class StackWidget(CanvasWidget): CanvasWidget.__init__(self, canvas, **attribs) def __setitem__(self, attr, value): - if attr == "align": - if value not in ("left", "right", "center"): - raise ValueError("Bad alignment: %r" % value) + if attr == 'align': + if value not in ('left', 'right', 'center'): + raise ValueError('Bad alignment: %r' % value) self._align = value - elif attr == "space": + elif attr == 'space': self._space = value - elif attr == "ordered": + elif attr == 'ordered': self._ordered = value else: CanvasWidget.__setitem__(self, attr, value) def __getitem__(self, attr): - if attr == "align": + if attr == 'align': return self._align - elif attr == "space": + elif attr == 'space': return self._space - elif attr == "ordered": + elif attr == 'ordered': return self._ordered else: return CanvasWidget.__getitem__(self, attr) @@ -1504,11 +1506,11 @@ class StackWidget(CanvasWidget): return [] def _xalign(self, left, right): - if self._align == "left": + if self._align == 'left': return left - if self._align == "right": + if self._align == 'right': return right - if self._align == "center": + if self._align == 'center': return (left + right) / 2 def _update(self, child): @@ -1562,7 +1564,7 @@ class StackWidget(CanvasWidget): y -= y2 - y1 + self._space def __repr__(self): - return "[Stack: " + repr(self._children)[1:-1] + "]" + return '[Stack: ' + repr(self._children)[1:-1] + ']' # Provide an alias for the child_widgets() member. children = CanvasWidget.child_widgets @@ -1643,7 +1645,7 @@ class SpaceWidget(CanvasWidget): width -= 4 if height > 4: height -= 4 - self._tag = canvas.create_line(1, 1, width, height, fill="") + self._tag = canvas.create_line(1, 1, width, height, fill='') CanvasWidget.__init__(self, canvas, **attribs) # note: width() and height() are already defined by CanvasWidget. @@ -1673,7 +1675,7 @@ class SpaceWidget(CanvasWidget): return [self._tag] def __repr__(self): - return "[Space]" + return '[Space]' class ScrollWatcherWidget(CanvasWidget): @@ -1739,7 +1741,7 @@ class ScrollWatcherWidget(CanvasWidget): """ bbox = self.bbox() canvas = self.canvas() - scrollregion = [int(n) for n in canvas["scrollregion"].split()] + scrollregion = [int(n) for n in canvas['scrollregion'].split()] if len(scrollregion) != 4: return if ( @@ -1748,13 +1750,13 @@ class ScrollWatcherWidget(CanvasWidget): or bbox[2] > scrollregion[2] or bbox[3] > scrollregion[3] ): - scrollregion = "%d %d %d %d" % ( + scrollregion = '%d %d %d %d' % ( min(bbox[0], scrollregion[0]), min(bbox[1], scrollregion[1]), max(bbox[2], scrollregion[2]), max(bbox[3], scrollregion[3]), ) - canvas["scrollregion"] = scrollregion + canvas['scrollregion'] = scrollregion ##////////////////////////////////////////////////////// @@ -1791,35 +1793,35 @@ class CanvasFrame(object): # If no parent was given, set up a top-level window. 
if parent is None: self._parent = Tk() - self._parent.title("NLTK") - self._parent.bind("", lambda e: self.print_to_file()) - self._parent.bind("", self.destroy) - self._parent.bind("", self.destroy) + self._parent.title('NLTK') + self._parent.bind('', lambda e: self.print_to_file()) + self._parent.bind('', self.destroy) + self._parent.bind('', self.destroy) else: self._parent = parent # Create a frame for the canvas & scrollbars self._frame = frame = Frame(self._parent) self._canvas = canvas = Canvas(frame, **kw) - xscrollbar = Scrollbar(self._frame, orient="horizontal") - yscrollbar = Scrollbar(self._frame, orient="vertical") - xscrollbar["command"] = canvas.xview - yscrollbar["command"] = canvas.yview - canvas["xscrollcommand"] = xscrollbar.set - canvas["yscrollcommand"] = yscrollbar.set - yscrollbar.pack(fill="y", side="right") - xscrollbar.pack(fill="x", side="bottom") - canvas.pack(expand=1, fill="both", side="left") + xscrollbar = Scrollbar(self._frame, orient='horizontal') + yscrollbar = Scrollbar(self._frame, orient='vertical') + xscrollbar['command'] = canvas.xview + yscrollbar['command'] = canvas.yview + canvas['xscrollcommand'] = xscrollbar.set + canvas['yscrollcommand'] = yscrollbar.set + yscrollbar.pack(fill='y', side='right') + xscrollbar.pack(fill='x', side='bottom') + canvas.pack(expand=1, fill='both', side='left') # Set initial scroll region. - scrollregion = "0 0 %s %s" % (canvas["width"], canvas["height"]) - canvas["scrollregion"] = scrollregion + scrollregion = '0 0 %s %s' % (canvas['width'], canvas['height']) + canvas['scrollregion'] = scrollregion self._scrollwatcher = ScrollWatcherWidget(canvas) # If no parent was given, pack the frame, and add a menu. if parent is None: - self.pack(expand=1, fill="both") + self.pack(expand=1, fill='both') self._init_menubar() def _init_menubar(self): @@ -1827,15 +1829,15 @@ class CanvasFrame(object): filemenu = Menu(menubar, tearoff=0) filemenu.add_command( - label="Print to Postscript", + label='Print to Postscript', underline=0, command=self.print_to_file, - accelerator="Ctrl-p", + accelerator='Ctrl-p', ) filemenu.add_command( - label="Exit", underline=1, command=self.destroy, accelerator="Ctrl-x" + label='Exit', underline=1, command=self.destroy, accelerator='Ctrl-x' ) - menubar.add_cascade(label="File", underline=0, menu=filemenu) + menubar.add_cascade(label='File', underline=0, menu=filemenu) self._parent.config(menu=menubar) @@ -1849,8 +1851,8 @@ class CanvasFrame(object): :rtype: None """ if filename is None: - ftypes = [("Postscript files", ".ps"), ("All files", "*")] - filename = asksaveasfilename(filetypes=ftypes, defaultextension=".ps") + ftypes = [('Postscript files', '.ps'), ('All files', '*')] + filename = asksaveasfilename(filetypes=ftypes, defaultextension='.ps') if not filename: return (x0, y0, w, h) = self.scrollregion() @@ -1865,9 +1867,9 @@ class CanvasFrame(object): pagey=0, ) # workaround for bug in Tk font handling - postscript = postscript.replace(" 0 scalefont ", " 9 scalefont ") - with open(filename, "wb") as f: - f.write(postscript.encode("utf8")) + postscript = postscript.replace(' 0 scalefont ', ' 9 scalefont ') + with open(filename, 'wb') as f: + f.write(postscript.encode('utf8')) def scrollregion(self): """ @@ -1875,7 +1877,7 @@ class CanvasFrame(object): this ``CanvasFrame``. 
:rtype: 4-tuple of int """ - (x1, y1, x2, y2) = self._canvas["scrollregion"].split() + (x1, y1, x2, y2) = self._canvas['scrollregion'].split() return (int(x1), int(y1), int(x2), int(y2)) def canvas(self): @@ -2021,32 +2023,32 @@ class ShowText(object): self._top = top = Toplevel(root) top.title(title) - b = Button(top, text="Ok", command=self.destroy) - b.pack(side="bottom") + b = Button(top, text='Ok', command=self.destroy) + b.pack(side='bottom') tbf = Frame(top) - tbf.pack(expand=1, fill="both") - scrollbar = Scrollbar(tbf, orient="vertical") - scrollbar.pack(side="right", fill="y") - textbox = Text(tbf, wrap="word", width=width, height=height, **textbox_options) - textbox.insert("end", text) - textbox["state"] = "disabled" - textbox.pack(side="left", expand=1, fill="both") - scrollbar["command"] = textbox.yview - textbox["yscrollcommand"] = scrollbar.set + tbf.pack(expand=1, fill='both') + scrollbar = Scrollbar(tbf, orient='vertical') + scrollbar.pack(side='right', fill='y') + textbox = Text(tbf, wrap='word', width=width, height=height, **textbox_options) + textbox.insert('end', text) + textbox['state'] = 'disabled' + textbox.pack(side='left', expand=1, fill='both') + scrollbar['command'] = textbox.yview + textbox['yscrollcommand'] = scrollbar.set # Make it easy to close the window. - top.bind("q", self.destroy) - top.bind("x", self.destroy) - top.bind("c", self.destroy) - top.bind("", self.destroy) - top.bind("", self.destroy) + top.bind('q', self.destroy) + top.bind('x', self.destroy) + top.bind('c', self.destroy) + top.bind('', self.destroy) + top.bind('', self.destroy) # Focus the scrollbar, so they can use up/down, etc. scrollbar.focus() def find_dimentions(self, text, width, height): - lines = text.split("\n") + lines = text.split('\n') if width is None: maxwidth = max(len(line) for line in lines) width = min(maxwidth, 80) @@ -2055,7 +2057,7 @@ class ShowText(object): height = 0 for line in lines: while len(line) > width: - brk = line[:width].rfind(" ") + brk = line[:width].rfind(' ') line = line[brk:] height += 1 height += 1 @@ -2092,7 +2094,7 @@ class EntryDialog(object): """ def __init__( - self, parent, original_text="", instructions="", set_callback=None, title=None + self, parent, original_text='', instructions='', set_callback=None, title=None ): self._parent = parent self._original_text = original_text @@ -2106,36 +2108,36 @@ class EntryDialog(object): # The text entry box. entryframe = Frame(self._top) - entryframe.pack(expand=1, fill="both", padx=5, pady=5, ipady=10) + entryframe.pack(expand=1, fill='both', padx=5, pady=5, ipady=10) if instructions: l = Label(entryframe, text=instructions) - l.pack(side="top", anchor="w", padx=30) + l.pack(side='top', anchor='w', padx=30) self._entry = Entry(entryframe, width=width) - self._entry.pack(expand=1, fill="x", padx=30) + self._entry.pack(expand=1, fill='x', padx=30) self._entry.insert(0, original_text) # A divider - divider = Frame(self._top, borderwidth=1, relief="sunken") - divider.pack(fill="x", ipady=1, padx=10) + divider = Frame(self._top, borderwidth=1, relief='sunken') + divider.pack(fill='x', ipady=1, padx=10) # The buttons. 
buttons = Frame(self._top) - buttons.pack(expand=0, fill="x", padx=5, pady=5) - b = Button(buttons, text="Cancel", command=self._cancel, width=8) - b.pack(side="right", padx=5) - b = Button(buttons, text="Ok", command=self._ok, width=8, default="active") - b.pack(side="left", padx=5) - b = Button(buttons, text="Apply", command=self._apply, width=8) - b.pack(side="left") - - self._top.bind("", self._ok) - self._top.bind("", self._cancel) - self._top.bind("", self._cancel) + buttons.pack(expand=0, fill='x', padx=5, pady=5) + b = Button(buttons, text='Cancel', command=self._cancel, width=8) + b.pack(side='right', padx=5) + b = Button(buttons, text='Ok', command=self._ok, width=8, default='active') + b.pack(side='left', padx=5) + b = Button(buttons, text='Apply', command=self._apply, width=8) + b.pack(side='left') + + self._top.bind('', self._ok) + self._top.bind('', self._cancel) + self._top.bind('', self._cancel) self._entry.focus() def _reset(self, *e): - self._entry.delete(0, "end") + self._entry.delete(0, 'end') self._entry.insert(0, self._original_text) if self._set_callback: self._set_callback(self._original_text) @@ -2198,8 +2200,8 @@ class ColorizedList(object): self._init_itemframe(options.copy()) # Set up key & mouse bindings. - self._textwidget.bind("", self._keypress) - self._textwidget.bind("", self._buttonpress) + self._textwidget.bind('', self._keypress) + self._textwidget.bind('', self._buttonpress) # Fill in the given CFG's items. self._items = None @@ -2247,17 +2249,17 @@ class ColorizedList(object): return self._items = list(items) - self._textwidget["state"] = "normal" - self._textwidget.delete("1.0", "end") + self._textwidget['state'] = 'normal' + self._textwidget.delete('1.0', 'end') for item in items: for (text, colortag) in self._item_repr(item): - assert "\n" not in text, "item repr may not contain newline" - self._textwidget.insert("end", text, colortag) - self._textwidget.insert("end", "\n") + assert '\n' not in text, 'item repr may not contain newline' + self._textwidget.insert('end', text, colortag) + self._textwidget.insert('end', '\n') # Remove the final newline - self._textwidget.delete("end-1char", "end") - self._textwidget.mark_set("insert", "1.0") - self._textwidget["state"] = "disabled" + self._textwidget.delete('end-1char', 'end') + self._textwidget.mark_set('insert', '1.0') + self._textwidget['state'] = 'disabled' # Clear all marks self._marks.clear() @@ -2270,12 +2272,12 @@ class ColorizedList(object): """ if item is None: self._marks.clear() - self._textwidget.tag_remove("highlight", "1.0", "end+1char") + self._textwidget.tag_remove('highlight', '1.0', 'end+1char') else: index = self._items.index(item) del self._marks[item] - (start, end) = ("%d.0" % (index + 1), "%d.0" % (index + 2)) - self._textwidget.tag_remove("highlight", start, end) + (start, end) = ('%d.0' % (index + 1), '%d.0' % (index + 2)) + self._textwidget.tag_remove('highlight', start, end) def mark(self, item): """ @@ -2284,8 +2286,8 @@ class ColorizedList(object): """ self._marks[item] = 1 index = self._items.index(item) - (start, end) = ("%d.0" % (index + 1), "%d.0" % (index + 2)) - self._textwidget.tag_add("highlight", start, end) + (start, end) = ('%d.0' % (index + 1), '%d.0' % (index + 2)) + self._textwidget.tag_add('highlight', start, end) def markonly(self, item): """ @@ -2301,7 +2303,7 @@ class ColorizedList(object): the item is already visible, then do nothing. 
""" index = self._items.index(item) - self._textwidget.see("%d.0" % (index + 1)) + self._textwidget.see('%d.0' % (index + 1)) # //////////////////////////////////////////////////////////// # Callbacks @@ -2320,10 +2322,10 @@ class ColorizedList(object): single item as its argument. (The item selected or the item moved to). """ - if event == "select": - events = ["click1", "space", "return"] - elif event == "move": - events = ["up", "down", "next", "prior"] + if event == 'select': + events = ['click1', 'space', 'return'] + elif event == 'move': + events = ['up', 'down', 'next', 'prior'] else: events = [event] @@ -2337,10 +2339,10 @@ class ColorizedList(object): """ if event is None: events = list(self._callbacks.keys()) - elif event == "select": - events = ["click1", "space", "return"] - elif event == "move": - events = ["up", "down", "next", "prior"] + elif event == 'select': + events = ['click1', 'space', 'return'] + elif event == 'move': + events = ['up', 'down', 'next', 'prior'] else: events = [event] @@ -2377,26 +2379,26 @@ class ColorizedList(object): self._itemframe = Frame(self._parent) # Create the basic Text widget & scrollbar. - options.setdefault("background", "#e0e0e0") + options.setdefault('background', '#e0e0e0') self._textwidget = Text(self._itemframe, **options) - self._textscroll = Scrollbar(self._itemframe, takefocus=0, orient="vertical") + self._textscroll = Scrollbar(self._itemframe, takefocus=0, orient='vertical') self._textwidget.config(yscrollcommand=self._textscroll.set) self._textscroll.config(command=self._textwidget.yview) - self._textscroll.pack(side="right", fill="y") - self._textwidget.pack(expand=1, fill="both", side="left") + self._textscroll.pack(side='right', fill='y') + self._textwidget.pack(expand=1, fill='both', side='left') # Initialize the colorization tags self._textwidget.tag_config( - "highlight", background="#e0ffff", border="1", relief="raised" + 'highlight', background='#e0ffff', border='1', relief='raised' ) self._init_colortags(self._textwidget, options) # How do I want to mark keyboard selection? 
- self._textwidget.tag_config("sel", foreground="") + self._textwidget.tag_config('sel', foreground='') self._textwidget.tag_config( - "sel", foreground="", background="", border="", underline=1 + 'sel', foreground='', background='', border='', underline=1 ) - self._textwidget.tag_lower("highlight", "sel") + self._textwidget.tag_lower('highlight', 'sel') def _fire_callback(self, event, itemnum): if event not in self._callbacks: @@ -2409,38 +2411,38 @@ class ColorizedList(object): cb_func(item) def _buttonpress(self, event): - clickloc = "@%d,%d" % (event.x, event.y) + clickloc = '@%d,%d' % (event.x, event.y) insert_point = self._textwidget.index(clickloc) - itemnum = int(insert_point.split(".")[0]) - 1 - self._fire_callback("click%d" % event.num, itemnum) + itemnum = int(insert_point.split('.')[0]) - 1 + self._fire_callback('click%d' % event.num, itemnum) def _keypress(self, event): - if event.keysym == "Return" or event.keysym == "space": - insert_point = self._textwidget.index("insert") - itemnum = int(insert_point.split(".")[0]) - 1 + if event.keysym == 'Return' or event.keysym == 'space': + insert_point = self._textwidget.index('insert') + itemnum = int(insert_point.split('.')[0]) - 1 self._fire_callback(event.keysym.lower(), itemnum) return - elif event.keysym == "Down": - delta = "+1line" - elif event.keysym == "Up": - delta = "-1line" - elif event.keysym == "Next": - delta = "+10lines" - elif event.keysym == "Prior": - delta = "-10lines" + elif event.keysym == 'Down': + delta = '+1line' + elif event.keysym == 'Up': + delta = '-1line' + elif event.keysym == 'Next': + delta = '+10lines' + elif event.keysym == 'Prior': + delta = '-10lines' else: - return "continue" + return 'continue' - self._textwidget.mark_set("insert", "insert" + delta) - self._textwidget.see("insert") - self._textwidget.tag_remove("sel", "1.0", "end+1char") - self._textwidget.tag_add("sel", "insert linestart", "insert lineend") + self._textwidget.mark_set('insert', 'insert' + delta) + self._textwidget.see('insert') + self._textwidget.tag_remove('sel', '1.0', 'end+1char') + self._textwidget.tag_add('sel', 'insert linestart', 'insert lineend') - insert_point = self._textwidget.index("insert") - itemnum = int(insert_point.split(".")[0]) - 1 + insert_point = self._textwidget.index('insert') + itemnum = int(insert_point.split('.')[0]) - 1 self._fire_callback(event.keysym.lower(), itemnum) - return "break" + return 'break' ##////////////////////////////////////////////////////// @@ -2450,9 +2452,9 @@ class ColorizedList(object): class MutableOptionMenu(Menubutton): def __init__(self, master, values, **options): - self._callback = options.get("command") - if "command" in options: - del options["command"] + self._callback = options.get('command') + if 'command' in options: + del options['command'] # Create a variable self._variable = variable = StringVar() @@ -2469,7 +2471,7 @@ class MutableOptionMenu(Menubutton): } kw.update(options) Widget.__init__(self, master, "menubutton", kw) - self.widgetName = "tk_optionMenu" + self.widgetName = 'tk_optionMenu' self._menu = Menu(self, name="menu", tearoff=0) self.menuname = self._menu._w @@ -2501,7 +2503,7 @@ class MutableOptionMenu(Menubutton): self._menu.delete(i, i) def __getitem__(self, name): - if name == "menu": + if name == 'menu': return self.__menu return Widget.__getitem__(self, name) @@ -2524,38 +2526,38 @@ def demo(): def fill(cw): from random import randint - cw["fill"] = "#00%04d" % randint(0, 9999) + cw['fill'] = '#00%04d' % randint(0, 9999) def color(cw): from 
random import randint - cw["color"] = "#ff%04d" % randint(0, 9999) + cw['color'] = '#ff%04d' % randint(0, 9999) cf = CanvasFrame(closeenough=10, width=300, height=300) c = cf.canvas() - ct3 = TextWidget(c, "hiya there", draggable=1) - ct2 = TextWidget(c, "o o\n||\n___\n U", draggable=1, justify="center") - co = OvalWidget(c, ct2, outline="red") - ct = TextWidget(c, "o o\n||\n\\___/", draggable=1, justify="center") - cp = ParenWidget(c, ct, color="red") - cb = BoxWidget(c, cp, fill="cyan", draggable=1, width=3, margin=10) + ct3 = TextWidget(c, 'hiya there', draggable=1) + ct2 = TextWidget(c, 'o o\n||\n___\n U', draggable=1, justify='center') + co = OvalWidget(c, ct2, outline='red') + ct = TextWidget(c, 'o o\n||\n\\___/', draggable=1, justify='center') + cp = ParenWidget(c, ct, color='red') + cb = BoxWidget(c, cp, fill='cyan', draggable=1, width=3, margin=10) equation = SequenceWidget( c, - SymbolWidget(c, "forall"), - TextWidget(c, "x"), - SymbolWidget(c, "exists"), - TextWidget(c, "y: "), - TextWidget(c, "x"), - SymbolWidget(c, "notequal"), - TextWidget(c, "y"), + SymbolWidget(c, 'forall'), + TextWidget(c, 'x'), + SymbolWidget(c, 'exists'), + TextWidget(c, 'y: '), + TextWidget(c, 'x'), + SymbolWidget(c, 'notequal'), + TextWidget(c, 'y'), ) space = SpaceWidget(c, 0, 30) - cstack = StackWidget(c, cb, ct3, space, co, equation, align="center") + cstack = StackWidget(c, cb, ct3, space, co, equation, align='center') prompt_msg = TextWidget( - c, "try clicking\nand dragging", draggable=1, justify="center" + c, 'try clicking\nand dragging', draggable=1, justify='center' ) cs = SequenceWidget(c, cstack, prompt_msg) - zz = BracketWidget(c, cs, color="green4", width=3) + zz = BracketWidget(c, cs, color='green4', width=3) cf.add_widget(zz, 60, 30) cb.bind_click(fill) @@ -2568,5 +2570,5 @@ def demo(): # ShowText(None, 'title', ((('this is text'*150)+'\n')*5)) -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/featstruct.py b/nlp_resource_data/nltk/featstruct.py index f90b581..9c2cdeb 100644 --- a/nlp_resource_data/nltk/featstruct.py +++ b/nlp_resource_data/nltk/featstruct.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Feature Structures # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper , # Rob Speer, # Steven Bird @@ -88,11 +88,14 @@ In general, if your feature structures will contain any reentrances, or if you plan to use them as dictionary keys, it is strongly recommended that you use full-fledged ``FeatStruct`` objects. """ +from __future__ import print_function, unicode_literals, division import re import copy from functools import total_ordering +from six import integer_types, string_types + from nltk.internals import read_str, raise_unorderable_types from nltk.sem.logic import ( Variable, @@ -101,6 +104,8 @@ from nltk.sem.logic import ( LogicParser, LogicalExpressionException, ) +from nltk.compat import python_2_unicode_compatible, unicode_repr + ###################################################################### # Feature Structure @@ -178,10 +183,10 @@ class FeatStruct(SubstituteBindingsI): return FeatDict.__new__(FeatDict, features, **morefeatures) elif morefeatures: raise TypeError( - "Keyword arguments may only be specified " - "if features is None or is a mapping." + 'Keyword arguments may only be specified ' + 'if features is None or is a mapping.' 
) - if isinstance(features, str): + if isinstance(features, string_types): if FeatStructReader._START_FDICT_RE.match(features): return FeatDict.__new__(FeatDict, features, **morefeatures) else: @@ -189,7 +194,7 @@ class FeatStruct(SubstituteBindingsI): elif _is_sequence(features): return FeatList.__new__(FeatList, features) else: - raise TypeError("Expected string or mapping or sequence") + raise TypeError('Expected string or mapping or sequence') # Otherwise, construct the object as normal. else: @@ -267,7 +272,7 @@ class FeatStruct(SubstituteBindingsI): otherwise, raise ``TypeError``. """ if not self._frozen: - raise TypeError("FeatStructs must be frozen before they " "can be hashed.") + raise TypeError('FeatStructs must be frozen before they ' 'can be hashed.') try: return self._hash except AttributeError: @@ -571,7 +576,7 @@ _FROZEN_ERROR = "Frozen FeatStructs may not be modified." _FROZEN_NOTICE = "\n%sIf self is frozen, raise ValueError." -def _check_frozen(method, indent=""): +def _check_frozen(method, indent=''): """ Given a method function, return a new method function that first checks if ``self._frozen`` is true; and if so, raises ``ValueError`` @@ -586,7 +591,7 @@ def _check_frozen(method, indent=""): return method(self, *args, **kwargs) wrapped.__name__ = method.__name__ - wrapped.__doc__ = (method.__doc__ or "") + (_FROZEN_NOTICE % indent) + wrapped.__doc__ = (method.__doc__ or '') + (_FROZEN_NOTICE % indent) return wrapped @@ -595,7 +600,7 @@ def _check_frozen(method, indent=""): ###################################################################### - +@python_2_unicode_compatible class FeatDict(FeatStruct, dict): """ A feature structure that acts like a Python dictionary. I.e., a @@ -628,7 +633,7 @@ class FeatDict(FeatStruct, dict): ``morefeatures``, then the value from ``morefeatures`` will be used. 
""" - if isinstance(features, str): + if isinstance(features, string_types): FeatStructReader().fromstring(features, self) self.update(**morefeatures) else: @@ -643,7 +648,7 @@ class FeatDict(FeatStruct, dict): def __getitem__(self, name_or_path): """If the feature with the given name or path exists, return its value; otherwise, raise ``KeyError``.""" - if isinstance(name_or_path, (str, Feature)): + if isinstance(name_or_path, (string_types, Feature)): return dict.__getitem__(self, name_or_path) elif isinstance(name_or_path, tuple): try: @@ -683,7 +688,7 @@ class FeatDict(FeatStruct, dict): its value; otherwise, raise ``KeyError``.""" if self._frozen: raise ValueError(_FROZEN_ERROR) - if isinstance(name_or_path, (str, Feature)): + if isinstance(name_or_path, (string_types, Feature)): return dict.__delitem__(self, name_or_path) elif isinstance(name_or_path, tuple): if len(name_or_path) == 0: @@ -702,7 +707,7 @@ class FeatDict(FeatStruct, dict): ``KeyError``.""" if self._frozen: raise ValueError(_FROZEN_ERROR) - if isinstance(name_or_path, (str, Feature)): + if isinstance(name_or_path, (string_types, Feature)): return dict.__setitem__(self, name_or_path, value) elif isinstance(name_or_path, tuple): if len(name_or_path) == 0: @@ -725,20 +730,20 @@ class FeatDict(FeatStruct, dict): raise ValueError(_FROZEN_ERROR) if features is None: items = () - elif hasattr(features, "items") and callable(features.items): + elif hasattr(features, 'items') and callable(features.items): items = features.items() - elif hasattr(features, "__iter__"): + elif hasattr(features, '__iter__'): items = features else: - raise ValueError("Expected mapping or list of tuples") + raise ValueError('Expected mapping or list of tuples') for key, val in items: - if not isinstance(key, (str, Feature)): - raise TypeError("Feature names must be strings") + if not isinstance(key, (string_types, Feature)): + raise TypeError('Feature names must be strings') self[key] = val for key, val in morefeatures.items(): - if not isinstance(key, (str, Feature)): - raise TypeError("Feature names must be strings") + if not isinstance(key, (string_types, Feature)): + raise TypeError('Feature names must be strings') self[key] = val ##//////////////////////////////////////////////////////////// @@ -773,12 +778,12 @@ class FeatDict(FeatStruct, dict): Display a multi-line representation of this feature dictionary as an FVM (feature value matrix). """ - return "\n".join(self._str(self._find_reentrances({}), {})) + return '\n'.join(self._str(self._find_reentrances({}), {})) def _repr(self, reentrances, reentrance_ids): segments = [] - prefix = "" - suffix = "" + prefix = '' + suffix = '' # If this is the first time we've seen a reentrant structure, # then assign it a unique identifier. @@ -789,37 +794,37 @@ class FeatDict(FeatStruct, dict): # sorting note: keys are unique strings, so we'll never fall # through to comparing values. 
for (fname, fval) in sorted(self.items()): - display = getattr(fname, "display", None) + display = getattr(fname, 'display', None) if id(fval) in reentrance_ids: - segments.append("%s->(%s)" % (fname, reentrance_ids[id(fval)])) + segments.append('%s->(%s)' % (fname, reentrance_ids[id(fval)])) elif ( - display == "prefix" + display == 'prefix' and not prefix - and isinstance(fval, (Variable, str)) + and isinstance(fval, (Variable, string_types)) ): - prefix = "%s" % fval - elif display == "slash" and not suffix: + prefix = '%s' % fval + elif display == 'slash' and not suffix: if isinstance(fval, Variable): - suffix = "/%s" % fval.name + suffix = '/%s' % fval.name else: - suffix = "/%s" % repr(fval) + suffix = '/%s' % unicode_repr(fval) elif isinstance(fval, Variable): - segments.append("%s=%s" % (fname, fval.name)) + segments.append('%s=%s' % (fname, fval.name)) elif fval is True: - segments.append("+%s" % fname) + segments.append('+%s' % fname) elif fval is False: - segments.append("-%s" % fname) + segments.append('-%s' % fname) elif isinstance(fval, Expression): - segments.append("%s=<%s>" % (fname, fval)) + segments.append('%s=<%s>' % (fname, fval)) elif not isinstance(fval, FeatStruct): - segments.append("%s=%s" % (fname, repr(fval))) + segments.append('%s=%s' % (fname, unicode_repr(fval))) else: fval_repr = fval._repr(reentrances, reentrance_ids) - segments.append("%s=%s" % (fname, fval_repr)) + segments.append('%s=%s' % (fname, fval_repr)) # If it's reentrant, then add on an identifier tag. if reentrances[id(self)]: - prefix = "(%s)%s" % (reentrance_ids[id(self)], prefix) - return "%s[%s]%s" % (prefix, ", ".join(segments), suffix) + prefix = '(%s)%s' % (reentrance_ids[id(self)], prefix) + return '%s[%s]%s' % (prefix, ', '.join(segments), suffix) def _str(self, reentrances, reentrance_ids): """ @@ -843,9 +848,9 @@ class FeatDict(FeatStruct, dict): # Special case: empty feature dict. if len(self) == 0: if reentrances[id(self)]: - return ["(%s) []" % reentrance_ids[id(self)]] + return ['(%s) []' % reentrance_ids[id(self)]] else: - return ["[]"] + return ['[]'] # What's the longest feature name? Use this to align names. maxfnamelen = max(len("%s" % k) for k in self.keys()) @@ -856,60 +861,60 @@ class FeatDict(FeatStruct, dict): for (fname, fval) in sorted(self.items()): fname = ("%s" % fname).ljust(maxfnamelen) if isinstance(fval, Variable): - lines.append("%s = %s" % (fname, fval.name)) + lines.append('%s = %s' % (fname, fval.name)) elif isinstance(fval, Expression): - lines.append("%s = <%s>" % (fname, fval)) + lines.append('%s = <%s>' % (fname, fval)) elif isinstance(fval, FeatList): fval_repr = fval._repr(reentrances, reentrance_ids) - lines.append("%s = %s" % (fname, repr(fval_repr))) + lines.append('%s = %s' % (fname, unicode_repr(fval_repr))) elif not isinstance(fval, FeatDict): # It's not a nested feature structure -- just print it. - lines.append("%s = %s" % (fname, repr(fval))) + lines.append('%s = %s' % (fname, unicode_repr(fval))) elif id(fval) in reentrance_ids: # It's a feature structure we've seen before -- print # the reentrance id. - lines.append("%s -> (%s)" % (fname, reentrance_ids[id(fval)])) + lines.append('%s -> (%s)' % (fname, reentrance_ids[id(fval)])) else: # It's a new feature structure. Separate it from # other values by a blank line. - if lines and lines[-1] != "": - lines.append("") + if lines and lines[-1] != '': + lines.append('') # Recursively print the feature's value (fval). 
fval_lines = fval._str(reentrances, reentrance_ids) # Indent each line to make room for fname. - fval_lines = [(" " * (maxfnamelen + 3)) + l for l in fval_lines] + fval_lines = [(' ' * (maxfnamelen + 3)) + l for l in fval_lines] # Pick which line we'll display fname on, & splice it in. nameline = (len(fval_lines) - 1) // 2 fval_lines[nameline] = ( - fname + " =" + fval_lines[nameline][maxfnamelen + 2 :] + fname + ' =' + fval_lines[nameline][maxfnamelen + 2 :] ) # Add the feature structure to the output. lines += fval_lines # Separate FeatStructs by a blank line. - lines.append("") + lines.append('') # Get rid of any excess blank lines. - if lines[-1] == "": + if lines[-1] == '': lines.pop() # Add brackets around everything. maxlen = max(len(line) for line in lines) - lines = ["[ %s%s ]" % (line, " " * (maxlen - len(line))) for line in lines] + lines = ['[ %s%s ]' % (line, ' ' * (maxlen - len(line))) for line in lines] # If it's reentrant, then add on an identifier tag. if reentrances[id(self)]: - idstr = "(%s) " % reentrance_ids[id(self)] - lines = [(" " * len(idstr)) + l for l in lines] + idstr = '(%s) ' % reentrance_ids[id(self)] + lines = [(' ' * len(idstr)) + l for l in lines] idline = (len(lines) - 1) // 2 lines[idline] = idstr + lines[idline][len(idstr) :] @@ -947,7 +952,7 @@ class FeatList(FeatStruct, list): ``FeatStructReader``. Otherwise, it should be a sequence of basic values and nested feature structures. """ - if isinstance(features, str): + if isinstance(features, string_types): FeatStructReader().fromstring(features, self) else: list.__init__(self, features) @@ -958,7 +963,7 @@ class FeatList(FeatStruct, list): _INDEX_ERROR = "Expected int or feature path. Got %r." def __getitem__(self, name_or_path): - if isinstance(name_or_path, int): + if isinstance(name_or_path, integer_types): return list.__getitem__(self, name_or_path) elif isinstance(name_or_path, tuple): try: @@ -978,7 +983,7 @@ class FeatList(FeatStruct, list): its value; otherwise, raise ``KeyError``.""" if self._frozen: raise ValueError(_FROZEN_ERROR) - if isinstance(name_or_path, (int, slice)): + if isinstance(name_or_path, (integer_types, slice)): return list.__delitem__(self, name_or_path) elif isinstance(name_or_path, tuple): if len(name_or_path) == 0: @@ -997,7 +1002,7 @@ class FeatList(FeatStruct, list): ``KeyError``.""" if self._frozen: raise ValueError(_FROZEN_ERROR) - if isinstance(name_or_path, (int, slice)): + if isinstance(name_or_path, (integer_types, slice)): return list.__setitem__(self, name_or_path, value) elif isinstance(name_or_path, tuple): if len(name_or_path) == 0: @@ -1055,24 +1060,24 @@ class FeatList(FeatStruct, list): if reentrances[id(self)]: assert id(self) not in reentrance_ids reentrance_ids[id(self)] = repr(len(reentrance_ids) + 1) - prefix = "(%s)" % reentrance_ids[id(self)] + prefix = '(%s)' % reentrance_ids[id(self)] else: - prefix = "" + prefix = '' segments = [] for fval in self: if id(fval) in reentrance_ids: - segments.append("->(%s)" % reentrance_ids[id(fval)]) + segments.append('->(%s)' % reentrance_ids[id(fval)]) elif isinstance(fval, Variable): segments.append(fval.name) elif isinstance(fval, Expression): - segments.append("%s" % fval) + segments.append('%s' % fval) elif isinstance(fval, FeatStruct): segments.append(fval._repr(reentrances, reentrance_ids)) else: - segments.append("%s" % repr(fval)) + segments.append('%s' % unicode_repr(fval)) - return "%s[%s]" % (prefix, ", ".join(segments)) + return '%s[%s]' % (prefix, ', '.join(segments)) 
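# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the patch: the _repr()/_str() hunks above
# are what render feature structures, tagging a shared (reentrant) value with
# an id such as (1) on first use and printing later occurrences as a
# back-reference ->(1).  A small usage example, assuming only the public
# FeatStruct name exported by this module:

from nltk.featstruct import FeatStruct

# B and D share one nested structure, so the display logic above assigns it
# the id (1) and shows D as a reference to it.
fs = FeatStruct("[A=a, B=(1)[C=c], D->(1)]")
print(repr(fs))   # single-line bracketed form built by _repr()
print(fs)         # multi-line feature-value-matrix form built by _str()
# ---------------------------------------------------------------------------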
###################################################################### @@ -1080,7 +1085,7 @@ class FeatList(FeatStruct, list): ###################################################################### -def substitute_bindings(fstruct, bindings, fs_class="default"): +def substitute_bindings(fstruct, bindings, fs_class='default'): """ Return the feature structure that is obtained by replacing each variable bound by ``bindings`` with its binding. If a variable is @@ -1091,7 +1096,7 @@ def substitute_bindings(fstruct, bindings, fs_class="default"): :type bindings: dict(Variable -> any) :param bindings: A dictionary mapping from variables to values. """ - if fs_class == "default": + if fs_class == 'default': fs_class = _default_fs_class(fstruct) fstruct = copy.deepcopy(fstruct) _substitute_bindings(fstruct, bindings, fs_class, set()) @@ -1109,7 +1114,7 @@ def _substitute_bindings(fstruct, bindings, fs_class, visited): elif _is_sequence(fstruct): items = enumerate(fstruct) else: - raise ValueError("Expected mapping or sequence") + raise ValueError('Expected mapping or sequence') for (fname, fval) in items: while isinstance(fval, Variable) and fval in bindings: fval = fstruct[fname] = bindings[fval] @@ -1119,7 +1124,7 @@ def _substitute_bindings(fstruct, bindings, fs_class, visited): fstruct[fname] = fval.substitute_bindings(bindings) -def retract_bindings(fstruct, bindings, fs_class="default"): +def retract_bindings(fstruct, bindings, fs_class='default'): """ Return the feature structure that is obtained by replacing each feature structure value that is bound by ``bindings`` with the @@ -1131,7 +1136,7 @@ def retract_bindings(fstruct, bindings, fs_class="default"): values in ``bindings`` may be modified if they are contained in ``fstruct``. """ - if fs_class == "default": + if fs_class == 'default': fs_class = _default_fs_class(fstruct) (fstruct, new_bindings) = copy.deepcopy((fstruct, bindings)) bindings.update(new_bindings) @@ -1151,7 +1156,7 @@ def _retract_bindings(fstruct, inv_bindings, fs_class, visited): elif _is_sequence(fstruct): items = enumerate(fstruct) else: - raise ValueError("Expected mapping or sequence") + raise ValueError('Expected mapping or sequence') for (fname, fval) in items: if isinstance(fval, fs_class): if id(fval) in inv_bindings: @@ -1159,12 +1164,12 @@ def _retract_bindings(fstruct, inv_bindings, fs_class, visited): _retract_bindings(fval, inv_bindings, fs_class, visited) -def find_variables(fstruct, fs_class="default"): +def find_variables(fstruct, fs_class='default'): """ :return: The set of variables used by this feature structure. :rtype: set(Variable) """ - if fs_class == "default": + if fs_class == 'default': fs_class = _default_fs_class(fstruct) return _variables(fstruct, set(), fs_class, set()) @@ -1179,7 +1184,7 @@ def _variables(fstruct, vars, fs_class, visited): elif _is_sequence(fstruct): items = enumerate(fstruct) else: - raise ValueError("Expected mapping or sequence") + raise ValueError('Expected mapping or sequence') for (fname, fval) in items: if isinstance(fval, Variable): vars.add(fval) @@ -1191,7 +1196,7 @@ def _variables(fstruct, vars, fs_class, visited): def rename_variables( - fstruct, vars=None, used_vars=(), new_vars=None, fs_class="default" + fstruct, vars=None, used_vars=(), new_vars=None, fs_class='default' ): """ Return the feature structure that is obtained by replacing @@ -1234,7 +1239,7 @@ def rename_variables( If new_vars is not specified, then an empty dictionary is used. 
""" - if fs_class == "default": + if fs_class == 'default': fs_class = _default_fs_class(fstruct) # Default values: @@ -1263,7 +1268,7 @@ def _rename_variables(fstruct, vars, used_vars, new_vars, fs_class, visited): elif _is_sequence(fstruct): items = enumerate(fstruct) else: - raise ValueError("Expected mapping or sequence") + raise ValueError('Expected mapping or sequence') for (fname, fval) in items: if isinstance(fval, Variable): # If it's in new_vars, then rebind it. @@ -1288,21 +1293,21 @@ def _rename_variables(fstruct, vars, used_vars, new_vars, fs_class, visited): def _rename_variable(var, used_vars): - name, n = re.sub("\d+$", "", var.name), 2 + name, n = re.sub('\d+$', '', var.name), 2 if not name: - name = "?" - while Variable("%s%s" % (name, n)) in used_vars: + name = '?' + while Variable('%s%s' % (name, n)) in used_vars: n += 1 - return Variable("%s%s" % (name, n)) + return Variable('%s%s' % (name, n)) -def remove_variables(fstruct, fs_class="default"): +def remove_variables(fstruct, fs_class='default'): """ :rtype: FeatStruct :return: The feature structure that is obtained by deleting all features whose values are ``Variables``. """ - if fs_class == "default": + if fs_class == 'default': fs_class = _default_fs_class(fstruct) return _remove_variables(copy.deepcopy(fstruct), fs_class, set()) @@ -1317,7 +1322,7 @@ def _remove_variables(fstruct, fs_class, visited): elif _is_sequence(fstruct): items = list(enumerate(fstruct)) else: - raise ValueError("Expected mapping or sequence") + raise ValueError('Expected mapping or sequence') for (fname, fval) in items: if isinstance(fval, Variable): @@ -1332,10 +1337,10 @@ def _remove_variables(fstruct, fs_class, visited): ###################################################################### - +@python_2_unicode_compatible class _UnificationFailure(object): def __repr__(self): - return "nltk.featstruct.UnificationFailure" + return 'nltk.featstruct.UnificationFailure' UnificationFailure = _UnificationFailure() @@ -1356,7 +1361,7 @@ def unify( trace=False, fail=None, rename_vars=True, - fs_class="default", + fs_class='default', ): """ Unify ``fstruct1`` with ``fstruct2``, and return the resulting feature @@ -1402,7 +1407,7 @@ def unify( """ # Decide which class(es) will be treated as feature structures, # for the purposes of unification. - if fs_class == "default": + if fs_class == 'default': fs_class = _default_fs_class(fstruct1) if _default_fs_class(fstruct2) != fs_class: raise ValueError( @@ -1517,10 +1522,10 @@ def _destructively_unify( # Unifying two mappings: if _is_mapping(fstruct1) and _is_mapping(fstruct2): for fname in fstruct1: - if getattr(fname, "default", None) is not None: + if getattr(fname, 'default', None) is not None: fstruct2.setdefault(fname, fname.default) for fname in fstruct2: - if getattr(fname, "default", None) is not None: + if getattr(fname, 'default', None) is not None: fstruct1.setdefault(fname, fname.default) # Unify any values that are defined in both fstruct1 and @@ -1576,7 +1581,7 @@ def _destructively_unify( return UnificationFailure # Unifying anything else: not allowed! - raise TypeError("Expected mappings or sequences") + raise TypeError('Expected mappings or sequences') def _unify_feature_values( @@ -1654,8 +1659,8 @@ def _unify_feature_values( # Sanity check: unify value should be symmetric if isinstance(fval2, CustomFeatureValue) and result != fval2.unify(fval1): raise AssertionError( - "CustomFeatureValue objects %r and %r disagree " - "about unification value: %r vs. 
%r" + 'CustomFeatureValue objects %r and %r disagree ' + 'about unification value: %r vs. %r' % (fval1, fval2, result, fval2.unify(fval1)) ) elif isinstance(fval2, CustomFeatureValue): @@ -1730,7 +1735,7 @@ def _apply_forwards(fstruct, forward, fs_class, visited): elif _is_sequence(fstruct): items = enumerate(fstruct) else: - raise ValueError("Expected mapping or sequence") + raise ValueError('Expected mapping or sequence') for fname, fval in items: if isinstance(fval, fs_class): # Replace w/ forwarded value. @@ -1755,52 +1760,52 @@ def _resolve_aliases(bindings): def _trace_unify_start(path, fval1, fval2): if path == (): - print("\nUnification trace:") + print('\nUnification trace:') else: - fullname = ".".join("%s" % n for n in path) - print(" " + "| " * (len(path) - 1) + "|") - print(" " + "| " * (len(path) - 1) + "| Unify feature: %s" % fullname) - print(" " + "| " * len(path) + " / " + _trace_valrepr(fval1)) - print(" " + "| " * len(path) + "|\\ " + _trace_valrepr(fval2)) + fullname = '.'.join("%s" % n for n in path) + print(' ' + '| ' * (len(path) - 1) + '|') + print(' ' + '| ' * (len(path) - 1) + '| Unify feature: %s' % fullname) + print(' ' + '| ' * len(path) + ' / ' + _trace_valrepr(fval1)) + print(' ' + '| ' * len(path) + '|\\ ' + _trace_valrepr(fval2)) def _trace_unify_identity(path, fval1): - print(" " + "| " * len(path) + "|") - print(" " + "| " * len(path) + "| (identical objects)") - print(" " + "| " * len(path) + "|") - print(" " + "| " * len(path) + "+-->" + repr(fval1)) + print(' ' + '| ' * len(path) + '|') + print(' ' + '| ' * len(path) + '| (identical objects)') + print(' ' + '| ' * len(path) + '|') + print(' ' + '| ' * len(path) + '+-->' + unicode_repr(fval1)) def _trace_unify_fail(path, result): if result is UnificationFailure: - resume = "" + resume = '' else: - resume = " (nonfatal)" - print(" " + "| " * len(path) + "| |") - print(" " + "X " * len(path) + "X X <-- FAIL" + resume) + resume = ' (nonfatal)' + print(' ' + '| ' * len(path) + '| |') + print(' ' + 'X ' * len(path) + 'X X <-- FAIL' + resume) def _trace_unify_succeed(path, fval1): # Print the result. - print(" " + "| " * len(path) + "|") - print(" " + "| " * len(path) + "+-->" + repr(fval1)) + print(' ' + '| ' * len(path) + '|') + print(' ' + '| ' * len(path) + '+-->' + unicode_repr(fval1)) def _trace_bindings(path, bindings): # Print the bindings (if any). 
if len(bindings) > 0: binditems = sorted(bindings.items(), key=lambda v: v[0].name) - bindstr = "{%s}" % ", ".join( - "%s: %s" % (var, _trace_valrepr(val)) for (var, val) in binditems + bindstr = '{%s}' % ', '.join( + '%s: %s' % (var, _trace_valrepr(val)) for (var, val) in binditems ) - print(" " + "| " * len(path) + " Bindings: " + bindstr) + print(' ' + '| ' * len(path) + ' Bindings: ' + bindstr) def _trace_valrepr(val): if isinstance(val, Variable): - return "%s" % val + return '%s' % val else: - return "%s" % repr(val) + return '%s' % unicode_repr(val) def subsumes(fstruct1, fstruct2): @@ -1837,14 +1842,14 @@ def conflicts(fstruct1, fstruct2, trace=0): def _is_mapping(v): - return hasattr(v, "__contains__") and hasattr(v, "keys") + return hasattr(v, '__contains__') and hasattr(v, 'keys') def _is_sequence(v): return ( - hasattr(v, "__iter__") - and hasattr(v, "__len__") - and not isinstance(v, str) + hasattr(v, '__iter__') + and hasattr(v, '__len__') + and not isinstance(v, string_types) ) @@ -1855,8 +1860,8 @@ def _default_fs_class(obj): return (dict, list) else: raise ValueError( - "To unify objects of type %s, you must specify " - "fs_class explicitly." % obj.__class__.__name__ + 'To unify objects of type %s, you must specify ' + 'fs_class explicitly.' % obj.__class__.__name__ ) @@ -1891,7 +1896,7 @@ class SubstituteBindingsSequence(SubstituteBindingsI): return bindings.get(v, v) - +@python_2_unicode_compatible class FeatureValueTuple(SubstituteBindingsSequence, tuple): """ A base feature value that is a tuple of other base feature values. @@ -1902,11 +1907,11 @@ class FeatureValueTuple(SubstituteBindingsSequence, tuple): def __repr__(self): # [xx] really use %s here? if len(self) == 0: - return "()" - return "(%s)" % ", ".join("%s" % (b,) for b in self) - + return '()' + return '(%s)' % ', '.join('%s' % (b,) for b in self) +@python_2_unicode_compatible class FeatureValueSet(SubstituteBindingsSequence, frozenset): """ A base feature value that is a set of other base feature values. @@ -1917,15 +1922,15 @@ class FeatureValueSet(SubstituteBindingsSequence, frozenset): def __repr__(self): # [xx] really use %s here? if len(self) == 0: - return "{/}" # distinguish from dict. + return '{/}' # distinguish from dict. # n.b., we sort the string reprs of our elements, to ensure # that our own repr is deterministic. - return "{%s}" % ", ".join(sorted("%s" % (b,) for b in self)) + return '{%s}' % ', '.join(sorted('%s' % (b,) for b in self)) __str__ = __repr__ - +@python_2_unicode_compatible class FeatureValueUnion(SubstituteBindingsSequence, frozenset): """ A base feature value that represents the union of two or more @@ -1953,10 +1958,10 @@ class FeatureValueUnion(SubstituteBindingsSequence, frozenset): # n.b., we sort the string reprs of our elements, to ensure # that our own repr is deterministic. also, note that len(self) # is guaranteed to be 2 or more. - return "{%s}" % "+".join(sorted("%s" % (b,) for b in self)) - + return '{%s}' % '+'.join(sorted('%s' % (b,) for b in self)) +@python_2_unicode_compatible class FeatureValueConcat(SubstituteBindingsSequence, tuple): """ A base feature value that represents the concatenation of two or @@ -1982,7 +1987,7 @@ class FeatureValueConcat(SubstituteBindingsSequence, tuple): def __repr__(self): # n.b.: len(self) is guaranteed to be 2 or more. 
- return "(%s)" % "+".join("%s" % (b,) for b in self) + return '(%s)' % '+'.join('%s' % (b,) for b in self) def _flatten(lst, cls): @@ -2005,7 +2010,7 @@ def _flatten(lst, cls): @total_ordering - +@python_2_unicode_compatible class Feature(object): """ A feature identifier that's specialized to put additional @@ -2013,15 +2018,15 @@ class Feature(object): """ def __init__(self, name, default=None, display=None): - assert display in (None, "prefix", "slash") + assert display in (None, 'prefix', 'slash') self._name = name # [xx] rename to .identifier? self._default = default # [xx] not implemented yet. self._display = display - if self._display == "prefix": + if self._display == 'prefix': self._sortkey = (-1, self._name) - elif self._display == "slash": + elif self._display == 'slash': self._sortkey = (1, self._name) else: self._sortkey = (0, self._name) @@ -2042,10 +2047,10 @@ class Feature(object): return self._display def __repr__(self): - return "*%s*" % self.name + return '*%s*' % self.name def __lt__(self, other): - if isinstance(other, str): + if isinstance(other, string_types): return True if not isinstance(other, Feature): raise_unorderable_types("<", self, other) @@ -2084,12 +2089,12 @@ class SlashFeature(Feature): class RangeFeature(Feature): - RANGE_RE = re.compile("(-?\d+):(-?\d+)") + RANGE_RE = re.compile('(-?\d+):(-?\d+)') def read_value(self, s, position, reentrances, parser): m = self.RANGE_RE.match(s, position) if not m: - raise ValueError("range", position) + raise ValueError('range', position) return (int(m.group(1)), int(m.group(2))), m.end() def unify_base_values(self, fval1, fval2, bindings): @@ -2103,8 +2108,8 @@ class RangeFeature(Feature): return rng -SLASH = SlashFeature("slash", default=False, display="slash") -TYPE = Feature("type", display="prefix") +SLASH = SlashFeature('slash', default=False, display='slash') +TYPE = Feature('type', display='prefix') ###################################################################### @@ -2137,19 +2142,19 @@ class CustomFeatureValue(object): If this base value unifies with ``other``, then return the unified value. Otherwise, return ``UnificationFailure``. 
""" - raise NotImplementedError("abstract base class") + raise NotImplementedError('abstract base class') def __eq__(self, other): - raise NotImplementedError("abstract base class") + raise NotImplementedError('abstract base class') def __ne__(self, other): return not self == other def __lt__(self, other): - raise NotImplementedError("abstract base class") + raise NotImplementedError('abstract base class') def __hash__(self): - raise TypeError("%s objects or unhashable" % self.__class__.__name__) + raise TypeError('%s objects or unhashable' % self.__class__.__name__) ###################################################################### @@ -2171,13 +2176,13 @@ class FeatStructReader(object): self._prefix_feature = None self._slash_feature = None for feature in features: - if feature.display == "slash": + if feature.display == 'slash': if self._slash_feature: - raise ValueError("Multiple features w/ display=slash") + raise ValueError('Multiple features w/ display=slash') self._slash_feature = feature - if feature.display == "prefix": + if feature.display == 'prefix': if self._prefix_feature: - raise ValueError("Multiple features w/ display=prefix") + raise ValueError('Multiple features w/ display=prefix') self._prefix_feature = feature self._features_with_defaults = [ feature for feature in features if feature.default is not None @@ -2208,21 +2213,21 @@ class FeatStructReader(object): s = s.strip() value, position = self.read_partial(s, 0, {}, fstruct) if position != len(s): - self._error(s, "end of string", position) + self._error(s, 'end of string', position) return value - _START_FSTRUCT_RE = re.compile(r"\s*(?:\((\d+)\)\s*)?(\??[\w-]+)?(\[)") - _END_FSTRUCT_RE = re.compile(r"\s*]\s*") - _SLASH_RE = re.compile(r"/") + _START_FSTRUCT_RE = re.compile(r'\s*(?:\((\d+)\)\s*)?(\??[\w-]+)?(\[)') + _END_FSTRUCT_RE = re.compile(r'\s*]\s*') + _SLASH_RE = re.compile(r'/') _FEATURE_NAME_RE = re.compile(r'\s*([+-]?)([^\s\(\)<>"\'\-=\[\],]+)\s*') - _REENTRANCE_RE = re.compile(r"\s*->\s*") - _TARGET_RE = re.compile(r"\s*\((\d+)\)\s*") - _ASSIGN_RE = re.compile(r"\s*=\s*") - _COMMA_RE = re.compile(r"\s*,\s*") - _BARE_PREFIX_RE = re.compile(r"\s*(?:\((\d+)\)\s*)?(\??[\w-]+\s*)()") + _REENTRANCE_RE = re.compile(r'\s*->\s*') + _TARGET_RE = re.compile(r'\s*\((\d+)\)\s*') + _ASSIGN_RE = re.compile(r'\s*=\s*') + _COMMA_RE = re.compile(r'\s*,\s*') + _BARE_PREFIX_RE = re.compile(r'\s*(?:\((\d+)\)\s*)?(\??[\w-]+\s*)()') # This one is used to distinguish fdicts from flists: _START_FDICT_RE = re.compile( - r"(%s)|(%s\s*(%s\s*(=|->)|[+-]%s|\]))" + r'(%s)|(%s\s*(%s\s*(=|->)|[+-]%s|\]))' % ( _BARE_PREFIX_RE.pattern, _START_FSTRUCT_RE.pattern, @@ -2265,14 +2270,14 @@ class FeatStructReader(object): if not match: match = self._BARE_PREFIX_RE.match(s, position) if not match: - raise ValueError("open bracket or identifier", position) + raise ValueError('open bracket or identifier', position) position = match.end() # If there as an identifier, record it. 
if match.group(1): identifier = match.group(1) if identifier in reentrances: - raise ValueError("new identifier", match.start(1)) + raise ValueError('new identifier', match.start(1)) reentrances[identifier] = fstruct if isinstance(fstruct, FeatDict): @@ -2285,10 +2290,10 @@ class FeatStructReader(object): def _read_partial_featlist(self, s, position, match, reentrances, fstruct): # Prefix features are not allowed: if match.group(2): - raise ValueError("open bracket") + raise ValueError('open bracket') # Bare prefixes are not allowed: if not match.group(3): - raise ValueError("open bracket") + raise ValueError('open bracket') # Build a list of the features defined by the structure. while position < len(s): @@ -2303,10 +2308,10 @@ class FeatStructReader(object): position = match.end() match = self._TARGET_RE.match(s, position) if not match: - raise ValueError("identifier", position) + raise ValueError('identifier', position) target = match.group(1) if target not in reentrances: - raise ValueError("bound identifier", position) + raise ValueError('bound identifier', position) position = match.end() fstruct.append(reentrances[target]) @@ -2322,19 +2327,19 @@ class FeatStructReader(object): # Otherwise, there should be a comma match = self._COMMA_RE.match(s, position) if match is None: - raise ValueError("comma", position) + raise ValueError('comma', position) position = match.end() # We never saw a close bracket. - raise ValueError("close bracket", position) + raise ValueError('close bracket', position) def _read_partial_featdict(self, s, position, match, reentrances, fstruct): # If there was a prefix feature, record it. if match.group(2): if self._prefix_feature is None: - raise ValueError("open bracket or identifier", match.start(2)) + raise ValueError('open bracket or identifier', match.start(2)) prefixval = match.group(2).strip() - if prefixval.startswith("?"): + if prefixval.startswith('?'): prefixval = Variable(prefixval) fstruct[self._prefix_feature] = prefixval @@ -2361,24 +2366,24 @@ class FeatStructReader(object): # Get the feature name's name match = self._FEATURE_NAME_RE.match(s, position) if match is None: - raise ValueError("feature name", position) + raise ValueError('feature name', position) name = match.group(2) position = match.end() # Check if it's a special feature. - if name[0] == "*" and name[-1] == "*": + if name[0] == '*' and name[-1] == '*': name = self._features.get(name[1:-1]) if name is None: - raise ValueError("known special feature", match.start(2)) + raise ValueError('known special feature', match.start(2)) # Check if this feature has a value already. if name in fstruct: - raise ValueError("new name", match.start(2)) + raise ValueError('new name', match.start(2)) # Boolean value ("+name" or "-name") - if match.group(1) == "+": + if match.group(1) == '+': value = True - if match.group(1) == "-": + if match.group(1) == '-': value = False # Reentrance link ("-> (target)") @@ -2388,10 +2393,10 @@ class FeatStructReader(object): position = match.end() match = self._TARGET_RE.match(s, position) if not match: - raise ValueError("identifier", position) + raise ValueError('identifier', position) target = match.group(1) if target not in reentrances: - raise ValueError("bound identifier", position) + raise ValueError('bound identifier', position) position = match.end() value = reentrances[target] @@ -2403,7 +2408,7 @@ class FeatStructReader(object): value, position = self._read_value(name, s, position, reentrances) # None of the above: error. 
else: - raise ValueError("equals sign", position) + raise ValueError('equals sign', position) # Store the value. fstruct[name] = value @@ -2415,11 +2420,11 @@ class FeatStructReader(object): # Otherwise, there should be a comma match = self._COMMA_RE.match(s, position) if match is None: - raise ValueError("comma", position) + raise ValueError('comma', position) position = match.end() # We never saw a close bracket. - raise ValueError("close bracket", position) + raise ValueError('close bracket', position) def _finalize(self, s, pos, reentrances, fstruct): """ @@ -2450,19 +2455,19 @@ class FeatStructReader(object): if match: handler_func = getattr(self, handler) return handler_func(s, position, reentrances, match) - raise ValueError("value", position) + raise ValueError('value', position) def _error(self, s, expected, position): - lines = s.split("\n") + lines = s.split('\n') while position > len(lines[0]): position -= len(lines.pop(0)) + 1 # +1 for the newline. estr = ( - "Error parsing feature structure\n " + 'Error parsing feature structure\n ' + lines[0] - + "\n " - + " " * position - + "^ " - + "Expected %s" % expected + + '\n ' + + ' ' * position + + '^ ' + + 'Expected %s' % expected ) raise ValueError(estr) @@ -2481,20 +2486,20 @@ class FeatStructReader(object): #: the string position where the value ended. (n.b.: order is #: important here!) VALUE_HANDLERS = [ - ("read_fstruct_value", _START_FSTRUCT_RE), - ("read_var_value", re.compile(r"\?[a-zA-Z_][a-zA-Z0-9_]*")), - ("read_str_value", re.compile("[uU]?[rR]?(['\"])")), - ("read_int_value", re.compile(r"-?\d+")), - ("read_sym_value", re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")), + ('read_fstruct_value', _START_FSTRUCT_RE), + ('read_var_value', re.compile(r'\?[a-zA-Z_][a-zA-Z0-9_]*')), + ('read_str_value', re.compile("[uU]?[rR]?(['\"])")), + ('read_int_value', re.compile(r'-?\d+')), + ('read_sym_value', re.compile(r'[a-zA-Z_][a-zA-Z0-9_]*')), ( - "read_app_value", - re.compile(r"<(app)\((\?[a-z][a-z]*)\s*," r"\s*(\?[a-z][a-z]*)\)>"), + 'read_app_value', + re.compile(r'<(app)\((\?[a-z][a-z]*)\s*,' r'\s*(\?[a-z][a-z]*)\)>'), ), # ('read_logic_value', re.compile(r'<([^>]*)>')), # lazily match any character after '<' until we hit a '>' not preceded by '-' - ("read_logic_value", re.compile(r"<(.*?)(?")), - ("read_set_value", re.compile(r"{")), - ("read_tuple_value", re.compile(r"\(")), + ('read_logic_value', re.compile(r'<(.*?)(?')), + ('read_set_value', re.compile(r'{')), + ('read_tuple_value', re.compile(r'\(')), ] def read_fstruct_value(self, s, position, reentrances, match): @@ -2510,7 +2515,7 @@ class FeatStructReader(object): def read_var_value(self, s, position, reentrances, match): return Variable(match.group()), match.end() - _SYM_CONSTS = {"None": None, "True": True, "False": False} + _SYM_CONSTS = {'None': None, 'True': True, 'False': False} def read_sym_value(self, s, position, reentrances, match): val, end = match.group(), match.end() @@ -2518,7 +2523,7 @@ class FeatStructReader(object): def read_app_value(self, s, position, reentrances, match): """Mainly included for backwards compat.""" - return self._logic_parser.parse("%s(%s)" % match.group(2, 3)), match.end() + return self._logic_parser.parse('%s(%s)' % match.group(2, 3)), match.end() def read_logic_value(self, s, position, reentrances, match): try: @@ -2528,16 +2533,16 @@ class FeatStructReader(object): raise ValueError() return expr, match.end() except ValueError: - raise ValueError("logic expression", match.start(1)) + raise ValueError('logic expression', match.start(1)) 
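The VALUE_HANDLERS table and the read_*_value methods in the hunks above are all driven through FeatStructReader.fromstring(). A short usage sketch, not part of the patch itself, assuming the patched nltk is importable; the input string is an illustrative example only:

    from nltk.featstruct import FeatStructReader

    # fromstring() calls read_partial(), which consumes "name = value" pairs and
    # dispatches each value through the VALUE_HANDLERS table shown above
    # (variables, quoted strings, ints, symbols, logic expressions, sets, tuples).
    reader = FeatStructReader()
    fs = reader.fromstring("[cat='NP', agr=[num='sg', per=3], subj=?x]")
    print(fs['cat'], fs['agr']['per'])   # -> NP 3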
def read_tuple_value(self, s, position, reentrances, match): return self._read_seq_value( - s, position, reentrances, match, ")", FeatureValueTuple, FeatureValueConcat + s, position, reentrances, match, ')', FeatureValueTuple, FeatureValueConcat ) def read_set_value(self, s, position, reentrances, match): return self._read_seq_value( - s, position, reentrances, match, "}", FeatureValueSet, FeatureValueUnion + s, position, reentrances, match, '}', FeatureValueSet, FeatureValueUnion ) def _read_seq_value( @@ -2549,7 +2554,7 @@ class FeatStructReader(object): cp = re.escape(close_paren) position = match.end() # Special syntax fo empty tuples: - m = re.compile(r"\s*/?\s*%s" % cp).match(s, position) + m = re.compile(r'\s*/?\s*%s' % cp).match(s, position) if m: return seq_class(), m.end() # Read values: @@ -2557,7 +2562,7 @@ class FeatStructReader(object): seen_plus = False while True: # Close paren: return value. - m = re.compile(r"\s*%s" % cp).match(s, position) + m = re.compile(r'\s*%s' % cp).match(s, position) if m: if seen_plus: return plus_class(values), m.end() @@ -2569,10 +2574,10 @@ class FeatStructReader(object): values.append(val) # Comma or looking at close paren - m = re.compile(r"\s*(,|\+|(?=%s))\s*" % cp).match(s, position) + m = re.compile(r'\s*(,|\+|(?=%s))\s*' % cp).match(s, position) if not m: raise ValueError("',' or '+' or '%s'" % cp, position) - if m.group(1) == "+": + if m.group(1) == '+': seen_plus = True position = m.end() @@ -2582,34 +2587,34 @@ class FeatStructReader(object): ###################################################################### -def display_unification(fs1, fs2, indent=" "): +def display_unification(fs1, fs2, indent=' '): # Print the two input feature structures, side by side. - fs1_lines = ("%s" % fs1).split("\n") - fs2_lines = ("%s" % fs2).split("\n") + fs1_lines = ("%s" % fs1).split('\n') + fs2_lines = ("%s" % fs2).split('\n') if len(fs1_lines) > len(fs2_lines): - blankline = "[" + " " * (len(fs2_lines[0]) - 2) + "]" + blankline = '[' + ' ' * (len(fs2_lines[0]) - 2) + ']' fs2_lines += [blankline] * len(fs1_lines) else: - blankline = "[" + " " * (len(fs1_lines[0]) - 2) + "]" + blankline = '[' + ' ' * (len(fs1_lines[0]) - 2) + ']' fs1_lines += [blankline] * len(fs2_lines) for (fs1_line, fs2_line) in zip(fs1_lines, fs2_lines): - print(indent + fs1_line + " " + fs2_line) - print(indent + "-" * len(fs1_lines[0]) + " " + "-" * len(fs2_lines[0])) + print(indent + fs1_line + ' ' + fs2_line) + print(indent + '-' * len(fs1_lines[0]) + ' ' + '-' * len(fs2_lines[0])) linelen = len(fs1_lines[0]) * 2 + 3 - print(indent + "| |".center(linelen)) - print(indent + "+-----UNIFY-----+".center(linelen)) - print(indent + "|".center(linelen)) - print(indent + "V".center(linelen)) + print(indent + '| |'.center(linelen)) + print(indent + '+-----UNIFY-----+'.center(linelen)) + print(indent + '|'.center(linelen)) + print(indent + 'V'.center(linelen)) bindings = {} result = fs1.unify(fs2, bindings) if result is None: - print(indent + "(FAILED)".center(linelen)) + print(indent + '(FAILED)'.center(linelen)) else: print( - "\n".join(indent + l.center(linelen) for l in ("%s" % result).split("\n")) + '\n'.join(indent + l.center(linelen) for l in ("%s" % result).split('\n')) ) if bindings and len(bindings.bound_variables()) > 0: print(repr(bindings).center(linelen)) @@ -2619,16 +2624,16 @@ def display_unification(fs1, fs2, indent=" "): def interactive_demo(trace=False): import random, sys - HELP = """ + HELP = ''' 1-%d: Select the corresponding feature structure q: Quit t: Turn 
tracing on or off l: List all feature structures ?: Help - """ + ''' print( - """ + ''' This demo will repeatedly present you with a list of feature structures, and ask you to choose two for unification. Whenever a new feature structure is generated, it is added to the list of @@ -2637,26 +2642,26 @@ def interactive_demo(trace=False): random subset for you to choose between at a given time. If you want to see the complete lists, type "l". For a list of valid commands, type "?". - """ + ''' ) print('Press "Enter" to continue...') sys.stdin.readline() fstruct_strings = [ - "[agr=[number=sing, gender=masc]]", - "[agr=[gender=masc, person=3]]", - "[agr=[gender=fem, person=3]]", - "[subj=[agr=(1)[]], agr->(1)]", - "[obj=?x]", - "[subj=?x]", - "[/=None]", - "[/=NP]", - "[cat=NP]", - "[cat=VP]", - "[cat=PP]", - "[subj=[agr=[gender=?y]], obj=[agr=[gender=?y]]]", - "[gender=masc, agr=?C]", - "[gender=?S, agr=[gender=?S,person=3]]", + '[agr=[number=sing, gender=masc]]', + '[agr=[gender=masc, person=3]]', + '[agr=[gender=fem, person=3]]', + '[subj=[agr=(1)[]], agr->(1)]', + '[obj=?x]', + '[subj=?x]', + '[/=None]', + '[/=NP]', + '[cat=NP]', + '[cat=VP]', + '[cat=PP]', + '[subj=[agr=[gender=?y]], obj=[agr=[gender=?y]]]', + '[gender=masc, agr=?C]', + '[gender=?S, agr=[gender=?S,person=3]]', ] all_fstructs = [ @@ -2666,10 +2671,10 @@ def interactive_demo(trace=False): def list_fstructs(fstructs): for i, fstruct in fstructs: print() - lines = ("%s" % fstruct).split("\n") - print("%3d: %s" % (i + 1, lines[0])) + lines = ("%s" % fstruct).split('\n') + print('%3d: %s' % (i + 1, lines[0])) for line in lines[1:]: - print(" " + line) + print(' ' + line) print() while True: @@ -2680,40 +2685,40 @@ def interactive_demo(trace=False): else: fstructs = all_fstructs - print("_" * 75) + print('_' * 75) - print("Choose two feature structures to unify:") + print('Choose two feature structures to unify:') list_fstructs(fstructs) selected = [None, None] - for (nth, i) in (("First", 0), ("Second", 1)): + for (nth, i) in (('First', 0), ('Second', 1)): while selected[i] is None: print( ( - "%s feature structure (1-%d,q,t,l,?): " + '%s feature structure (1-%d,q,t,l,?): ' % (nth, len(all_fstructs)) ), - end=" ", + end=' ', ) try: input = sys.stdin.readline().strip() - if input in ("q", "Q", "x", "X"): + if input in ('q', 'Q', 'x', 'X'): return - if input in ("t", "T"): + if input in ('t', 'T'): trace = not trace - print(" Trace = %s" % trace) + print(' Trace = %s' % trace) continue - if input in ("h", "H", "?"): + if input in ('h', 'H', '?'): print(HELP % len(fstructs)) continue - if input in ("l", "L"): + if input in ('l', 'L'): list_fstructs(all_fstructs) continue num = int(input) - 1 selected[i] = all_fstructs[num][1] print() except: - print("Bad sentence number") + print('Bad sentence number') continue if trace: @@ -2729,7 +2734,7 @@ def interactive_demo(trace=False): print('\nType "Enter" to continue unifying; or "q" to quit.') input = sys.stdin.readline().strip() - if input in ("q", "Q", "x", "X"): + if input in ('q', 'Q', 'x', 'X'): return @@ -2741,20 +2746,20 @@ def demo(trace=False): # processor breaks with values like '3rd' fstruct_strings = [ - "[agr=[number=sing, gender=masc]]", - "[agr=[gender=masc, person=3]]", - "[agr=[gender=fem, person=3]]", - "[subj=[agr=(1)[]], agr->(1)]", - "[obj=?x]", - "[subj=?x]", - "[/=None]", - "[/=NP]", - "[cat=NP]", - "[cat=VP]", - "[cat=PP]", - "[subj=[agr=[gender=?y]], obj=[agr=[gender=?y]]]", - "[gender=masc, agr=?C]", - "[gender=?S, agr=[gender=?S,person=3]]", + 
'[agr=[number=sing, gender=masc]]', + '[agr=[gender=masc, person=3]]', + '[agr=[gender=fem, person=3]]', + '[subj=[agr=(1)[]], agr->(1)]', + '[obj=?x]', + '[subj=?x]', + '[/=None]', + '[/=NP]', + '[cat=NP]', + '[cat=VP]', + '[cat=PP]', + '[subj=[agr=[gender=?y]], obj=[agr=[gender=?y]]]', + '[gender=masc, agr=?C]', + '[gender=?S, agr=[gender=?S,person=3]]', ] all_fstructs = [FeatStruct(fss) for fss in fstruct_strings] # MAX_CHOICES = 5 @@ -2772,20 +2777,20 @@ def demo(trace=False): ) -if __name__ == "__main__": +if __name__ == '__main__': demo() __all__ = [ - "FeatStruct", - "FeatDict", - "FeatList", - "unify", - "subsumes", - "conflicts", - "Feature", - "SlashFeature", - "RangeFeature", - "SLASH", - "TYPE", - "FeatStructReader", + 'FeatStruct', + 'FeatDict', + 'FeatList', + 'unify', + 'subsumes', + 'conflicts', + 'Feature', + 'SlashFeature', + 'RangeFeature', + 'SLASH', + 'TYPE', + 'FeatStructReader', ] diff --git a/nlp_resource_data/nltk/grammar.py b/nlp_resource_data/nltk/grammar.py index c6c7a69..5ada3cf 100644 --- a/nlp_resource_data/nltk/grammar.py +++ b/nlp_resource_data/nltk/grammar.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: Context Free Grammars # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Steven Bird # Edward Loper # Jason Narad @@ -68,10 +68,15 @@ The operation of replacing the left hand side (*lhs*) of a production with the right hand side (*rhs*) in a tree (*tree*) is known as "expanding" *lhs* to *rhs* in *tree*. """ +from __future__ import print_function, unicode_literals, division + import re from functools import total_ordering +from six import string_types + from nltk.util import transitive_closure, invert_graph +from nltk.compat import python_2_unicode_compatible, unicode_repr from nltk.internals import raise_unorderable_types from nltk.probability import ImmutableProbabilisticMixIn @@ -84,6 +89,7 @@ from nltk.featstruct import FeatStruct, FeatDict, FeatStructReader, SLASH, TYPE @total_ordering +@python_2_unicode_compatible class Nonterminal(object): """ A non-terminal symbol for a context free grammar. ``Nonterminal`` @@ -150,10 +156,10 @@ class Nonterminal(object): :rtype: str """ - if isinstance(self._symbol, str): - return "%s" % self._symbol + if isinstance(self._symbol, string_types): + return '%s' % self._symbol else: - return "%s" % repr(self._symbol) + return '%s' % unicode_repr(self._symbol) def __str__(self): """ @@ -161,10 +167,10 @@ class Nonterminal(object): :rtype: str """ - if isinstance(self._symbol, str): - return "%s" % self._symbol + if isinstance(self._symbol, string_types): + return '%s' % self._symbol else: - return "%s" % repr(self._symbol) + return '%s' % unicode_repr(self._symbol) def __div__(self, rhs): """ @@ -176,7 +182,7 @@ class Nonterminal(object): :type rhs: Nonterminal :rtype: Nonterminal """ - return Nonterminal("%s/%s" % (self._symbol, rhs._symbol)) + return Nonterminal('%s/%s' % (self._symbol, rhs._symbol)) def __truediv__(self, rhs): """ @@ -206,8 +212,8 @@ def nonterminals(symbols): in the same order as the symbols names. 
:rtype: list(Nonterminal) """ - if "," in symbols: - symbol_list = symbols.split(",") + if ',' in symbols: + symbol_list = symbols.split(',') else: symbol_list = symbols.split() return [Nonterminal(s.strip()) for s in symbol_list] @@ -245,7 +251,7 @@ def is_terminal(item): :rtype: bool """ - return hasattr(item, "__hash__") and not isinstance(item, Nonterminal) + return hasattr(item, '__hash__') and not isinstance(item, Nonterminal) ################################################################# @@ -254,7 +260,7 @@ def is_terminal(item): @total_ordering - +@python_2_unicode_compatible class Production(object): """ A grammar production. Each production maps a single symbol @@ -284,9 +290,9 @@ class Production(object): :param rhs: The right-hand side of the new ``Production``. :type rhs: sequence(Nonterminal and terminal) """ - if isinstance(rhs, str): + if isinstance(rhs, string_types): raise TypeError( - "production right hand side should be a list, " "not a string" + 'production right hand side should be a list, ' 'not a string' ) self._lhs = lhs self._rhs = tuple(rhs) @@ -338,8 +344,8 @@ class Production(object): :rtype: str """ - result = "%s -> " % repr(self._lhs) - result += " ".join(repr(el) for el in self._rhs) + result = '%s -> ' % unicode_repr(self._lhs) + result += " ".join(unicode_repr(el) for el in self._rhs) return result def __repr__(self): @@ -348,7 +354,7 @@ class Production(object): :rtype: str """ - return "%s" % self + return '%s' % self def __eq__(self, other): """ @@ -379,7 +385,7 @@ class Production(object): return self._hash - +@python_2_unicode_compatible class DependencyProduction(Production): """ A dependency grammar production. Each production maps a single @@ -392,13 +398,13 @@ class DependencyProduction(Production): :rtype: str """ - result = "'%s' ->" % (self._lhs,) + result = '\'%s\' ->' % (self._lhs,) for elt in self._rhs: - result += " '%s'" % (elt,) + result += ' \'%s\'' % (elt,) return result - +@python_2_unicode_compatible class ProbabilisticProduction(Production, ImmutableProbabilisticMixIn): """ A probabilistic context free grammar production. @@ -425,8 +431,8 @@ class ProbabilisticProduction(Production, ImmutableProbabilisticMixIn): Production.__init__(self, lhs, rhs) def __str__(self): - return super().__str__() + ( - " [1.0]" if (self.prob() == 1.0) else " [%g]" % self.prob() + return Production.__unicode__(self) + ( + ' [1.0]' if (self.prob() == 1.0) else ' [%g]' % self.prob() ) def __eq__(self, other): @@ -449,7 +455,7 @@ class ProbabilisticProduction(Production, ImmutableProbabilisticMixIn): ################################################################# - +@python_2_unicode_compatible class CFG(object): """ A context-free grammar. A grammar consists of a start state and @@ -673,7 +679,7 @@ class CFG(object): """ missing = [tok for tok in tokens if not self._lexical_index.get(tok)] if missing: - missing = ", ".join("%r" % (w,) for w in missing) + missing = ', '.join('%r' % (w,) for w in missing) raise ValueError( "Grammar does not cover some of the " "input words: %r." 
% missing ) @@ -747,120 +753,14 @@ class CFG(object): """ return self.is_flexible_chomsky_normal_form() and self._all_unary_are_lexical - def chomsky_normal_form(self, new_token_padding="@$@", flexible=False): - """ - Returns a new Grammer that is in chomsky normal - :param: new_token_padding - Customise new rule formation during binarisation - """ - if self.is_chomsky_normal_form(): - return self - if self.productions(empty=True): - raise ValueError( - ("Grammar has Empty rules. " "Cannot deal with them at the moment") - ) - - # check for mixed rules - for rule in self.productions(): - if rule.is_lexical() and len(rule.rhs()) > 1: - raise ValueError( - "Cannot handled mixed rule {} => {}".format(rule.lhs(), rule.rhs()) - ) - - step1 = CFG.eliminate_start(self) - step2 = CFG.binarize(step1, new_token_padding) - if flexible: - return step2 - step3 = CFG.remove_unitary_rules(step2) - return step3 - - @classmethod - def remove_unitary_rules(cls, grammar): - """ - Remove nonlexical unitary rules and convert them to - lexical - """ - result = [] - unitary = [] - for rule in grammar.productions(): - if len(rule) == 1 and rule.is_nonlexical(): - unitary.append(rule) - else: - result.append(rule) - - while unitary: - rule = unitary.pop(0) - for item in grammar.productions(lhs=rule.rhs()[0]): - new_rule = Production(rule.lhs(), item.rhs()) - if len(new_rule) != 1 or new_rule.is_lexical(): - result.append(new_rule) - else: - unitary.append(new_rule) - - n_grammar = CFG(grammar.start(), result) - return n_grammar - - @classmethod - def binarize(cls, grammar, padding="@$@"): - """ - Convert all non-binary rules into binary by introducing - new tokens. - Example:: - Original: - A => B C D - After Conversion: - A => B A@$@B - A@$@B => C D - """ - result = [] - - for rule in grammar.productions(): - if len(rule.rhs()) > 2: - # this rule needs to be broken down - left_side = rule.lhs() - for k in range(0, len(rule.rhs()) - 2): - tsym = rule.rhs()[k] - new_sym = Nonterminal(left_side.symbol() + padding + tsym.symbol()) - new_production = Production(left_side, (tsym, new_sym)) - left_side = new_sym - result.append(new_production) - last_prd = Production(left_side, rule.rhs()[-2:]) - result.append(last_prd) - else: - result.append(rule) - - n_grammar = CFG(grammar.start(), result) - return n_grammar - - @classmethod - def eliminate_start(cls, grammar): - """ - Eliminate start rule in case it appears on RHS - Example: S -> S0 S1 and S0 -> S1 S - Then another rule S0_Sigma -> S is added - """ - start = grammar.start() - result = [] - need_to_add = None - for rule in grammar.productions(): - if start in rule.rhs(): - need_to_add = True - result.append(rule) - if need_to_add: - start = Nonterminal("S0_SIGMA") - result.append(Production(start, [grammar.start()])) - n_grammar = CFG(start, result) - return n_grammar - return grammar - def __repr__(self): - return "" % len(self._productions) + return '' % len(self._productions) def __str__(self): - result = "Grammar with %d productions" % len(self._productions) - result += " (start state = %r)" % self._start + result = 'Grammar with %d productions' % len(self._productions) + result += ' (start state = %r)' % self._start for production in self._productions: - result += "\n %s" % production + result += '\n %s' % production return result @@ -944,7 +844,7 @@ class FeatureGrammar(CFG): ) elif logic_parser is not None: raise Exception( - "'logic_parser' and 'fstruct_reader' must " "not both be set" + '\'logic_parser\' and \'fstruct_reader\' must ' 'not both be set' ) 
start, productions = read_grammar( @@ -1020,7 +920,7 @@ class FeatureGrammar(CFG): @total_ordering - +@python_2_unicode_compatible class FeatureValueType(object): """ A helper class for ``FeatureGrammars``, designed to be different @@ -1033,7 +933,7 @@ class FeatureValueType(object): self._hash = hash(value) def __repr__(self): - return "<%s>" % self._value + return '<%s>' % self._value def __eq__(self, other): return type(self) == type(other) and self._value == other._value @@ -1050,7 +950,7 @@ class FeatureValueType(object): return self._hash - +@python_2_unicode_compatible class DependencyGrammar(object): """ A dependency grammar. A DependencyGrammar consists of a set of @@ -1070,16 +970,16 @@ class DependencyGrammar(object): @classmethod def fromstring(cls, input): productions = [] - for linenum, line in enumerate(input.split("\n")): + for linenum, line in enumerate(input.split('\n')): line = line.strip() - if line.startswith("#") or line == "": + if line.startswith('#') or line == '': continue try: productions += _read_dependency_production(line) except ValueError: - raise ValueError("Unable to parse line %s: %s" % (linenum, line)) + raise ValueError('Unable to parse line %s: %s' % (linenum, line)) if len(productions) == 0: - raise ValueError("No productions found!") + raise ValueError('No productions found!') return cls(productions) def contains(self, head, mod): @@ -1133,19 +1033,19 @@ class DependencyGrammar(object): :rtype: str """ - str = "Dependency grammar with %d productions" % len(self._productions) + str = 'Dependency grammar with %d productions' % len(self._productions) for production in self._productions: - str += "\n %s" % production + str += '\n %s' % production return str def __repr__(self): """ Return a concise string representation of the ``DependencyGrammar`` """ - return "Dependency grammar with %d productions" % len(self._productions) - + return 'Dependency grammar with %d productions' % len(self._productions) +@python_2_unicode_compatible class ProbabilisticDependencyGrammar(object): """ @@ -1179,24 +1079,24 @@ class ProbabilisticDependencyGrammar(object): :rtype: str """ - str = "Statistical dependency grammar with %d productions" % len( + str = 'Statistical dependency grammar with %d productions' % len( self._productions ) for production in self._productions: - str += "\n %s" % production - str += "\nEvents:" + str += '\n %s' % production + str += '\nEvents:' for event in self._events: - str += "\n %d:%s" % (self._events[event], event) - str += "\nTags:" + str += '\n %d:%s' % (self._events[event], event) + str += '\nTags:' for tag_word in self._tags: - str += "\n %s:\t(%s)" % (tag_word, self._tags[tag_word]) + str += '\n %s:\t(%s)' % (tag_word, self._tags[tag_word]) return str def __repr__(self): """ Return a concise string representation of the ``ProbabilisticDependencyGrammar`` """ - return "Statistical Dependency grammar with %d productions" % len( + return 'Statistical Dependency grammar with %d productions' % len( self._productions ) @@ -1331,10 +1231,10 @@ def _read_fcfg_production(input, fstruct_reader): # Parsing generic grammars -_ARROW_RE = re.compile(r"\s* -> \s*", re.VERBOSE) -_PROBABILITY_RE = re.compile(r"( \[ [\d\.]+ \] ) \s*", re.VERBOSE) +_ARROW_RE = re.compile(r'\s* -> \s*', re.VERBOSE) +_PROBABILITY_RE = re.compile(r'( \[ [\d\.]+ \] ) \s*', re.VERBOSE) _TERMINAL_RE = re.compile(r'( "[^"]+" | \'[^\']+\' ) \s*', re.VERBOSE) -_DISJUNCTION_RE = re.compile(r"\| \s*", re.VERBOSE) +_DISJUNCTION_RE = re.compile(r'\| \s*', re.VERBOSE) def 
_read_production(line, nonterm_parser, probabilistic=False): @@ -1350,7 +1250,7 @@ def _read_production(line, nonterm_parser, probabilistic=False): # Skip over the arrow. m = _ARROW_RE.match(line, pos) if not m: - raise ValueError("Expected an arrow") + raise ValueError('Expected an arrow') pos = m.end() # Parse the right hand side. @@ -1364,20 +1264,20 @@ def _read_production(line, nonterm_parser, probabilistic=False): probabilities[-1] = float(m.group(1)[1:-1]) if probabilities[-1] > 1.0: raise ValueError( - "Production probability %f, " - "should not be greater than 1.0" % (probabilities[-1],) + 'Production probability %f, ' + 'should not be greater than 1.0' % (probabilities[-1],) ) # String -- add terminal. - elif line[pos] in "'\"": + elif line[pos] in "\'\"": m = _TERMINAL_RE.match(line, pos) if not m: - raise ValueError("Unterminated string") + raise ValueError('Unterminated string') rhsides[-1].append(m.group(1)[1:-1]) pos = m.end() # Vertical bar -- start new rhside. - elif line[pos] == "|": + elif line[pos] == '|': m = _DISJUNCTION_RE.match(line, pos) probabilities.append(0.0) rhsides.append([]) @@ -1419,51 +1319,51 @@ def read_grammar(input, nonterm_parser, probabilistic=False, encoding=None): """ if encoding is not None: input = input.decode(encoding) - if isinstance(input, str): - lines = input.split("\n") + if isinstance(input, string_types): + lines = input.split('\n') else: lines = input start = None productions = [] - continue_line = "" + continue_line = '' for linenum, line in enumerate(lines): line = continue_line + line.strip() - if line.startswith("#") or line == "": + if line.startswith('#') or line == '': continue - if line.endswith("\\"): - continue_line = line[:-1].rstrip() + " " + if line.endswith('\\'): + continue_line = line[:-1].rstrip() + ' ' continue - continue_line = "" + continue_line = '' try: - if line[0] == "%": + if line[0] == '%': directive, args = line[1:].split(None, 1) - if directive == "start": + if directive == 'start': start, pos = nonterm_parser(args, 0) if pos != len(args): - raise ValueError("Bad argument to start directive") + raise ValueError('Bad argument to start directive') else: - raise ValueError("Bad directive") + raise ValueError('Bad directive') else: # expand out the disjunctions on the RHS productions += _read_production(line, nonterm_parser, probabilistic) except ValueError as e: - raise ValueError("Unable to parse line %s: %s\n%s" % (linenum + 1, line, e)) + raise ValueError('Unable to parse line %s: %s\n%s' % (linenum + 1, line, e)) if not productions: - raise ValueError("No productions found!") + raise ValueError('No productions found!') if not start: start = productions[0].lhs() return (start, productions) -_STANDARD_NONTERM_RE = re.compile("( [\w/][\w/^<>-]* ) \s*", re.VERBOSE) +_STANDARD_NONTERM_RE = re.compile('( [\w/][\w/^<>-]* ) \s*', re.VERBOSE) def standard_nonterm_parser(string, pos): m = _STANDARD_NONTERM_RE.match(string, pos) if not m: - raise ValueError("Expected a nonterminal, found: " + string[pos:]) + raise ValueError('Expected a nonterminal, found: ' + string[pos:]) return (Nonterminal(m.group(1)), m.end()) @@ -1472,7 +1372,7 @@ def standard_nonterm_parser(string, pos): ################################################################# _READ_DG_RE = re.compile( - r"""^\s* # leading whitespace + r'''^\s* # leading whitespace ('[^']+')\s* # single-quoted lhs (?:[-=]+>)\s* # arrow (?:( # rhs: @@ -1481,24 +1381,24 @@ _READ_DG_RE = re.compile( | \| # disjunction ) \s*) # trailing space - *$""", # zero or more copies 
+ *$''', # zero or more copies re.VERBOSE, ) -_SPLIT_DG_RE = re.compile(r"""('[^']'|[-=]+>|"[^"]+"|'[^']+'|\|)""") +_SPLIT_DG_RE = re.compile(r'''('[^']'|[-=]+>|"[^"]+"|'[^']+'|\|)''') def _read_dependency_production(s): if not _READ_DG_RE.match(s): - raise ValueError("Bad production string") + raise ValueError('Bad production string') pieces = _SPLIT_DG_RE.split(s) pieces = [p for i, p in enumerate(pieces) if i % 2 == 1] - lhside = pieces[0].strip("'\"") + lhside = pieces[0].strip('\'\"') rhsides = [[]] for piece in pieces[2:]: - if piece == "|": + if piece == '|': rhsides.append([]) else: - rhsides[-1].append(piece.strip("'\"")) + rhsides[-1].append(piece.strip('\'\"')) return [DependencyProduction(lhside, rhside) for rhside in rhsides] @@ -1515,12 +1415,12 @@ def cfg_demo(): from nltk import nonterminals, Production, CFG # Create some nonterminals - S, NP, VP, PP = nonterminals("S, NP, VP, PP") - N, V, P, Det = nonterminals("N, V, P, Det") + S, NP, VP, PP = nonterminals('S, NP, VP, PP') + N, V, P, Det = nonterminals('N, V, P, Det') VP_slash_NP = VP / NP - print("Some nonterminals:", [S, NP, VP, PP, N, V, P, Det, VP / NP]) - print(" S.symbol() =>", repr(S.symbol())) + print('Some nonterminals:', [S, NP, VP, PP, N, V, P, Det, VP / NP]) + print(' S.symbol() =>', repr(S.symbol())) print() print(Production(S, [NP])) @@ -1539,11 +1439,11 @@ def cfg_demo(): """ ) - print("A Grammar:", repr(grammar)) - print(" grammar.start() =>", repr(grammar.start())) - print(" grammar.productions() =>", end=" ") + print('A Grammar:', repr(grammar)) + print(' grammar.start() =>', repr(grammar.start())) + print(' grammar.productions() =>', end=' ') # Use string.replace(...) is to line-wrap the output. - print(repr(grammar.productions()).replace(",", ",\n" + " " * 25)) + print(repr(grammar.productions()).replace(',', ',\n' + ' ' * 25)) print() @@ -1602,18 +1502,18 @@ def pcfg_demo(): pcfg_prods = toy_pcfg1.productions() pcfg_prod = pcfg_prods[2] - print("A PCFG production:", repr(pcfg_prod)) - print(" pcfg_prod.lhs() =>", repr(pcfg_prod.lhs())) - print(" pcfg_prod.rhs() =>", repr(pcfg_prod.rhs())) - print(" pcfg_prod.prob() =>", repr(pcfg_prod.prob())) + print('A PCFG production:', repr(pcfg_prod)) + print(' pcfg_prod.lhs() =>', repr(pcfg_prod.lhs())) + print(' pcfg_prod.rhs() =>', repr(pcfg_prod.rhs())) + print(' pcfg_prod.prob() =>', repr(pcfg_prod.prob())) print() grammar = toy_pcfg2 - print("A PCFG grammar:", repr(grammar)) - print(" grammar.start() =>", repr(grammar.start())) - print(" grammar.productions() =>", end=" ") + print('A PCFG grammar:', repr(grammar)) + print(' grammar.start() =>', repr(grammar.start())) + print(' grammar.productions() =>', end=' ') # Use .replace(...) is to line-wrap the output. 
- print(repr(grammar.productions()).replace(",", ",\n" + " " * 26)) + print(repr(grammar.productions()).replace(',', ',\n' + ' ' * 26)) print() # extract productions from three trees and induce the PCFG @@ -1628,7 +1528,7 @@ def pcfg_demo(): productions += tree.productions() - S = Nonterminal("S") + S = Nonterminal('S') grammar = induce_pcfg(S, productions) print(grammar) print() @@ -1650,7 +1550,7 @@ def pcfg_demo(): def fcfg_demo(): import nltk.data - g = nltk.data.load("grammars/book_grammars/feat0.fcfg") + g = nltk.data.load('grammars/book_grammars/feat0.fcfg') print(g) print() @@ -1706,19 +1606,19 @@ def demo(): sdg_demo() -if __name__ == "__main__": +if __name__ == '__main__': demo() __all__ = [ - "Nonterminal", - "nonterminals", - "CFG", - "Production", - "PCFG", - "ProbabilisticProduction", - "DependencyGrammar", - "DependencyProduction", - "ProbabilisticDependencyGrammar", - "induce_pcfg", - "read_grammar", + 'Nonterminal', + 'nonterminals', + 'CFG', + 'Production', + 'PCFG', + 'ProbabilisticProduction', + 'DependencyGrammar', + 'DependencyProduction', + 'ProbabilisticDependencyGrammar', + 'induce_pcfg', + 'read_grammar', ] diff --git a/nlp_resource_data/nltk/help.py b/nlp_resource_data/nltk/help.py index 8b292d6..27671e8 100644 --- a/nlp_resource_data/nltk/help.py +++ b/nlp_resource_data/nltk/help.py @@ -1,6 +1,6 @@ # Natural Language Toolkit (NLTK) Help # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Authors: Steven Bird # URL: # For license information, see LICENSE.TXT @@ -8,6 +8,7 @@ """ Provide structured access to documentation. """ +from __future__ import print_function import re from textwrap import wrap @@ -37,7 +38,7 @@ def _print_entries(tags, tagdict): entry = tagdict[tag] defn = [tag + ": " + entry[0]] examples = wrap( - entry[1], width=75, initial_indent=" ", subsequent_indent=" " + entry[1], width=75, initial_indent=' ', subsequent_indent=' ' ) print("\n".join(defn + examples)) @@ -57,8 +58,8 @@ def _format_tagset(tagset, tagpattern=None): print("No matching tags found.") -if __name__ == "__main__": - brown_tagset(r"NN.*") - upenn_tagset(r".*\$") - claws5_tagset("UNDEFINED") - brown_tagset(r"NN") +if __name__ == '__main__': + brown_tagset(r'NN.*') + upenn_tagset(r'.*\$') + claws5_tagset('UNDEFINED') + brown_tagset(r'NN') diff --git a/nlp_resource_data/nltk/inference/__init__.py b/nlp_resource_data/nltk/inference/__init__.py index fdd653d..d79c935 100644 --- a/nlp_resource_data/nltk/inference/__init__.py +++ b/nlp_resource_data/nltk/inference/__init__.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Inference # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Dan Garrette # Ewan Klein # diff --git a/nlp_resource_data/nltk/inference/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/inference/__pycache__/__init__.cpython-37.pyc index abbb0723b57884de36f7df56a8208d68b1e952dd..1255a72cb9fd124c8ad67a98ca969d713dca5f8f 100644 GIT binary patch delta 31 lcmX@gwv&z9iI1$U5$5B$q1%4wNa+_meQ_bI;5yRQgv*k zPy-nWS~P8x*B}iLpeO?LK#L&821ru`NLs)t@~a5i{^$Y?kfKK0z(5b6bz$_HnWe-n z%e5T|`}p3xnRzqc9B;`#e22aNT^1Y*1bjC5Tk)*R7mu$8TUMu-&8FBgHiaqnJS*5U zc7}6D-cfL7oCR0LMQdl?opA%*nfGKo@af7+1#ia7Y^qmvuCZyG;#NF2Y>G$qtvS|M zreTvVDPEv^D|G)R-KR7FeYK&o5dal_B><#=8d!5=g0K!MAy|iK9fEaOX@qqnt;4X6 zC{b8P)vy{|b7mTW(xk+I5(7#IC=sBw@ov%Y8e%)ditEj8q^6u)f)DP+0Rzb!aKJ5N2;j^E!=vIc_t_&sTsk*s zCO(oNW;g7%!c#)UlFMoLdF+fMRr`ngF5COy^Yn)Km0Vtt3q?iE4~jPLoljtE6C5-l z4*B-YgkZ+QI5grA!6AkN*^hj6?T^AnFFc0*9AeK*+=!pu&X>7ga{s+=vA 
zRtn1)l9EXq0-=X62&VmW+V}jcOd=HyE{X17oFY>JTFeDo*hP^GzLq2wW<-X7rGe1J z80Ex3==u9GVKhH4eji$Ew$LP=4#(Lw@y+nSLOqlSpcYWVx8gu~K^d|QmsN|ZX2oIS z6g(wN5DdECYiIrjzvP#AT)fwm)-E-6u}(6_fUj<%q4YE2X5@{uiBp$Kxx#Wjw|LET zG$HI-=EdUTSH*ht%R5P+E@ry#akVT1>lal14Kdo(HEQWIrFR<`!ILo~vrq2DA}?girXviGDCd6wA|*QvHqZ5h_#oaSe%Jg| z*L^%}L=Q<+rqWUQ8{%s%hb~w;N_FrlIDqfKq3R>e$kBG#cm)IASoLEzB#~p+Aa`wqO0+F4)!Gi@AJR<(!U#9D&BEdoE=8 zQf}#SPAN=hS6Uw_+yZi$2CkM?@?|-ngMNyxY#c}SU@3eke$@7pB9D?=t4C>#pI zcj2bW&IF9sGHxB2Z;5vXZj2)iMvc|$D;0pKuk<5lO@y&lzpp)WAi?@b(E##L*_v#r z+byum+|+(?Fu;sJqEMyR@Pnwus>Qr5LWdtS;*ms|`U!EAF@vWhwU@tpz382 z8jOji@h#fW;APf40we=+Q(0%CB6}nntAz-Q;OFA@k-r&G(;Gs)j$(%T2^MD|f~VFW=#weaaB zr+PS5`W|440(86!CfC#A`J-FJ!t~TZ^w9W*_0VeKq(=O9dSI4L@}del9naLgXHm!7w3y5Rd>lt*@C)Z`WUiNaypT0N*yv9wOr5^ z=DaMnsf(XQ8GOy>=fu;eJGRm5DXW#$9eic!l6dFzg(P8Bzjq;)S497r-K#apP4;JC zSC`--T~>MdngM(@_h?3DVt~#gDEPaK#C#sfPeDs(U^+7_a09)FC{hl!n2{{Pr-Dm}K`m;4=nK0-k;0ZQ*2i TCej}n2zNyGM)pV2k@SB7lqOw~ delta 5450 zcmb7IeQaA-75DY$^NZ~`&R5f~&Uz0v@j2H?oR z@dBq&X#!3Y>GkP*$m40>J7}g1`x?_UrZFZ-ZjHWC?W^IIFd`S~b_{>6!CtKB4&%E4e&Rtme~d{#sUv ziudHE!UNk~){NQx_k8$THm!!QCvIKKt|$17@KJHoG0B4BzT<~slnB|m6aH`)5($YL z2`=fR2Jaj|dc{5Gr6)Z2Xe;pya->1Rc39mHR}RfdcDq$}Ss6!eh-q%{GqIXV)V{Qr z5VpY$(dxf_)ZCI*x6YH9ym9lK_@#ezwOVQ^tIc4QV@v2AM!nXGk7XCjem26wJX|>} z61I=WZDpPskK~bfqj*h6BotqnP(|8cVH2(# z%3`Dtrvok*Z^JmnR$YDJP~-NaW5xb`S$x>^;IJ`P=USGh6M4hddZaW!-x41*-#f8K zq;(i6iN~Vs8^rcNAkNN)STrPY(?rm4b%7Q?h@_kYx}!t{Vd3j= z>4|VqA^
    ;6brG(o#sZbX)r>5Ta~{qLSkAWG0!9$6qh}sI`_29s+ZkQ++W3ECg~^VfwB+y2FLNMRaFgc#qqE2?iXkxHajl*Htpf&UOL%jSf5sk2$U z?yDCI9W&4L0!wqQB~y8ob8-N>3MNH!T~6>!GINzv(~~zv*rhF6KrdFq)!arZA5SF# z&x>`TRq_yr=XYYKxNBnhYB^-e}ec%nga5KF9f_Q$I50egp zn~u@?bm+i~s8|^QO1>c4kKfSULU1y13zxnkwvR`uo=RvkFh;#i%P56vhD|Ad1i(|U z@GRD#bSjZ#$p*i6+0Lg5%OgoPjv|Ch+<*x&uMSEx<`wydcsP1z26LuQXEj7BCdQ3I z$B|T27N`7`!jH#V*f1HwiQqKw24AY;4C2$b3$7Dhb}~{@Dxwr0LxveR-N$UualnPg zfw5+>c5>_(*+5t_sS7UsbXSvYc9Q0ArtRYIC+p2e<#ADL8qAB`QGp{Byg(#Ev1u5U z!NLm@ODs?>R0$%)&Z(A$ij-oHyeV2H|EZUT4lmVk%2=8S(5Wm9PWf*O%Tp0!X=va} zyQ5NP6-@-FKNjAI^fP_7P^na!Ni!uBdRqjiSM(9tqZAm?`{K3fv*u}kT!fkdQ@^N4 zyGQoG%VUNRYGvj{7RL7&C&VnKj2gLG$}sg7(LQ^<^3X^9Ur{F<7rXe*Y;0r}?x5|? z;-AO*ti^J=MC9y$C&@QzGPDlbWpJRAR(WGWgMJY5Lhthf9svkYsC7};(4Uk^vT_C% zKEMJZOM=f^05+R!<7cRRR~8nmlXyVT;SJwMA%}hhE!VK@IGHWm#%hMepB6vUgWK=* zUdI9qK|MOfopa;I9wm*eZPH;*)kj`KeYle^`L{@<74ZXGA|0{Eii zY);Ha!&?}hp@`<1Q>6~z^B9PG6K^T>@N!4!Qg9psY(;~p3P ziC#3XD8)s?XOd=aa|BS_FwO^U;(?MgPM zYL0j&kycBtpGFDvU$Z0I&_v`kT3IgKz2Ihz5tKw*{2~(kz~Gm}50<;yoN%kLN?pyy zH!`0V50@_oDOc6MZ7`RLxPI~QRz;hW|5wnm1aI1PmFI8iiKhlE6LNC_uF-@9)!}CT;oy0NCbpf$k{Y+w#EzlTf2@Lw%{6T*-FcKJ*{at~9 K!0|wTp#Oj4Rc@^S diff --git a/nlp_resource_data/nltk/inference/__pycache__/discourse.cpython-37.pyc b/nlp_resource_data/nltk/inference/__pycache__/discourse.cpython-37.pyc index db0b09876815f86d94c7cfcf5ba714e694191e38..1f8f18f4009f7ed309db1b3160bcc6f994467e92 100644 GIT binary patch delta 4847 zcma)AU2GKB72Z4hH@jZ1*EarTZPpl!*MM#Q9Uuk_7zor5(m)tQhVjnWES}kQ@67Ti zl}&0$N_h#~NKLAyjhZ}^O6@~dRi0W^eXEj6RUfK$`Vh5Ie^M!`s;UnOO4W1D?5@`% zQw1}}_nv$1x#xc8=icl0-eHg5VaciH=6C@9m7)*Shvy$8yLUaq0s%ds1aziLCbJ|( zV%QFvK_v*kLuSYdD`6|5M2H_Yqn4~ZxOk{c-6nI5)uD7yJ7IQOT}qeLt#nhr+3c}; zm0oHm&9zoqNmIMU?6dlnerl)8byh~nuz=BQgqPT{fZnRN-3jPz#=uf=i7D%W)2^=p z&KhI=QcxL$euv%({fnK%*Z_fL zH|ZID09sjny*>z1BgSTZ!-Di0C=Hm74m8`#pOf?J72k$m}L7J zh2zGJMGo4)cK<>Vr>M>q&C9T;EddF{Zgq%%wRMP4a`;#X`n(%SFOoEpek7>8fuPB} z9mo`XwhTm;5>Yvkh!MB)QgRQQXacqyRTnjG8@!5yBF}Tpn#-GpPn#>NYQB(nRMisy zZ25RceZj2cN~7a$$l->OWqb&fdQ;Jc&*S^eyzS(Q*5c&XMY#3M2^@FB_M(}0_y%gz z`q>aUf_EYdAXqR8wh~62o+oOWX=4b$b@9E{S4X1I;$a|F^Z8n2)NSJ=jwqGI>9)eg z1{d(YRU{G0CGqFBk!RQ8Xa>mul8s0R_x}(1dXO{W^B`yOxGc*dj$H9lva@2h|C;S? zfu0*y=gpE)+w~Z*_&AbnNPPC=;`hY0j{AFdK}$&yU@C-(WoQM6)!N<>JO5m?`Asp= z`Su7Io`h~qyk`}0jH56?Mb3vvM`f#mr!aofXBw&c2nV zx(O`fHE{k=`N7~rY2-`qF(qCcI#NCj%iiidT;d!&oT=%uGa5HC*;v(1ZdhQy8$YK( zA|B&mdW8Jz-P}e9AIh!H2P%;)cVuoTJuV`-K?U^vls0${s{A2m}flC4+C* zQ7pTb#=AuC@J~lb8dJBHtE;8<^DsV8`D}QKZP|^YUfA$6(B{t~gMnLWfk%Z9qhP0) zADK}Opb$A7y~9ye!=BGUcg1$=RTZqCMOjqmwyU{f0UTd~J=YGuY^7!Mhiq%3!!Dw1 z#H$oO%qWbf#JBeLi@wp6*uAx90~v+pxw2Iwc*@GAyLitml+qi$6 zhz8k}_jf&zCJWG<`2?=QxEMmg5tJ3NjsdwkC~u2bCJt}Mn5jmy8>_|(q=kdZ{CnkJ z1@6Pj{!r(W?M>!SJqbSXX%~ z5T$9hlsDn#iUpFN$5|13QOg-#J#xb|kd1rsP(^ zAT^tBAfs-x2RsGdH()@CVCyIn%1J^Rab1qjT2!7uF2y3wcEdRU%vsI?&l1crH;Jdxtd?7t=LqWG z&>oxr_R1>?FOn>iSWh}Vu@DpPJ}pi384xYnzBF{i5D&6UoyOE`ELiH3wq)v^=o z8S7mL$cf}rgrDCZ+tHkS4>iaN*j$)*m!WfL&+6Y=u&E;dnlfBF$)MWG!Z zWKWB`#|JNvHwg0h94}f>yWtwk7g3oCBpY&>t;BI9!-6B+cB5v#U=&IgCRD+H_i|;k zsOk7<+9248KK8J(?nFuoUjt39*m?3?MnRp79DS?hP-fBKcCnDrHF)pYz`r1Vbn;|^ zsu8#~FvCX$J2h0s9dOz`xs@=aQdtMmjTKC1VN}-~jc-Tk9Y}TpaYM)iYr(K(@Umfk z90w)cQ9?LXX;}^_l^$Ibk!w&*FHtY}>hYoKaBF#5{Nj|s-Vj4y`BT{j(UoO5%%5a9 z$l>z*+ek?J@)B)O5Y?5-T$domQ6dmZLe53%Ujo|vP2hjhFJ771wgYQ4z(@xR+&REa zKJENANW6;b;0pv8zb*bT^Q-A`Xw!oSaCrmUaU=;OsGV-_VDopey~?`?Fa|L0i_cE? 
zmEQsxe4$!ezM!gEHbejUcYvMU!trU~##Q)pfm7lJBn6s59&>}*Y>o%98$uzu2*IH_ za2E_CFM_-n{gt6uG;JQmK3uA9a2bbRM-l^4R%D6lmj>9p7`E)Djt&C;_QDCL;U0b delta 4667 zcma)9YitzP72Z4hnq9A##l}x;V`DI^!4KX9h_MX@T!=9ZrDXyc#yexYcxHBcXAA++ zZkrHLg&IOGL`~DDQlZk6s>-TT)gSrUca*BC+G_eEs@g>DkN#=@L{yD>&Kd8nJ%uV* zd${-9bM86!JCA$k-*2$D-eBqR`ub!D{&mevtG_#SJKeftGYf_EkP_0FE}2YW@PF8p ztgsTcB1(iIKVn9$m=d$%N}T3V^Y$}iC2N(kil%jDlhv#=(>P_eSglGcjqA-et6gcQ zaoSvMbtoMybU381&@dqr-tSVmN9feyF6DYYJ7fZP*{m+ zHdcN+ag)g?l9hO)I8oQKrmDo7fbFG=ypS^-M|J18q3K7&jk0{AF9!gRJC9gTr3L<%W@Ed_8PZL*rp`pP6@D zfTTEbMGUN*p2T%8>J-hq%X?`|_OlT(&sQM}Fqk)NM~UK|KERco>7y7WQo%JyDqUw2b!Kj>i5LkQ&$uN?D|7iTW_(s#sUE5)#WC$=7 z^29PU8&a@xBE&AJi$1?9&NY9lpBzuYv?AW}2yslauZsSbJClTL6h-QQc(RkXAZD|d zvtd6_2(*{>tD}WXryq;>$4LFnu>FhhbFdxCESZrb za$Jsbgy``{&B+sx4q;k4JvMe^8%+M@Tbt%4c~q2jpJ5+FVW(`6V`dn$OZ zp=^`r+SJPW%DXoGnhh+Ci~_6Bz3Ivfphg+HUknc%UPqSE2T!&ZOgCRNFE24aiWbJi z>jOQFbRpF(_87cJxF+5mXlp_Vo*f%Qp;08{5#>ZXPVsv25);qmjG_zg2QQ(jbg!!_rxuDw z!KxFhw{6-^NKyQDqV6P;MQ|t&(WqDNd+g)vDCH~LB9ewV2fh|jp2L+4aBRowtYRAt5TdpmARBPQ%I^>>ClNgVOvh}5!d#*}+BQePK8*nME@ zsIP$M@e+&40*TLI{y%5sPXqT>x%Y{4{)5aH2-Z%}U(A#5LF#si-|X4IxcG3-Oo^g` zyLl1JOfPI{MZeDJfkv0s1iG*Sh*CGRkT-RedLloCS}~`n|?+MjG8t=RpQRV3}JHtKhTFM=3nrRlSgd zx~JDEU1w)-QAKhN33c{b5UW+|U-}e8iYda+pGSg@dnw1&+;(%8$_~D-JlQO0I(`|g7TL*8_EP!qWJZd*u#+Or96i;28uw%r=qoG6 zx{C&P3U;@y;R_4+pAo-3dNf5H37gFn9}wANeb3?vIGvutN*GeRtODsJY}1_|)HPS* zTTpr{l5Ie|2r|K2aI6MiHp&m{vT;^tH*yDRQb?c9ca7hu6t<4XZx4+i%t zEPWoHi}Yj+F@9Av9RJ0kAv86NT2X!n$1;*6lDkOo#SLzK1;;2B*cC8?^FA+nraRed zV*hk==~YzN$X`TR#McXJGdT`+Lp+RAxd5TmTzIrakQYT>Vs^pKxrKu1@E9_0A;E{1 zB2%zj?L7IVBx&k@;3jZ=MU62Cce~3^pzBJC*p6Wh0&4WZ!&Atwuk!J0jn@N@R~wPi zNUM2!*5LS%hYg{juMndrI!f3!=+m3N{RkB=R+zb9w!VTF>N2V`R3Y8#6ZOJ{!Ofn)j HB-wufacact diff --git a/nlp_resource_data/nltk/inference/__pycache__/mace.cpython-37.pyc b/nlp_resource_data/nltk/inference/__pycache__/mace.cpython-37.pyc index 3dfabb3104f104d38df79e5ba42d84f26c707fec..7c3bb476ab3dc5a3e0f96161e00550ec8e2df2d3 100644 GIT binary patch delta 2623 zcmai0TWB0r7@jjbyP3@1H@D6G)?7wOliJY4($=bU*zTE4H<{Vpo|#SC zSX`y4p_LTE(JK1vOCNk$@ktPT_EA9(LM;?TUwjiOBL4qba&6UweCN!6&i|kP^8M%J z_WXnS_{n(OP~fN9v|)d+5$~)$h7YP(3MEu^iKkjB!Fk9HSs^)UmIkBdhP{Xtk!xYM z$Wks1@~MR!n|3xp6OHCA_4SB+40u>a=-Pq0LUhiQUqy6i5sj1xeIN zI<+W>(sY2vX=0UF8JeW2RmEzdX_|qtmA24U7~5zYZHKX)cF-)09ki2n(e4Gcpwgbr z*@9|iX)o=A^-iaY_AjaIXQ%U)0#1eI6?1UqgmGfPD^tfExLhr|)L{bw&ywkkNkpQ; zilx9VR7?4wST5Dik;fs@#1Di|M_VC7PmWGjeKyJOgk>O2uI!l7j(c2A?pdbPW$SSc*G_-MiaA5wr8<(z3&X z7nXDGO#P8@MxE?}{n8^d#D>vos8aUXzD?T zsukBUH8zc^Gq{5=p@Xp^F7FANO~MAs|B8P(fex5q5ptc943wdDHi2*qmzvmdoTEiH z%^xO??9Dpvs@5ly>ty&KOa<`-DG5>*q$0>w_L7wOf&_w8**^X-H9UFLHoqU<)0>_qo`@DXw zWq^<@|G4$e@Rl~ah<0`tNgnZa{Z!jegrxXjN8jFX^Puw`pYNC5+GiXkyHy2Ftza>`<4VbRAl=wVIOR3j79 z^odwHmWhq&X-#hf=AF#GM&kU#>~R^Kr2`*rzqsP;uqitP{rs)|XEsX6BG#XYsa;|{88Nvd98E%y13JQt{ivU)-TA@$^Y8OkDYG6Yf z?)ArqZGZFoJn`8dN)5r9GOM2@%*YA zw4*ZiywYS4R^y-a{Xts!Mt@5mHZwbku-gE`J|5@a^v_0Q%3JvPk@ni08DU{q5JrKO zJ-o0=3$pkkx+D4QEwonyFm=(KV+&Q!DFr^;F5p?T5zgU_#$LtEZzDXz4Uq~QKWNm{ zF7#uhLl_GZLeUb$APM-@VN4OWR{vsPlpK(GQKg^k7HosA{Ld&4@#xU^B*PyKP1G<) zf*5nAzOCQ5YF1%|-9*@mPBsvb>NZL-B=#PH4Cvou*^#Iz45bfr6wOd0L^t^Qa0j_l z|7`f^Rv7P~$#((%FN_PkW#o`9?@7WkP_E7zwj{F=E+gasR?@HgP_%)u5r1r;aVwcb z-Y|1OX{@cx*^ZcHxH9k#ST!S7e6yll@rT)fg~`u!3Kb8^5Y32sL=}@}?97a=tCl8YPl1#3XJzjIhn` zO_ydB(cYlft|!WOf;<)mw<$y=8In>}qLD5&0b>tg>wIJEBk4RmWNTI+s;32Gui}9XCyk}T5g3JisKMf*dhc^7* kc6}XXG92`+amPI?9Q_EqSXI|K7KLs8a{wBN@k{2|KQLb*ivR!s delta 2557 zcmah~-D?zA6yG~LvzeXjCc8;Co5Un;lE!4PNi?aS_}!uwN|ajrFxE~txsznf%+7je z){j`-Qll8qhjJfE3#ElVR79T^+PD4_wO}u$zLbJ5g(CDp5PHsC@@a|*`@46}J?Gr> 
zeIJ~;btZi*oi<|d*E@CEE`O2ktUiGc8na@QP{kt_k*i`Uu*SW(uUaa>Gu6}lgq83c ztOoh5dAgsplD=UX@;l)*`Y9{rr>!)Joruu}sxQWReDR zviY%5{++f@Yl5=^|3h=zkc&xZ!(9$wyml_JCB6rVrXu2QDH2*3F}Dm~%IFS3F4YBA zq+S>YJU5J{1HXLeEPs@=_V$6SFv6(FqVTdC&9MR!G`B>ZD?*)tl_`=^DVH545=B^m zmgI+x9eNy2OoKJ^%ZA+}FIl-sr0nUSbje}S_@$z^yY}2Tp^S9`zqH7VvwqYXF9#tT zSf;M=?hT-oxSKwIGKqX`wqKlgZ02~y$PG$%R4IFosj>+a-HjZC2_2NVc)Y=BHVO>W zys7EO5j4QmMBH;q(ou%m*_#Ll@u-0v#yx6e6a1g10~@1`Tv6?4^F1;!4qHKdK}v!I zf|Lch$PP(bC`cqog$?k3G6UOoAuZuKQ3Uaemn!~7^YHrHwf)(oavYO7X)0_FtZWbf z0@aNBF#wU637B7uHpq)+n>y10D@Km(2h{N%)s|Wdgyi{ywxxj;Y4#E7S)U|%#m}`5 zbH9@bWE4raRIWre)R0h%Y&H}Pkin@SbT$f30rxN5$?qWs|0e%dm7;R2RmcJhpi-i% zvO=XNA_esoxzoNA%>`7ZLx#H^`12@=zA{w0=AxBal3B{bVb~6!YpS9uMoUW7M^f2T zOKM2Zs(KDe@`H}|Ns9l_aYTA;=}@S4=q|isJlgrJW^^_Z;_~(`Gb4kP!P~P4EOuEFlIbFZuoM?N%9u;**od zq3$jq|2%?+a2WyZWEBLI*)%dKIQR>Gp$?!Eh>K=4vlJicd1v_CdLf(|A`zC0(+;`|>`|r3EhoR$@ zg$C0rbfAc_!`V@U_39Wl_Yi-)b+3xy=EcE|>ZF-qs8Sd+EbwuLm1bqyMdO;Vvjx<4 z1HjZp5(>29JEbUOtEoGII4F|0~Jz=lvs9oR5MSlcv5ZUmrO)(X49-D=x`?ff`yw zD*C~0AxJO&-+?vJ8Nd)JmF#1LBEl#9?qH{blV%cX8WSpGw#H}au5=uG*-TjJ z0?VYQ;=4tk5v4|Q&xg8M^kv=~uH3&|)7QAwB5 z1Zy{8clq~2U!IpSRTdC=DQZGnsuex=s zhTTP)EKIz)KCCt~RzCnQP{nf&H3C~W44^7tu#$pMWJo67QjMK%*>0$Mihpfp{sXIb B3KReU diff --git a/nlp_resource_data/nltk/inference/__pycache__/nonmonotonic.cpython-37.pyc b/nlp_resource_data/nltk/inference/__pycache__/nonmonotonic.cpython-37.pyc index 24af18a4607532353d3927ea677f6ce5bde23976..e6f765222da299701763d670e42448360cdfbf18 100644 GIT binary patch delta 4140 zcma)9U2Ggz72Z3uKjU3{z5a<4|E--kYkM63#c_y}*iKCACap+PB~1&Hz+}Dm+8sJG zyS}rV*eR*grcb=I(d7ZPQVS9iAdo0p1TP@LQ~47D!3$SZLhw+E#0#o~pg=k2j^kZh zo0eV8nS0OAJ>Na&+@JZ{i{zUxkxQR!X-P)lS4rHrFFziM{24nBelcjAg(rBDL?Sd| zMJS<~OFYfeJl)a>_H{Sv#jKcWN8S6E_9av^?%rRU*z)#U`@I9!0k7NYR_uh^Gy~=O83(PFt|y(X%D=|XfN%9 z_YvApv+y3L19TAH6Z9ZG1n)^YL=VGzicQnpbK0wfe~L}ri@*eiuSCodah?2XNR#Oj zcl@%wTJZ~Ir|8Q}#diut%52vuGoE*Ys;NCr223J*OPl3qi@rT$KTs(Yy;8pHthh{Y zoo4zW<{ZhtmZvHKpAvu9m)g3aZ(=euAO_*x;?d|EJv*vAi~3FTYs_aGCB8T#{v92U zV+Q#?F&Uek*sUp(YpiV3qL+942^`e`5+MeOYt4j@R9}fLlA}8&rp5Y7E$YB_^ zXDbOc2|t2EE2|yMSH$7wvI-8!m76d;k`#7<+--GL{H%F;IGl+j8@u6)qWDb$C#g z@qiVIKIN^bt)`iQmKnno8<8;bUV9D!y(JNOH4on4LBHYjLW#5UOh*UBtgf zAScAzy{Ggu;Q2y&7Ga&DM2tTuxhvgww`yNSQsKO$MiSPBD7;VSCs&ZTy zW=AdIGFjq`I)!|h1^k@o=r_r{Sm?jjDCuukzuj+>Tqh2(y&s>)fonljvp)p9e~Re= zC#x2T)j;J6@ZJ|c82FE&0tpLF{BW=*`#Ds7vVs0H!26*3{@@dYM8%myL!)YBRcfC` z^#;cRzYKy{^^1poPR6&BhHK#g@J3$nI55TC!+yB;5Vd$d&R`5~ChA0tN6fZx^(A2( zD*?xFh4W3`3_Y0)mzo_oYy1WL) zcN_9G;5{a)W2>{uM)|3;u?$%HHtb}jy@t5UJRvR|`O2WVcUL6e0ThO4&gsM2Aq{Z^BIwy7}{!-BNwdIlsUrZYRW}y zm5|gzqgpCa3 zJhPToI$noc)k1s0Ee4F@wJ#0#KASSHcnzm;Ta3@;ME}fZ$Y%A{%uk6z$JY@6!r~l> z0=0;(8YqJY$S5PJAVE<4CJ;-n_#q;G3wgB=DkyDm^y4W;1UQ{ z5w4?Ns!()YroPdFc8<3p*`z+&ibXfzF>&ooUw0g}vGT37@0OpN4460R7T27Dc2lywxoq!mu2esOtNm~;+(OQ zw0nt`7V*NvR%f*ji-4Eqfd`)ahQtF8hy|gFl|U;-LgFGoJRxxvpjG+*Gf5_n9AItD z?{ofh{{MXcb&kLNCGx@BWc5;QZA5}kV*MrcM^`23w>UZc_+hXFk3B&oiAouX66({5 z;mi04j%8go{29L(`}Kek%sg3bmW`UsldaKx!42wXjh0M{(VA%`((@7xQRT5j6|L=& zoN1?F8hI>bI)Gn8qri`9t-$Yuc`c2>JSOG|nAg!b%;Q=Ytarn_o;JX|L9CyHc_VFt zd6U)y>%Fv@o_YL&lOtuUUpRen0!*HLH+iTjOK= z1^;sW8CV-Yr}=;=gq`7E^M9l5NQ||kYJ?sOuX zT2ZBzkD@B|b(+YYvr>WbetXZ^C1^-p3B`d@cFzW}h0FSULD5d<4qJkbHYf{#W8+wrR z0ZGYh2*(~5wSdYLa>bI+i0jYsSCpMJ+U3c+g6P3RO+vMht~5u;ES77-DVenK^U9Ipo0!-#W^?o;B8K zHhmk;{0ltN!)cxkHS+hPZTvg6%L_u-&qff^D3UQG#~`zDU{nYFMLrx`m=+o;K)WKx zeOn3p9>5;lza&8(lbZrD*f@#jP9YJSJ&9@HWsyGvP5%UO^UwYXcD8k4SpRRK5hA|4pZOc=J^FPXWb?SyIG zc84xP*-aI(*UJ|S)n)$Gx)(ot7In8EX$9hhRH%;};P5gI_Z~COvoHw@Xr(A4#qaki 
zK_$SFSVsI0jfwJlyxn(OSj>E*@CdMbbRf^qV^Zc^#}ev1QxVSTS#zR~WMxI|od52}sH>!sYn$ zMa^)C?Z~FKd(5F&z)p>-LT|zys`?&JH&0dtdL4CE1Db|~C-AsHWdNEM-R#5WZw7`? z04m{FaX4h>n3|)W^prno&$-+GM`C}BKxTNNW6nPhL;kcqvGQpO<99+H$RD4YQ{*}RZs*-<(f>hN zPN*c=fGQ4}`V~~{c~iKd3wZzFU+KyxM1Fm~%9fXa_h%mK{$xpb)GbCQd<4Y_+xbm1 zTP!h+-9`Bqt0=*~_RsS5bJqy*@%MUq2fcK@gWIdTaM&s=$mL)6{Fsa!gcJ+q(3)ze z^uzi9|G3ZG{}cj)a6hqqAPH}#dbc$3J(&qXgDElgfJI>DM6!i~W>OVwWv>E*)g#Fv z5jBkE>$W)r8Fky22yLAIG?^qr{O`&BxmR!(cD77l1JqAu0$H|c!w=4;+}YI07~vsx z{B!;47fvl^j1fa03#YlTyhw|Y2=@)x@-295Y{N zd|{RB@tOz6m{Orr;W(kg`*D@V=$wVc^*pp($a*Xk2&sVcdjgktyUF#B zPl1cRLSiIJ;)1e=<>k2x@;xtZqCJV=RRR;SbZSI4p?+%vu3#8MM}e+yBJtFVQ9JM)KgMYB3G3twxrTVxW_bW<#IzAk4itkI2_>gi%=~PCP L7(5|GiID#QG%yih diff --git a/nlp_resource_data/nltk/inference/__pycache__/prover9.cpython-37.pyc b/nlp_resource_data/nltk/inference/__pycache__/prover9.cpython-37.pyc index 1c272134a39d62c1e7aaa3dfdb7a0373558b035a..ffec0b9ab2c0c3661f1b1bdcfb8835fe0b42de6b 100644 GIT binary patch delta 2877 zcma)8O>7%Q6!v&+$65a*ag#Vs9A}fpi8rm2v`L$!gwU2Ig_Z`Cf)Go^vh}Rp1+Q&p z*QG5;5wsKuA$~eHKthNE2tff^qK96JK!Ov3>s}BNNC*T6#0hcWy)kuC(*rB*H}l@S z_syGmGjI1s;ip3Ea4aVK;Hh`Ku3vvWmfHN1_hiC^4 z(@rYW2#wMhjnl5TI9hk!E;a)yq8JYuFn9tK&VqacrF zgd#)7=uV&-q2n|QtwJa0B($S^nWQ#%mwZJ(RRvB7Y-;kNKP_;E&eA<4KiKUR@|?hV zf&1wG5`mG0W?Smk#^@qlq6g?5beY~sSLj{zAU#A6mm=CudM`afS1%E5oUYNM5JR@# zoS^qS>3@x|8)o*R54KV|=~IvKAIb5AO1yZDSru0=)++_qs#Y2^{**+L{NCV|hyscJ z>)=e?VKe-X;5u30heAhS>~d&s_ZW;ZG}s2CDE0%)!s`S8!oh%_^*4SB>C$i%WG}4i zR>g94T@A7fDgvddQD%eOko$TSP&DZT({2#g`AhQ8%V?=ZYphx=>W&NXmb|cI*|oA& zJl{6I7pG>S$_&@kg}uqIM&>tiNTreo)ezf>lZ+!|5hf6DCbkzrxfMhllb|~auM+~m z70F39$sdzajT2E$$N>K#RwHq~5ubp)IUnCgGW_Fs-_#;ldx45!o7-5C(2EzUm7-O` z0MD9i=|bc8_}64D4MYF#3d=*E4Wcb^v{-FPHyo#K*DyitWU+r)U|(x>_oc?)-S3gS zf?P!GaJzl?X{%ze^Ln{jFp$rAn}3_Q5|{@M{#$I%rtFvoE1anr?iscZlx#o30s^iG zB3#4%5<(K;4uoX@HPD>!5Gv5NX{lgOfLMG~FIp90=;V@@xCkA#U#q9oMl@BPG2 zhWJ60B(pk8M={#kk{gEx;&BPcBp>%c3s*oY15ge-D9Gq{Y~3fD_Gw2p7{7>J}j)bdxb zCz2LwUV#<V{{ z2Y{&?$9fPF2s;4OFgt*Kk^VE-JH-QI$GS!652O4D!XkfcEE#IX&Yu~}wU;Lgj2>)U z8~dEhw+_^2v#reA^uoF(}~Q z?ok8XWqxtuV6M&McE%Mjeue+~|BP)gzQRvVzMaBC2uYKm@#G?_+RbkO6yZoNzkoMS z3yNY77jOiPNQpfPZCk8f#5EmPZx-J-c{TTP9w}KJ0a=(R!IBriYvU2uBe#@po>SB0 zeZH#xy`X@#>K73Sch&Ftr)S0FZIk{bFPAOHEmZB=$~As&D!+-Vxe!zoCF@*}pUp#a z0A6_=D(hA_lNiof1W{NzyiS9a>b6;Novn`pdm0S9pn(%K7mTZK3&4}bs>N60Qv#QL zd{-}duCcW118GKZ6Hltt?RKw_Q!@%C`vp84Kd|};dx0lrZmf!pdI9G-hwvJ@!k!0(Iw?NMo~*+!iCbr;t}~pU ztsp#rfL|RgUMagzPCKSOU9Og_0zW&OJ1z>KmJlOWrD!sc7i^iavS5?8@7Kqh_m+dO8m(3L{#SU&z LfJFK6J@tP8z4~7R delta 2821 zcma)8O>7%Q6!!RM<2te9q;Xs)apER%;%(wMY0{=i(~|xq{UJ?JTG47N)i|Dws3b$%niJwZeLnVYN5aLjQ03nPJ;sDL9964}c4+tS40TQPkfC>cfjoT)5bHLJk zGxO%n_ujlW^VUD7@1(tZylkn~mBO8zt19BMK3{U9pSap*Wy- z2pa>{OaSvXM4{q)BK5Xn-~^+mvP+q?@1*!g`3dzZ2^@qZHIZg+5+=dn0L?!%p+pnMmN(=pbFD2+6{d>jnW?IJ7{ml zwqU0*fwI6Y0{fnjS0w#H9xsV*fF|f*#tsBSV!l=2u)qlPr|2Zzlc`lU(}Q#$-M>tfPC7-W!Lcs2n;tl8f1R+;)UL}mh_3yZO+Ivf zzh_UxTvT=b7uj1rVh=e;Gtang1Un#^_h%A|x~3-1q|Pnsg%mrR7~oE6mTc$8r2U}x zrZn8Y8T1$`uYm|o0{{tVh7-W;a@tv-{D+iuOxMEPawn77eAY}RWfu#hz?sogITpfU zjhWR{y=W>@f$2*st7h&4r6c^7=a&iGsnmg%zK}Fbmd$4@w~^Hfx$MHZs$G_ks%f32 zOf@O=DZf)YGS>?Pvobj@ORN)ytP7zVA&P*O*bu^cC+vZRX=sK7fG6|>Sr5P92$hp{ zoRA>D=PeK~r@kJD)vLa3B+OTQfi0t;Z8`HPO>_czwfkD&Q&2-^|xAmG^)j&~q5Ba9>L1dyE-Lz5^# z-HPUFBqJcR>XHlDJWW=te^d@Ot=LHm|GxP-h}L*892e1Yq%~@h0y~5(hY_k+T*-7Q zm-~`02WLv_B~rMz2RH6V5H4ft*r~_K1Z@OmTLB8t3?Bd{7t-r@2i)}@uUJb%eG@+i zU=6f=(2SQLT1PkiNn-q9sCVlih*%4N>|is{SrSu?V$Ni!TXh76dH#XF8;X)v{`Ekp z{A1`_Qi`D>TCf~&zb*Gtie*#tIhCD8nbTC6#_)YH7Xscx%e9bYhG|I#T=zU>Zd}09 
z%+N`;0YD~-M}4sXz8Ftl9Kn1Kvk1lZKLWq1Gf0D$KDsNyH|E~SL5ln^1+rt~R%CcS{ z(cVn?!wzDXg1B=pFK_N2#WbFGN2617b4TaKfvFzX{0I#QjR3Nnjp10_j71!s;?13h z{lfKUaefkElovXKQpIs+xk5%B-LZ$RuVF!|s+|KMTjF1J_npAP zBs*qjr)5udWfCsxxLcH`GdL2r8tpQqSVOydp=DSY=9AGNk@?B!$HTb!p;wp!t44zh zk;ZOkL$;J&AbL^AbL+9%I!WKAtQ5X3u1-YR7uYc~HK_|M`E)8Yo}q zYVSKC{9HiN?9f@B1*U72j{sERa4bHGFHdoaY7Y;v9hFFiDbQD)&Z;$4!%SAH?_2y* z?6o*jvLXVqh{q{AEGIr2igR#I01ApLg$61I#?x0hV0px;#dqQpsAd6C>TFIQ@em*J1NsTOSbn{nng&`ld*%e*^rM-qWs=D#IcyDx&6l@QiRzU8E~wd42^$H_Xj*BQSZ#}H%3F>&IMI5>cL!~>ayH_F?Xf zlNd-0O+rBh3S1#kN>EEsKy8KEE~2Wcwp49@v}&amsV&Tp{wb>HuhObdsH&cGX4j7) zRJAqV+{by{bIv{Y&VKwVd+aJ(*&mInKKNUzc-B~)_xXO0o$_A@8WZqXFEO8w`*a^? z+@EDRzwXZk^Z>(tARE+!@D64}xv(Clu~4=mr|1f`!`aH5s;ktl$X4YddL$Rsqq&$K z%T?>oK2*C>r`P2+>zi}+dOeI(6Qz=E$TjMXxhB1d`jy!&xn{kY`40KG%BwE=c$K;J zLO^eUeuPJ%A2l1HzYY2^UJdMW(m&M&1Nd9efLKhIf*0BFF=b~?W=;8_pU0aZN8N@zhO(3)a=;WKiycTc;YdF!3 zu+b|{i1a)ve-rtEHUaZaz%p%7fs_#hT!t7$AO&OCs+8Z4HrCca(+Q+F*TN+o@`Gry z#Unp5pUHD0mAB5A!g{*c9eavhk0hF3>_S=X)OP%R1L7d|H?n`Ej%72JjVUvJMgFP! zZZ;sNbzm-nuM* z8$WT+#ZB>Yk$u}ErtKE&FW)RR7T2v%u zV`KLXBhb+mYw4u5lry%0{ z=!AT|xo79SNN*tAhmb-b2NRG#?t!tt%6~Uo`$!Es)`=L#f@xdXOxiSzQrUmR%((GoXP^g@LOy zj28LbmLo%JM#4Dk7_iUF?OI2RyB`E3ihH)5VRHk}ynRZm5Vu1^+>JoK#?s@ul)R1( z*Fvs67;`sl6m)jdyY3+%Xp>6fC+)OW6X{nyK!x?A%CQAAZ=PKg<74vAiCz|x+gmp` z(Q-FZiY#!C7H@9d%O-IK3Vww!7qh9fS@tE$hhsxUai(SAk)fPUiA)L(fOU&T=T6CA z>=+y*=azENf>9w5NV?x`(0hk&u_P6b2tV!nDTN0~Ff@n}4k-bEV8g7waZQt8T z8cG3JB`Pxztqrs{$ymqB$&y}_>jdG1%Xxey@-Q!y(Wq0#_iG5gtTX)3&OT zcIdZY(!wPxekG*#D-k8B1e8Dwpb;Jw{sUsRc%bt#>%wFda5wYKXkjOq&E!o=L8scF zlixNC%S;zwqs_@GZ8*7c8#~qI&2E%RDDCSmIj>E6PwF|)F(<#Ljj*}mKeS711vSq= zf zTfl5_Vraxor&1!v+(<@&Ts(oWw$5toQnFEqDVpT(N}O1&y8`d3aNMA|>$p80LD~O7 z%iz8v|G7KKo}l~Up5Z&#o$~47U-u`0K#!b+%Tgx4K+AR5X+bIC5j75!jm77#`;mW= zvJm6uW+CRIqfDF2?q&1yI8lQ@`A2s;%D>V{fup*7VWeg2Mh#9C9A2i4#m+Ex$X|{8 z>zGFoNq!30%hyX16@l*pQ^36=lY;jrWy0y~$~-KaM(6raNck8%jaGU%;5>(FVy7~B z%P{UNetA^&kJCOQ?X-~6t@#8lM1e~pxDReUPkvGumB=~y!QK-ZMU-^2YZhWSp|fTl zK<3Ba>MR*~7)7F7I3+MruB}ufu>LQ~>3u`fOE_@`iGt!RyqvIA5H=WuLY&|!bJ;4b zS6A?oXFy7mpt4Ysw+zGu`O|&V3G_#KYNKzcrdx9NMDm&k#TS54JcIBo0v!*}VGCt{ z`gwrPt%naT+C7S2Rh3R93WvZZc!E^o(g7gBKYWManJm6H5%S+fVWAwsbc3@jx3Fa3 z{-mw)IE;z?2$KjDxq!C>dKQR0XXNl?8+%;NO&T+l!MOEBt}KHr(gIEZ718dy7pFXe zo`-Oj7@F;b?4`x5=~Ny(n>H8mbB(U)rD(xNj#%)@ z9S61-<@*PEN9s{)141JLPOKv)b11O5?k}Px@ZdM?d9dQ;{ujzT<*n3%1E&x?D?!1p z$cCx^U*n6w@s)KNUxb>Ilvkz_?6~~#)X_G|Z47pD7B-b?20sx9zTN2;$;Mv0b9rq zLF$Et*4&elU&ycB!HI8ocQZ1;zO z%>44Z^6$rb*;Uzoi@9C?pj^Cy?R9H|E__p(xx!!2v`N_X zL@kQngv?bm>;7IO>TsBL3m&*`ToTwCM3_aGLwFeB9D{NP`PPGrN))6J9G{WG0YO_)iSK^^~RN1bCl`V>@vi}0u Cl{ZKL delta 5271 zcma)AYiu0V72dnMUXR!7XKZKf*p3}PH}TsE#Bm-F#|ez5b^7{B=>azpO0ip?}S@lg6(tkLPddUsbi^7lp@TdUTH|Om9Ny zg5JJ_H|f)T0&D(+KUt&~@wg}vNEYkGJPstZWQksq4C=vTss5b3WjUmWc&9i~ldRQi zlXZHX@Er1(nptwyW0qJOpY-Wrng`8NnwMI&G_R+5$Sk9InN>&g2AY?f6*RA~HqpG% ztTd~xdh{l<+T1|zW^>rAF>9|1eY06-ZlskKGi=t=yLH)YFgMX)i`i&4(YuXUHJh7> z)n==mSaq1&%@(tjNIT6fW*cF25k|Y&K^PsZTsO@-%`TdE@jOEFZZksj2y@tKZZ)?N z-8Qqw?4@^)*=P3CyVo2r2d~z9^ggTK99r-`D`eX0d(uPR@J@LmJJfq(zv}aTBs3NF zwWt}N*;q$mkN}?@>pyEJt^RY-OK0uHs9fmpQ@`=;tE#5Gc6uBy0Zqti^?pfxX1o6% z-i<4?awF}z0b^0x=dGU1aB)fquXCDC$rh|`0{}Yfc^iOK879z8kE0RLgkSi*!B!b2EvH3OrXx5d>VV>w z-|IM*OvmlixRkb(Rq94*!#36r=E)8MdVnpEg)&BI&(3^MS|yIMGAjgfH%OZRnwcIh zu;(H^$&5R(0p$u9xAL6SDRZ(#bAW1 zMdkcb(n_Ue1vY$9(~JaiV%1oAXVY4LyP>&w$~2-W=Yl1jZ)C2QKP~PCO2jMs!M>Hp zuD5SM?#FyB`Del1iMW%F*~!Jdx6}ibr+4)d(&h5g9nP7S4k!CR7zwz(Wc1=HL$^pq zQ}dP_0LNiS&?p=JT-n*XohEKEBwcDGet)S|FF*dQ%Nmoq2d|nn=V%!gulj 
zScRN)G#!s6t@K&jWUAXZu`Qe}2vv%nT($%&gJr=A*`)qi^{0J(UFz3f}pZ${J6+gLMK5dApg+fZ60MpQ-kMD-w89R`4JBW1EpIpJ-)?xKmVIZLw+ zMb4D$18;0l?!dY?HYB@1vexr*h}Pc8d>HnLLG0vnO9yJ?PJA~Qccw^!l|DB2_e`ka zhvI%tpAw+5USk+d0!#s%1mF~yrjc81pWnOi~169CsOHei-vG^o1f2Zpm+<<{xo7{(pcS?yTPJRvq^ zRPzOK3Q7Oq!AP+@P6PQ606RB;5&Mwc3=c<&vicy%**GD}u9z*fdw`Z(RIKHf?QH8T z@K-#*mG!fd-Sbw;y0|F!j;JH8z5PCL((FY`M$@)*eYP~nS~hfl7mF99IYWei{r0GX1l zb@01{QzR^q=K(GNup3qZ8wof3h#sebfF`t}V80g9IyIl>D<=@9CrE!jIhlE->m^Zx zER@tNc_ygf`V;Y#CHvK6q*hF;xyX*rwFlI#$RA@PR~-4ocB!97#tQG`Y2rDp!dpj( z(<@!|nT4$hu?%Swl+70*m2>QI6T+jWdxirT$NcI_aZTp6oeFf81?Fw{)(cf2B21WuH#RAYseK25&UNth<@dyH$^%J5_ zW#S7R!#=qi=mipHXzh;_TPH*VkEj%lqFjW$K_O?F_a_F})Kh3#D$Z{IF{!}z+W!n&l&nEC7jVSy${Ca6D5nXaD1HC}DO+cdS zfs}ROeDsdCse$%BR?EZnq9OC*@Mt!ba*11>4PGgs$m;;B7L{SjiN;ao0J3L`vBB}> z$f7f)+YpM~MqOY#cz+Jtsb^DDJ37U6^sh?oI4P#oM?3x?+SI#u)z)(s6p)S*X-($i zyMEza=0xMpj8#I)x!PgtQnr=+8Z@W?;1uIFj#Dgm1z^=Jo~8aPo|<mqrNM!J?; z8Y)Nvx0G%xD~%#&7|qmotNva8K31Ru3w??>=Nke@(L?c6+LF{<#G~|HYY^DZ=h=>{ z^ysNRFv(wi4vh;bLWU8uQ|V|tb0wD)E|;f)Y^RgPG(#k zPpYxKXCfS2mc4!rrrSDUr3~Y&`qkcdx^hY$1WWK;bJcMHMd?qdH}?%6mOx4a$uF1a z|rP9+UA5e#ZwS{c><4fOcsRA%$N4c-}!0H*_z zj&7{v{E~t5#|PvZ1ndL22Y`d*E39<|wt&V_&yKZ;YwGPWV*e6A0I zY^BVC5ohvN(V&(lIyV6Mf9 zVBsvlDkJ6rsV^&e@c){o?A3gn zDNp^>ot(^6-{0hWM58S?I4A8Soh>SJl9dTVqdf#7e%HG=NY9WfVy;*$u->gnTTc3f z<=BZOX;L+)_U~^3VeiCch1^B7o&lQQS(au9$O0xj`JZ=g9Y+&07NBU^4YpFzfq0gZv+OKXMuPNax z3rAMHeY|&BujcJ|YR;n1^O)5aU5rZASV{IJjF+6Ls}AE464KRnCv9^nVeLgn v;p`^h_i7Z_kT)cDR#f^b{gnqR_g5}^y0jjxE7(WAenAX2lt3Uc6s8HocxP+|@5^^~jYA*H zD-Ti?h1{r3+Z0Hvw&?@4=_aXE(pG8|ss<9NQronnG-{Mzty(ERDz!?brsun}_O2Hy zQ68ZEe9xJC&-w1X=bn3J-hW1Zfy*WLm6QY|`j_;bQ~9})x?rU&NxGy+x~#jxa>S*$ zB5uVkLw1KfkpiVav_0W-JJzKll}cr#N~wxeE7d|>5Uz>TDz%Y1rB0TNf>V-GKPu^7 z-FI5jea4bgZl!@_zg|dkp;1e6Bgp~1h~y%pj^w2z7waV?ml#bXHlGwd7z60gqA$`HpO%#6dZk`Pe^=;@daYh}T2@;1dVL9L+Kf-=4O6bCWmayqosx)X z^+Qr{DPJvbTJEw-vs?sznbo8pb6;}R%=cvtq_@3loW*90uJm^A>uzrcsn{C6D6q3~ zVL!HPi-ru_LtG-PNSKQnlNyQ55d4-IiHFUJN5yEnETXX~ zRSSnLb%GhzVYULDJQ|y{f<7i(>!e`@U=2V6i~~Xdhf*n;;LbJ>9N|sH|4B_kJWMba zEMi9>fFv^k;Kil@&44ffvtbbcIIt+e|3d*|1cQERoscB{e8~@#c@85%kKG0Z>L`vi-V{rggdGG4n*kh^q;`qT zqWLJ`7~nI2yhC#z>E7VKSbU`;XZHevxm_?}GAm(wEh7|*>Q2D)z{W6tsj_PL_QGZh z8G45oR-JeNha%{9c1Roy7`~-Y9HD`K<$tVtJSA-Um|*(|Y1!ui#Q?al{WBUfwehea zV&ayZZ6iA$($De|NbZ1K?>g2p2Af)vzgIoIfP+e191gfHoWMAa)-BKVlQjBY^viX> zJmY>h@+E(}#@H*?!Q-s$XW{R2x8?MH8ZF8E$=b&b2+IXIyLJc6?;`6yK2djftnVi4 zrF={M^Ulr`*ae|*Eb3eYERVhaW->z{Y8+LyNh7Lj`N!%(@>NBpfQ68g+AK#?>Wf4rbPmQgbFb-2wx45~K4@^cd-y_(;QxA>qQyvM$8wx>J7kk)I&n)A-A~ zH!{9@C2x?P5vm!{^I$oA=`71EfVsSKjV^ju|dd)e<{^0))> zfpj4G0u&j=vR_2Uc$mN2e8P#*Lg#8Ir#{(}X8V{C4{ITV9fv;edcCR0U5v=3LTx%} zsPma{K^*xcc95*^Pk*rN9b9Oam-nYoBG{g=X(gPypc(E(Og(|tae&CU*=#3PDTN2( zjuR6B;b7n<4wzCrKBHNj zZy|Ij34$)xjW%7C*IC3R{Y8GYbs$)ar*(jOKpS8!K+J3dTK#~Hyr*q{x1eGdQ_-0) zLlGl!IHo%newMLlHDCxhb@0n=eR5;^pKWL4eS$r-cIjrzG8k%x8-pv8Nz@0U#=jVYHql2>LSb z3n%&cRbBbDB2x0p{QXsjy5NGQkHxYJ%&2b6=1s^hQdO#F>18USs&Ddm=bu}1)_{GL z2Ih?rOYj?>?CJ=Lh{|Iu=98rj0(Ld;T3 zk~foyw|L{~L)~c6YGXD^QPzOPE|4al<}%X#f&Xyz$)O%R?E{EKTu-gJ7{{xX;z5;f zyXnO(-^M9={BAnFrcyo-$9T-f*(|XWA`v0|*j{Ets3;O!WxM0C_?&|ojCc-RT-U8V zyO;DI@IQ5*9~Oq^(_GN&?3gW-R-o_NGgvTC0 zFCd#Iu?b9sjv!hFaR*>AU=N@a@DN}T;8Oq;!|c<5V!$H+k%ouRsv)ooRW%gWEK5~c zEAmn%{F#m(c?)-*bnf(rJ$vQU5j>vf?{MAL!P^OR!CUb;Ulp!98{eVcjr`*cx{e_^H;G8W z$OG)_fMx(r1Um&l;4wsvbG>618P!K>7KJg8>*KF%+_zQKkRH~8P7BA(WvxOp8luAm2MagP z_NmAgsydwB-Pb3#A152OC#pq^xqR=U1G#XF$X$E*EB&JnimiSYe#JhvqxBR(WZQY_ zwpWYCsRVb}1*t>kZO-=ST0(Q?=p0PGYoIni6XPSy2tPJ3GJ2!^`9A!K?=)GO*(JxK zY@~})yn^}VfZy+CE!;PFyj&b88H2HY>qcAN-s~NO8wW%CZxN!T6{A!f1_rh5ux9ZR#q?>>423(U}J6XkRInzkzl4 
zkSNJsnY|4?)khJ{j8c8%+BFhfiSW~7Ayu^lDt(?zB$+`OhlWYaO`w!U!-=U*`gUos zD1D}6Fy<%&ot6>l496zT5TD#Ol5&y;(u}ewGibtK27OoT)Z!+qg2`$C10(=QbWS=t zOD)?Q8$V)%5>8ctuyN8?cAjF-Fbk1c7EZgt`I2Y*`y(+u88!xssGqfxfLNs7w_I(=B|xN zlP(035EZ3K1{F{Od9-N~DN+-y`lHH^N-3g1`O!Zq(yc<9Mr~Co6+ie<3AME6o4fYf zi>p#MU~zQL%$#$+nRDjMx%-Y_U%t*tww9EHJ@l`kZ;yK5wUU}9Kl6BWkK)mp?v1gy zSMf6NzL+oWSN!pS5{L(t*LJT>%VB@4B3`Lf#;cSn<}th{JW6$sM-S-1lO8>2%s$~$ zYDg~7LnMccN|I|yF4V&$hm9(d=a5{a7n59U)RA0Ia*19_a;cOXNS>jWkz6L_xq7)i z^Q1>v*{ILbD~Oq=*Xh;z?2}BHuh;0cG_t^0sLvVlzRI}ASa8BaKr0{jMC!$6wmI`- z_7SVAp$&78J3Ctk6LF*Ukal!1k<|E5Yl}GO>nN|JxyAI~_7Vv(UMb4kYciL8XS~(p zy1bU=oS>TIi4mhEb2#`*U$B{mc&lg+@2Z%*j?bf+?#vtExh%aJdSTNxEjy)IQN!`4 z3?Ao&(wt=sXe2g32-#*l88iEjN^NIGoB*m?EM}|y+^`4vd@S;7e87$raOt{_MtBsY z6+{I&2%>?wkV?S8-h16(b*rLEXz5ye}eKh8tip;o=kUFei9=^j&$480a_@b%NxsKNwao8b;Y1xp&hdl32;xd`f%bQ%N+tQ`( zQN*#r0s(fO00C~E1UU@ypCEHiv-bs-+pvRN;VF<|3Vdc(RsK!cOmmmSUuIpNn3Z9P z(+TvO+)g<`+lVGC-OZ9Uu(gEn3Bj#I34R z(TUEU8g~{NbH=|1ZrYpLLE3*8PWAD9(vROgsApjFyZ@KrM@ai$9`W()7kZ`Tz=RNd z1Lk*<`Ji~E=D}Ehl&qJCSnZnwE_NpnjU_C%w(~snq5HY)Vaqt8Y6FI)-*d6|kgp1n znDfrl*>g(Mz?5OQA5Z)kwDVrDZ_`+Vu7)u3^Xy>)H=cK|MWI`r z<;yPL5(LC4;6^g0MGg0I=RI`)H4Vhgc{TZ^`8#CxK&EK^4Ll!TE$B$2wmSZpX{X#; zJno#oKr^mmS>SRtM{fK%%*q!>+UNAZR^!hv;4O#B!&YI=z$`jT@=Kgzi4CImD8y#cE($D!y8@fz6Q(=laD-wqNc5 zGfwFi+cr3wgD%b!oL68lq^hQ6rc{->CfNfng{5Ugl%Q-)dH&(NLUV74yGz!!+ahTutL`7RsU663`6?OyRD8Z{C@ouDEaIA_dvz{g}ACyfy#Ip~z-LVavf*xS->} z;W+`*(v8tPY)+A?QgdmhjJT>^73W(1&@|x;_&HL{)8IIQ--rvX%NEKk%R?;plY@Om z{H?XF1K_i%=7bSc!?r0S!NGL!BybsoUsq3C}+Ut7ic73 zZ7z-75I0slze$#nHY}0Hx0blEj6X;`eFssq?fB^XDc{C%n*Lqp+{y~pYZ2?lN}7`~ zkg<9WCW73E)3=-qlH*Gzl4A~1Q2Qa4c<(uNzKX{GC<3c5ZbO7}g)b6xW93W4bl^H7 zE%0}-XsXP5o+fUKKd&BIC+$dp-vrG=BU=x(NY?R-`=UNW6}G1yk0<}=|@;#mhSS2+;`E|W$AErIwjNIA%EkYbR> zK*~V&f%rjsLBb$Ug2=+W5nLq^r%+X+G0nDBl{cYEWfPzGVMX5N-=r~bX7;*$EIo+n zar`dkZ5WeV*Z?kvz73*8-vv&he;k}d|1EG5{Vs44{Q+xE4NTzXaNG!vl$MFdku~m@I3mb# z7992_9FjaP`M%WC#81Vows};y%iDLbMxnI#bS$S)Cy0t-rFevdv9!ko{{RG|6D&$& z0M{^O8Ms3nusHoNE+(}HZ*<@j|irMY_?znDrP zUJI2zzifv>b|hv})1C(bKaX^I#g8_2^-LAB4}k$|Cg|nD&`1+)UV2W0#DrKl6!P&# z(b{>eOkOeuJ)9|jN&KQyE6a1RRxI5#Sd+t(vkUE~#3!3pvo)DNZTi|bKeq@GbIMbO zo!X1eNgLjSU#oH96 z0emOi&l7R#|4eHj8!~=nvJsgrXKCzHv8rcF3>iYT5z746AczN-p~Kx&07)#v{1LDGt?P+CZR;%CO2L0yOq9si}3-c8qNe~W#>gU!dG+jA8z_L6z4>3 diff --git a/nlp_resource_data/nltk/inference/api.py b/nlp_resource_data/nltk/inference/api.py index 3135e1b..3bc8ad3 100644 --- a/nlp_resource_data/nltk/inference/api.py +++ b/nlp_resource_data/nltk/inference/api.py @@ -17,13 +17,17 @@ the model builder tries to build a model for the assumptions. Given a set of ass goal *G*, the model builder tries to find a counter-model, in the sense of a model that will satisfy the assumptions plus the negation of *G*. """ +from __future__ import print_function from abc import ABCMeta, abstractmethod import threading import time +from six import add_metaclass -class Prover(metaclass=ABCMeta): + +@add_metaclass(ABCMeta) +class Prover(object): """ Interface for trying to prove a goal from assumptions. Both the goal and the assumptions are constrained to be formulas of ``logic.Expression``. @@ -44,7 +48,8 @@ class Prover(metaclass=ABCMeta): """ -class ModelBuilder(metaclass=ABCMeta): +@add_metaclass(ABCMeta) +class ModelBuilder(object): """ Interface for trying to build a model of set of formulas. Open formulas are assumed to be universally quantified. 
@@ -69,7 +74,8 @@ class ModelBuilder(metaclass=ABCMeta): """ -class TheoremToolCommand(metaclass=ABCMeta): +@add_metaclass(ABCMeta) +class TheoremToolCommand(object): """ This class holds a goal and a list of assumptions to be used in proving or model building. @@ -350,7 +356,7 @@ class BaseModelBuilderCommand(BaseTheoremToolCommand, ModelBuilderCommand): :return: str """ if self._result is None: - raise LookupError("You have to call build_model() first to " "get a model!") + raise LookupError('You have to call build_model() first to ' 'get a model!') else: return self._decorate_model(self._model, format) @@ -484,7 +490,7 @@ class ModelBuilderCommandDecorator(TheoremToolCommandDecorator, ModelBuilderComm :return: str """ if self._result is None: - raise LookupError("You have to call build_model() first to " "get a model!") + raise LookupError('You have to call build_model() first to ' 'get a model!') else: return self._decorate_model(self._model, format) @@ -514,20 +520,20 @@ class ParallelProverBuilder(Prover, ModelBuilder): self._modelbuilder = modelbuilder def _prove(self, goal=None, assumptions=None, verbose=False): - return self._run(goal, assumptions, verbose), "" + return self._run(goal, assumptions, verbose), '' def _build_model(self, goal=None, assumptions=None, verbose=False): - return not self._run(goal, assumptions, verbose), "" + return not self._run(goal, assumptions, verbose), '' def _run(self, goal, assumptions, verbose): # Set up two thread, Prover and ModelBuilder to run in parallel tp_thread = TheoremToolThread( - lambda: self._prover.prove(goal, assumptions, verbose), verbose, "TP" + lambda: self._prover.prove(goal, assumptions, verbose), verbose, 'TP' ) mb_thread = TheoremToolThread( lambda: self._modelbuilder.build_model(goal, assumptions, verbose), verbose, - "MB", + 'MB', ) tp_thread.start() @@ -569,10 +575,10 @@ class ParallelProverBuilderCommand(BaseProverCommand, BaseModelBuilderCommand): def _run(self, verbose): # Set up two thread, Prover and ModelBuilder to run in parallel tp_thread = TheoremToolThread( - lambda: BaseProverCommand.prove(self, verbose), verbose, "TP" + lambda: BaseProverCommand.prove(self, verbose), verbose, 'TP' ) mb_thread = TheoremToolThread( - lambda: BaseModelBuilderCommand.build_model(self, verbose), verbose, "MB" + lambda: BaseModelBuilderCommand.build_model(self, verbose), verbose, 'MB' ) tp_thread.start() @@ -602,12 +608,12 @@ class TheoremToolThread(threading.Thread): self._result = self._command() if self._verbose: print( - "Thread %s finished with result %s at %s" + 'Thread %s finished with result %s at %s' % (self._name, self._result, time.localtime(time.time())) ) except Exception as e: print(e) - print("Thread %s completed abnormally" % (self._name)) + print('Thread %s completed abnormally' % (self._name)) @property def result(self): diff --git a/nlp_resource_data/nltk/inference/discourse.py b/nlp_resource_data/nltk/inference/discourse.py index 5d4065c..7dad02d 100644 --- a/nlp_resource_data/nltk/inference/discourse.py +++ b/nlp_resource_data/nltk/inference/discourse.py @@ -42,12 +42,14 @@ The set of all threads for a discourse is the Cartesian product of all the readi (This is not intended to scale beyond very short discourses!) The method ``readings(filter=True)`` will only show those threads which are consistent (taking into account any background assumptions). 
""" +from __future__ import print_function import os from abc import ABCMeta, abstractmethod from operator import and_, add from functools import reduce +from six import add_metaclass from nltk.data import show_cfg from nltk.tag import RegexpTagger @@ -61,7 +63,8 @@ from nltk.inference.mace import MaceCommand from nltk.inference.prover9 import Prover9Command -class ReadingCommand(metaclass=ABCMeta): +@add_metaclass(ABCMeta) +class ReadingCommand(object): @abstractmethod def parse_to_readings(self, sentence): """ @@ -109,7 +112,7 @@ class CfgReadingCommand(ReadingCommand): :type gramfile: str """ self._gramfile = ( - gramfile if gramfile else "grammars/book_grammars/discourse.fcfg" + gramfile if gramfile else 'grammars/book_grammars/discourse.fcfg' ) self._parser = load_parser(self._gramfile) @@ -139,7 +142,7 @@ class DrtGlueReadingCommand(ReadingCommand): """ if semtype_file is None: semtype_file = os.path.join( - "grammars", "sample_grammars", "drt_glue.semtype" + 'grammars', 'sample_grammars', 'drt_glue.semtype' ) self._glue = DrtGlue( semtype_file=semtype_file, @@ -183,7 +186,7 @@ class DiscourseTester(object): :type background: list(Expression) """ self._input = input - self._sentences = dict([("s%s" % i, sent) for i, sent in enumerate(input)]) + self._sentences = dict([('s%s' % i, sent) for i, sent in enumerate(input)]) self._models = None self._readings = {} self._reading_command = ( @@ -239,7 +242,7 @@ class DiscourseTester(object): self._input.append(sentence) self._sentences = dict( - [("s%s" % i, sent) for i, sent in enumerate(self._input)] + [('s%s' % i, sent) for i, sent in enumerate(self._input)] ) # check whether adding the new sentence to the discourse preserves consistency (i.e. a model can be found for the combined set of # of assumptions @@ -266,7 +269,7 @@ class DiscourseTester(object): self.sentences() return None self._sentences = dict( - [("s%s" % i, sent) for i, sent in enumerate(self._input)] + [('s%s' % i, sent) for i, sent in enumerate(self._input)] ) self.readings(verbose=False) if verbose: @@ -337,7 +340,7 @@ class DiscourseTester(object): else: for sid in sorted(self._readings): print() - print("%s readings:" % sid) + print('%s readings:' % sid) print() #'-' * 30 for rid in sorted(self._readings[sid]): lf = self._readings[sid][rid] @@ -351,7 +354,7 @@ class DiscourseTester(object): for tid in sorted(threads): if show_thread_readings: readings = [ - self._readings[rid.split("-")[0]][rid] for rid in self._threads[tid] + self._readings[rid.split('-')[0]][rid] for rid in self._threads[tid] ] try: thread_reading = ( @@ -359,9 +362,9 @@ class DiscourseTester(object): % self._reading_command.combine_readings(readings).normalize() ) except Exception as e: - thread_reading = ": INVALID: %s" % e.__class__.__name__ + thread_reading = ': INVALID: %s' % e.__class__.__name__ else: - thread_reading = "" + thread_reading = '' print("%s:" % tid, self._threads[tid], thread_reading) @@ -412,7 +415,7 @@ class DiscourseTester(object): return [ (rid, self._readings[sid][rid]) for rid in threads[thread_id] - for sid in rid.split("-")[:1] + for sid in rid.split('-')[:1] ] ############################### @@ -448,7 +451,7 @@ class DiscourseTester(object): print(a) spacer(80) if modelfound: - print(mb.model(format="cooked")) + print(mb.model(format='cooked')) else: print("No model found!\n") return results @@ -536,6 +539,12 @@ class DiscourseTester(object): return result +# multiply = DiscourseTester.multiply +# L1 = [['A'], ['B']] +# L2 = ['a', 'b', 'c'] +# print multiply(L1,L2) + + def 
load_fol(s): """ Temporarily duplicated from ``nltk.sem.util``. @@ -549,12 +558,12 @@ def load_fol(s): statements = [] for linenum, line in enumerate(s.splitlines()): line = line.strip() - if line.startswith("#") or line == "": + if line.startswith('#') or line == '': continue try: statements.append(Expression.fromstring(line)) except Exception: - raise ValueError("Unable to parse line %s: %s" % (linenum, line)) + raise ValueError('Unable to parse line %s: %s' % (linenum, line)) return statements @@ -566,7 +575,7 @@ def discourse_demo(reading_command=None): Illustrate the various methods of ``DiscourseTester`` """ dt = DiscourseTester( - ["A boxer walks", "Every boxer chases a girl"], reading_command + ['A boxer walks', 'Every boxer chases a girl'], reading_command ) dt.models() print() @@ -578,36 +587,36 @@ def discourse_demo(reading_command=None): print() dt.readings(threaded=True) print() - dt.models("d1") - dt.add_sentence("John is a boxer") + dt.models('d1') + dt.add_sentence('John is a boxer') print() dt.sentences() print() dt.readings(threaded=True) print() dt = DiscourseTester( - ["A student dances", "Every student is a person"], reading_command + ['A student dances', 'Every student is a person'], reading_command ) print() - dt.add_sentence("No person dances", consistchk=True) + dt.add_sentence('No person dances', consistchk=True) print() dt.readings() print() - dt.retract_sentence("No person dances", verbose=True) + dt.retract_sentence('No person dances', verbose=True) print() dt.models() print() - dt.readings("A person dances") + dt.readings('A person dances') print() - dt.add_sentence("A person dances", informchk=True) + dt.add_sentence('A person dances', informchk=True) dt = DiscourseTester( - ["Vincent is a boxer", "Fido is a boxer", "Vincent is married", "Fido barks"], + ['Vincent is a boxer', 'Fido is a boxer', 'Vincent is married', 'Fido barks'], reading_command, ) dt.readings(filter=True) import nltk.data - background_file = os.path.join("grammars", "book_grammars", "background.fol") + background_file = os.path.join('grammars', 'book_grammars', 'background.fol') background = nltk.data.load(background_file) print() @@ -623,7 +632,7 @@ def drt_discourse_demo(reading_command=None): """ Illustrate the various methods of ``DiscourseTester`` """ - dt = DiscourseTester(["every dog chases a boy", "he runs"], reading_command) + dt = DiscourseTester(['every dog chases a boy', 'he runs'], reading_command) dt.models() print() dt.sentences() @@ -636,7 +645,7 @@ def drt_discourse_demo(reading_command=None): def spacer(num=30): - print("-" * num) + print('-' * num) def demo(): @@ -644,11 +653,11 @@ def demo(): tagger = RegexpTagger( [ - ("^(chases|runs)$", "VB"), - ("^(a)$", "ex_quant"), - ("^(every)$", "univ_quant"), - ("^(dog|boy)$", "NN"), - ("^(he)$", "PRP"), + ('^(chases|runs)$', 'VB'), + ('^(a)$', 'ex_quant'), + ('^(every)$', 'univ_quant'), + ('^(dog|boy)$', 'NN'), + ('^(he)$', 'PRP'), ] ) depparser = MaltParser(tagger=tagger) @@ -657,5 +666,5 @@ def demo(): ) -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/inference/mace.py b/nlp_resource_data/nltk/inference/mace.py index 159a510..7763b75 100644 --- a/nlp_resource_data/nltk/inference/mace.py +++ b/nlp_resource_data/nltk/inference/mace.py @@ -9,6 +9,7 @@ """ A model builder that makes use of the external 'Mace4' package. 
""" +from __future__ import print_function import os import tempfile @@ -49,7 +50,7 @@ class MaceCommand(Prover9CommandParent, BaseModelBuilderCommand): @property def valuation(mbc): - return mbc.model("valuation") + return mbc.model('valuation') def _convert2val(self, valuation_str): """ @@ -58,40 +59,40 @@ class MaceCommand(Prover9CommandParent, BaseModelBuilderCommand): :return: A model if one is generated; None otherwise. :rtype: sem.Valuation """ - valuation_standard_format = self._transform_output(valuation_str, "standard") + valuation_standard_format = self._transform_output(valuation_str, 'standard') val = [] for line in valuation_standard_format.splitlines(False): l = line.strip() - if l.startswith("interpretation"): + if l.startswith('interpretation'): # find the number of entities in the model - num_entities = int(l[l.index("(") + 1 : l.index(",")].strip()) + num_entities = int(l[l.index('(') + 1 : l.index(',')].strip()) - elif l.startswith("function") and l.find("_") == -1: + elif l.startswith('function') and l.find('_') == -1: # replace the integer identifier with a corresponding alphabetic character - name = l[l.index("(") + 1 : l.index(",")].strip() + name = l[l.index('(') + 1 : l.index(',')].strip() if is_indvar(name): name = name.upper() - value = int(l[l.index("[") + 1 : l.index("]")].strip()) + value = int(l[l.index('[') + 1 : l.index(']')].strip()) val.append((name, MaceCommand._make_model_var(value))) - elif l.startswith("relation"): - l = l[l.index("(") + 1 :] - if "(" in l: + elif l.startswith('relation'): + l = l[l.index('(') + 1 :] + if '(' in l: # relation is not nullary - name = l[: l.index("(")].strip() + name = l[: l.index('(')].strip() values = [ int(v.strip()) - for v in l[l.index("[") + 1 : l.index("]")].split(",") + for v in l[l.index('[') + 1 : l.index(']')].split(',') ] val.append( (name, MaceCommand._make_relation_set(num_entities, values)) ) else: # relation is nullary - name = l[: l.index(",")].strip() - value = int(l[l.index("[") + 1 : l.index("]")].strip()) + name = l[: l.index(',')].strip() + value = int(l[l.index('[') + 1 : l.index(']')].strip()) val.append((name, value == 1)) return Valuation(val) @@ -140,32 +141,32 @@ class MaceCommand(Prover9CommandParent, BaseModelBuilderCommand): :type value: int """ letter = [ - "a", - "b", - "c", - "d", - "e", - "f", - "g", - "h", - "i", - "j", - "k", - "l", - "m", - "n", - "o", - "p", - "q", - "r", - "s", - "t", - "u", - "v", - "w", - "x", - "y", - "z", + 'a', + 'b', + 'c', + 'd', + 'e', + 'f', + 'g', + 'h', + 'i', + 'j', + 'k', + 'l', + 'm', + 'n', + 'o', + 'p', + 'q', + 'r', + 's', + 't', + 'u', + 'v', + 'w', + 'x', + 'y', + 'z', ][value] num = value // 26 return letter + str(num) if num > 0 else letter @@ -182,7 +183,7 @@ class MaceCommand(Prover9CommandParent, BaseModelBuilderCommand): """ if not format: return valuation_str - elif format == "valuation": + elif format == 'valuation': return self._convert2val(valuation_str) else: return self._transform_output(valuation_str, format) @@ -195,14 +196,14 @@ class MaceCommand(Prover9CommandParent, BaseModelBuilderCommand): :type format: str """ if format in [ - "standard", - "standard2", - "portable", - "tabular", - "raw", - "cooked", - "xml", - "tex", + 'standard', + 'standard2', + 'portable', + 'tabular', + 'raw', + 'cooked', + 'xml', + 'tex', ]: return self._call_interpformat(valuation_str, [format])[0] else: @@ -219,7 +220,7 @@ class MaceCommand(Prover9CommandParent, BaseModelBuilderCommand): """ if self._interpformat_bin is None: self._interpformat_bin = 
self._modelbuilder._find_binary( - "interpformat", verbose + 'interpformat', verbose ) return self._modelbuilder._call( @@ -260,18 +261,18 @@ class Mace(Prover9Parent, ModelBuilder): :see: ``config_prover9`` """ if self._mace4_bin is None: - self._mace4_bin = self._find_binary("mace4", verbose) + self._mace4_bin = self._find_binary('mace4', verbose) - updated_input_str = "" + updated_input_str = '' if self._end_size > 0: - updated_input_str += "assign(end_size, %d).\n\n" % self._end_size + updated_input_str += 'assign(end_size, %d).\n\n' % self._end_size updated_input_str += input_str return self._call(updated_input_str, self._mace4_bin, args, verbose) def spacer(num=30): - print("-" * num) + print('-' * num) def decode_result(found): @@ -281,7 +282,7 @@ def decode_result(found): :param found: The output of model_found() :type found: bool """ - return {True: "Countermodel found", False: "No countermodel found", None: "None"}[ + return {True: 'Countermodel found', False: 'No countermodel found', None: 'None'}[ found ] @@ -296,24 +297,24 @@ def test_model_found(arguments): m = MaceCommand(g, assumptions=alist, max_models=50) found = m.build_model() for a in alist: - print(" %s" % a) - print("|- %s: %s\n" % (g, decode_result(found))) + print(' %s' % a) + print('|- %s: %s\n' % (g, decode_result(found))) def test_build_model(arguments): """ Try to build a ``nltk.sem.Valuation``. """ - g = Expression.fromstring("all x.man(x)") + g = Expression.fromstring('all x.man(x)') alist = [ Expression.fromstring(a) for a in [ - "man(John)", - "man(Socrates)", - "man(Bill)", - "some x.(-(x = John) & man(x) & sees(John,x))", - "some x.(-(x = Bill) & man(x))", - "all x.some y.(man(x) -> gives(Socrates,x,y))", + 'man(John)', + 'man(Socrates)', + 'man(Bill)', + 'some x.(-(x = John) & man(x) & sees(John,x))', + 'some x.(-(x = Bill) & man(x))', + 'all x.some y.(man(x) -> gives(Socrates,x,y))', ] ] @@ -323,14 +324,14 @@ def test_build_model(arguments): print("Assumptions and Goal") spacer() for a in alist: - print(" %s" % a) - print("|- %s: %s\n" % (g, decode_result(m.build_model()))) + print(' %s' % a) + print('|- %s: %s\n' % (g, decode_result(m.build_model()))) spacer() - # print(m.model('standard')) - # print(m.model('cooked')) + # print m.model('standard') + # print m.model('cooked') print("Valuation") spacer() - print(m.valuation, "\n") + print(m.valuation, '\n') def test_transform_output(argument_pair): @@ -342,9 +343,9 @@ def test_transform_output(argument_pair): m = MaceCommand(g, assumptions=alist) m.build_model() for a in alist: - print(" %s" % a) - print("|- %s: %s\n" % (g, m.build_model())) - for format in ["standard", "portable", "xml", "cooked"]: + print(' %s' % a) + print('|- %s: %s\n' % (g, m.build_model())) + for format in ['standard', 'portable', 'xml', 'cooked']: spacer() print("Using '%s' format" % format) spacer() @@ -354,23 +355,23 @@ def test_transform_output(argument_pair): def test_make_relation_set(): print( MaceCommand._make_relation_set(num_entities=3, values=[1, 0, 1]) - == set([("c",), ("a",)]) + == set([('c',), ('a',)]) ) print( MaceCommand._make_relation_set( num_entities=3, values=[0, 0, 0, 0, 0, 0, 1, 0, 0] ) - == set([("c", "a")]) + == set([('c', 'a')]) ) print( MaceCommand._make_relation_set(num_entities=2, values=[0, 0, 1, 0, 0, 0, 1, 0]) - == set([("a", "b", "a"), ("b", "b", "a")]) + == set([('a', 'b', 'a'), ('b', 'b', 'a')]) ) arguments = [ - ("mortal(Socrates)", ["all x.(man(x) -> mortal(x))", "man(Socrates)"]), - ("(not mortal(Socrates))", ["all x.(man(x) -> mortal(x))", 
"man(Socrates)"]), + ('mortal(Socrates)', ['all x.(man(x) -> mortal(x))', 'man(Socrates)']), + ('(not mortal(Socrates))', ['all x.(man(x) -> mortal(x))', 'man(Socrates)']), ] @@ -380,5 +381,5 @@ def demo(): test_transform_output(arguments[1]) -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/inference/nonmonotonic.py b/nlp_resource_data/nltk/inference/nonmonotonic.py index 3bbb8d2..b9180f0 100644 --- a/nlp_resource_data/nltk/inference/nonmonotonic.py +++ b/nlp_resource_data/nltk/inference/nonmonotonic.py @@ -2,7 +2,7 @@ # # Author: Daniel H. Garrette # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # URL: # For license information, see LICENSE.TXT @@ -11,6 +11,7 @@ A module to perform nonmonotonic reasoning. The ideas and demonstrations in this module are based on "Logical Foundations of Artificial Intelligence" by Michael R. Genesereth and Nils J. Nilsson. """ +from __future__ import print_function, unicode_literals from collections import defaultdict from functools import reduce @@ -34,6 +35,7 @@ from nltk.sem.logic import ( ) from nltk.inference.api import Prover, ProverCommandDecorator +from nltk.compat import python_2_unicode_compatible class ProverParseError(Exception): @@ -299,6 +301,7 @@ class ClosedWorldProver(ProverCommandDecorator): predDict[func1].validate_sig_len(sig) +@python_2_unicode_compatible class PredHolder(object): """ This class will be used by a dictionary that will store information @@ -335,7 +338,7 @@ class PredHolder(object): raise Exception("Signature lengths do not match") def __str__(self): - return "(%s,%s,%s)" % (self.signatures, self.properties, self.signature_len) + return '(%s,%s,%s)' % (self.signatures, self.properties, self.signature_len) def __repr__(self): return "%s" % self @@ -344,151 +347,151 @@ class PredHolder(object): def closed_domain_demo(): lexpr = Expression.fromstring - p1 = lexpr(r"exists x.walk(x)") - p2 = lexpr(r"man(Socrates)") - c = lexpr(r"walk(Socrates)") + p1 = lexpr(r'exists x.walk(x)') + p2 = lexpr(r'man(Socrates)') + c = lexpr(r'walk(Socrates)') prover = Prover9Command(c, [p1, p2]) print(prover.prove()) cdp = ClosedDomainProver(prover) - print("assumptions:") + print('assumptions:') for a in cdp.assumptions(): - print(" ", a) - print("goal:", cdp.goal()) + print(' ', a) + print('goal:', cdp.goal()) print(cdp.prove()) - p1 = lexpr(r"exists x.walk(x)") - p2 = lexpr(r"man(Socrates)") - p3 = lexpr(r"-walk(Bill)") - c = lexpr(r"walk(Socrates)") + p1 = lexpr(r'exists x.walk(x)') + p2 = lexpr(r'man(Socrates)') + p3 = lexpr(r'-walk(Bill)') + c = lexpr(r'walk(Socrates)') prover = Prover9Command(c, [p1, p2, p3]) print(prover.prove()) cdp = ClosedDomainProver(prover) - print("assumptions:") + print('assumptions:') for a in cdp.assumptions(): - print(" ", a) - print("goal:", cdp.goal()) + print(' ', a) + print('goal:', cdp.goal()) print(cdp.prove()) - p1 = lexpr(r"exists x.walk(x)") - p2 = lexpr(r"man(Socrates)") - p3 = lexpr(r"-walk(Bill)") - c = lexpr(r"walk(Socrates)") + p1 = lexpr(r'exists x.walk(x)') + p2 = lexpr(r'man(Socrates)') + p3 = lexpr(r'-walk(Bill)') + c = lexpr(r'walk(Socrates)') prover = Prover9Command(c, [p1, p2, p3]) print(prover.prove()) cdp = ClosedDomainProver(prover) - print("assumptions:") + print('assumptions:') for a in cdp.assumptions(): - print(" ", a) - print("goal:", cdp.goal()) + print(' ', a) + print('goal:', cdp.goal()) print(cdp.prove()) - p1 = lexpr(r"walk(Socrates)") - p2 = lexpr(r"walk(Bill)") - c = lexpr(r"all x.walk(x)") + 
p1 = lexpr(r'walk(Socrates)') + p2 = lexpr(r'walk(Bill)') + c = lexpr(r'all x.walk(x)') prover = Prover9Command(c, [p1, p2]) print(prover.prove()) cdp = ClosedDomainProver(prover) - print("assumptions:") + print('assumptions:') for a in cdp.assumptions(): - print(" ", a) - print("goal:", cdp.goal()) + print(' ', a) + print('goal:', cdp.goal()) print(cdp.prove()) - p1 = lexpr(r"girl(mary)") - p2 = lexpr(r"dog(rover)") - p3 = lexpr(r"all x.(girl(x) -> -dog(x))") - p4 = lexpr(r"all x.(dog(x) -> -girl(x))") - p5 = lexpr(r"chase(mary, rover)") - c = lexpr(r"exists y.(dog(y) & all x.(girl(x) -> chase(x,y)))") + p1 = lexpr(r'girl(mary)') + p2 = lexpr(r'dog(rover)') + p3 = lexpr(r'all x.(girl(x) -> -dog(x))') + p4 = lexpr(r'all x.(dog(x) -> -girl(x))') + p5 = lexpr(r'chase(mary, rover)') + c = lexpr(r'exists y.(dog(y) & all x.(girl(x) -> chase(x,y)))') prover = Prover9Command(c, [p1, p2, p3, p4, p5]) print(prover.prove()) cdp = ClosedDomainProver(prover) - print("assumptions:") + print('assumptions:') for a in cdp.assumptions(): - print(" ", a) - print("goal:", cdp.goal()) + print(' ', a) + print('goal:', cdp.goal()) print(cdp.prove()) def unique_names_demo(): lexpr = Expression.fromstring - p1 = lexpr(r"man(Socrates)") - p2 = lexpr(r"man(Bill)") - c = lexpr(r"exists x.exists y.(x != y)") + p1 = lexpr(r'man(Socrates)') + p2 = lexpr(r'man(Bill)') + c = lexpr(r'exists x.exists y.(x != y)') prover = Prover9Command(c, [p1, p2]) print(prover.prove()) unp = UniqueNamesProver(prover) - print("assumptions:") + print('assumptions:') for a in unp.assumptions(): - print(" ", a) - print("goal:", unp.goal()) + print(' ', a) + print('goal:', unp.goal()) print(unp.prove()) - p1 = lexpr(r"all x.(walk(x) -> (x = Socrates))") - p2 = lexpr(r"Bill = William") - p3 = lexpr(r"Bill = Billy") - c = lexpr(r"-walk(William)") + p1 = lexpr(r'all x.(walk(x) -> (x = Socrates))') + p2 = lexpr(r'Bill = William') + p3 = lexpr(r'Bill = Billy') + c = lexpr(r'-walk(William)') prover = Prover9Command(c, [p1, p2, p3]) print(prover.prove()) unp = UniqueNamesProver(prover) - print("assumptions:") + print('assumptions:') for a in unp.assumptions(): - print(" ", a) - print("goal:", unp.goal()) + print(' ', a) + print('goal:', unp.goal()) print(unp.prove()) def closed_world_demo(): lexpr = Expression.fromstring - p1 = lexpr(r"walk(Socrates)") - p2 = lexpr(r"(Socrates != Bill)") - c = lexpr(r"-walk(Bill)") + p1 = lexpr(r'walk(Socrates)') + p2 = lexpr(r'(Socrates != Bill)') + c = lexpr(r'-walk(Bill)') prover = Prover9Command(c, [p1, p2]) print(prover.prove()) cwp = ClosedWorldProver(prover) - print("assumptions:") + print('assumptions:') for a in cwp.assumptions(): - print(" ", a) - print("goal:", cwp.goal()) + print(' ', a) + print('goal:', cwp.goal()) print(cwp.prove()) - p1 = lexpr(r"see(Socrates, John)") - p2 = lexpr(r"see(John, Mary)") - p3 = lexpr(r"(Socrates != John)") - p4 = lexpr(r"(John != Mary)") - c = lexpr(r"-see(Socrates, Mary)") + p1 = lexpr(r'see(Socrates, John)') + p2 = lexpr(r'see(John, Mary)') + p3 = lexpr(r'(Socrates != John)') + p4 = lexpr(r'(John != Mary)') + c = lexpr(r'-see(Socrates, Mary)') prover = Prover9Command(c, [p1, p2, p3, p4]) print(prover.prove()) cwp = ClosedWorldProver(prover) - print("assumptions:") + print('assumptions:') for a in cwp.assumptions(): - print(" ", a) - print("goal:", cwp.goal()) + print(' ', a) + print('goal:', cwp.goal()) print(cwp.prove()) - p1 = lexpr(r"all x.(ostrich(x) -> bird(x))") - p2 = lexpr(r"bird(Tweety)") - p3 = lexpr(r"-ostrich(Sam)") - p4 = lexpr(r"Sam != Tweety") - c = 
lexpr(r"-bird(Sam)") + p1 = lexpr(r'all x.(ostrich(x) -> bird(x))') + p2 = lexpr(r'bird(Tweety)') + p3 = lexpr(r'-ostrich(Sam)') + p4 = lexpr(r'Sam != Tweety') + c = lexpr(r'-bird(Sam)') prover = Prover9Command(c, [p1, p2, p3, p4]) print(prover.prove()) cwp = ClosedWorldProver(prover) - print("assumptions:") + print('assumptions:') for a in cwp.assumptions(): - print(" ", a) - print("goal:", cwp.goal()) + print(' ', a) + print('goal:', cwp.goal()) print(cwp.prove()) def combination_prover_demo(): lexpr = Expression.fromstring - p1 = lexpr(r"see(Socrates, John)") - p2 = lexpr(r"see(John, Mary)") - c = lexpr(r"-see(Socrates, Mary)") + p1 = lexpr(r'see(Socrates, John)') + p2 = lexpr(r'see(John, Mary)') + c = lexpr(r'-see(Socrates, Mary)') prover = Prover9Command(c, [p1, p2]) print(prover.prove()) command = ClosedDomainProver(UniqueNamesProver(ClosedWorldProver(prover))) @@ -503,32 +506,32 @@ def default_reasoning_demo(): premises = [] # define taxonomy - premises.append(lexpr(r"all x.(elephant(x) -> animal(x))")) - premises.append(lexpr(r"all x.(bird(x) -> animal(x))")) - premises.append(lexpr(r"all x.(dove(x) -> bird(x))")) - premises.append(lexpr(r"all x.(ostrich(x) -> bird(x))")) - premises.append(lexpr(r"all x.(flying_ostrich(x) -> ostrich(x))")) + premises.append(lexpr(r'all x.(elephant(x) -> animal(x))')) + premises.append(lexpr(r'all x.(bird(x) -> animal(x))')) + premises.append(lexpr(r'all x.(dove(x) -> bird(x))')) + premises.append(lexpr(r'all x.(ostrich(x) -> bird(x))')) + premises.append(lexpr(r'all x.(flying_ostrich(x) -> ostrich(x))')) # default properties premises.append( - lexpr(r"all x.((animal(x) & -Ab1(x)) -> -fly(x))") + lexpr(r'all x.((animal(x) & -Ab1(x)) -> -fly(x))') ) # normal animals don't fly premises.append( - lexpr(r"all x.((bird(x) & -Ab2(x)) -> fly(x))") + lexpr(r'all x.((bird(x) & -Ab2(x)) -> fly(x))') ) # normal birds fly premises.append( - lexpr(r"all x.((ostrich(x) & -Ab3(x)) -> -fly(x))") + lexpr(r'all x.((ostrich(x) & -Ab3(x)) -> -fly(x))') ) # normal ostriches don't fly # specify abnormal entities - premises.append(lexpr(r"all x.(bird(x) -> Ab1(x))")) # flight - premises.append(lexpr(r"all x.(ostrich(x) -> Ab2(x))")) # non-flying bird - premises.append(lexpr(r"all x.(flying_ostrich(x) -> Ab3(x))")) # flying ostrich + premises.append(lexpr(r'all x.(bird(x) -> Ab1(x))')) # flight + premises.append(lexpr(r'all x.(ostrich(x) -> Ab2(x))')) # non-flying bird + premises.append(lexpr(r'all x.(flying_ostrich(x) -> Ab3(x))')) # flying ostrich # define entities - premises.append(lexpr(r"elephant(E)")) - premises.append(lexpr(r"dove(D)")) - premises.append(lexpr(r"ostrich(O)")) + premises.append(lexpr(r'elephant(E)')) + premises.append(lexpr(r'dove(D)')) + premises.append(lexpr(r'ostrich(O)')) # print the assumptions prover = Prover9Command(None, premises) @@ -536,9 +539,9 @@ def default_reasoning_demo(): for a in command.assumptions(): print(a) - print_proof("-fly(E)", premises) - print_proof("fly(D)", premises) - print_proof("-fly(O)", premises) + print_proof('-fly(E)', premises) + print_proof('fly(D)', premises) + print_proof('-fly(O)', premises) def print_proof(goal, premises): @@ -556,5 +559,5 @@ def demo(): default_reasoning_demo() -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/inference/prover9.py b/nlp_resource_data/nltk/inference/prover9.py index 5a76c34..3ac69fa 100644 --- a/nlp_resource_data/nltk/inference/prover9.py +++ b/nlp_resource_data/nltk/inference/prover9.py @@ -1,6 +1,6 @@ # Natural Language 
Toolkit: Interface to the Prover9 Theorem Prover # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Dan Garrette # Ewan Klein # @@ -9,6 +9,7 @@ """ A theorem prover that makes use of the external 'Prover9' package. """ +from __future__ import print_function import os import subprocess @@ -51,14 +52,14 @@ class Prover9CommandParent(object): and generating prover9-style input files from them. """ - def print_assumptions(self, output_format="nltk"): + def print_assumptions(self, output_format='nltk'): """ Print the list of the current assumptions. """ - if output_format.lower() == "nltk": + if output_format.lower() == 'nltk': for a in self.assumptions(): print(a) - elif output_format.lower() == "prover9": + elif output_format.lower() == 'prover9': for a in convert_to_prover9(self.assumptions()): print(a) else: @@ -102,7 +103,7 @@ class Prover9Command(Prover9CommandParent, BaseProverCommand): :see BaseProverCommand.decorate_proof() """ if simplify: - return self._prover._call_prooftrans(proof_string, ["striplabels"])[ + return self._prover._call_prooftrans(proof_string, ['striplabels'])[ 0 ].rstrip() else: @@ -123,13 +124,13 @@ class Prover9Parent(object): self._binary_location = None self._prover9_bin = None else: - name = "prover9" + name = 'prover9' self._prover9_bin = nltk.internals.find_binary( name, path_to_bin=binary_location, - env_vars=["PROVER9"], - url="http://www.cs.unm.edu/~mccune/prover9/", - binary_names=[name, name + ".exe"], + env_vars=['PROVER9'], + url='http://www.cs.unm.edu/~mccune/prover9/', + binary_names=[name, name + '.exe'], verbose=verbose, ) self._binary_location = self._prover9_bin.rsplit(os.path.sep, 1) @@ -140,18 +141,18 @@ class Prover9Parent(object): prover9 binary. This string is formed based on the goal, assumptions, and timeout value of this object. """ - s = "" + s = '' if assumptions: - s += "formulas(assumptions).\n" + s += 'formulas(assumptions).\n' for p9_assumption in convert_to_prover9(assumptions): - s += " %s.\n" % p9_assumption - s += "end_of_list.\n\n" + s += ' %s.\n' % p9_assumption + s += 'end_of_list.\n\n' if goal: - s += "formulas(goals).\n" - s += " %s.\n" % convert_to_prover9(goal) - s += "end_of_list.\n\n" + s += 'formulas(goals).\n' + s += ' %s.\n' % convert_to_prover9(goal) + s += 'end_of_list.\n\n' return s @@ -162,12 +163,12 @@ class Prover9Parent(object): for the prover9 executables. 
""" return [ - "/usr/local/bin/prover9", - "/usr/local/bin/prover9/bin", - "/usr/local/bin", - "/usr/bin", - "/usr/local/prover9", - "/usr/local/share/prover9", + '/usr/local/bin/prover9', + '/usr/local/bin/prover9/bin', + '/usr/local/bin', + '/usr/bin', + '/usr/local/prover9', + '/usr/local/share/prover9', ] def _find_binary(self, name, verbose=False): @@ -177,9 +178,9 @@ class Prover9Parent(object): return nltk.internals.find_binary( name, searchpath=binary_locations, - env_vars=["PROVER9"], - url="http://www.cs.unm.edu/~mccune/prover9/", - binary_names=[name, name + ".exe"], + env_vars=['PROVER9'], + url='http://www.cs.unm.edu/~mccune/prover9/', + binary_names=[name, name + '.exe'], verbose=verbose, ) @@ -194,9 +195,9 @@ class Prover9Parent(object): :see: ``config_prover9`` """ if verbose: - print("Calling:", binary) - print("Args:", args) - print("Input:\n", input_str, "\n") + print('Calling:', binary) + print('Args:', args) + print('Input:\n', input_str, '\n') # Call prover9 via a subprocess cmd = [binary] + args @@ -210,11 +211,11 @@ class Prover9Parent(object): (stdout, stderr) = p.communicate(input=input_str) if verbose: - print("Return code:", p.returncode) + print('Return code:', p.returncode) if stdout: - print("stdout:\n", stdout, "\n") + print('stdout:\n', stdout, '\n') if stderr: - print("stderr:\n", stderr, "\n") + print('stderr:\n', stderr, '\n') return (stdout.decode("utf-8"), p.returncode) @@ -229,14 +230,14 @@ def convert_to_prover9(input): try: result.append(_convert_to_prover9(s.simplify())) except: - print("input %s cannot be converted to Prover9 input syntax" % input) + print('input %s cannot be converted to Prover9 input syntax' % input) raise return result else: try: return _convert_to_prover9(input.simplify()) except: - print("input %s cannot be converted to Prover9 input syntax" % input) + print('input %s cannot be converted to Prover9 input syntax' % input) raise @@ -246,59 +247,59 @@ def _convert_to_prover9(expression): """ if isinstance(expression, ExistsExpression): return ( - "exists " + 'exists ' + str(expression.variable) - + " " + + ' ' + _convert_to_prover9(expression.term) ) elif isinstance(expression, AllExpression): return ( - "all " + 'all ' + str(expression.variable) - + " " + + ' ' + _convert_to_prover9(expression.term) ) elif isinstance(expression, NegatedExpression): - return "-(" + _convert_to_prover9(expression.term) + ")" + return '-(' + _convert_to_prover9(expression.term) + ')' elif isinstance(expression, AndExpression): return ( - "(" + '(' + _convert_to_prover9(expression.first) - + " & " + + ' & ' + _convert_to_prover9(expression.second) - + ")" + + ')' ) elif isinstance(expression, OrExpression): return ( - "(" + '(' + _convert_to_prover9(expression.first) - + " | " + + ' | ' + _convert_to_prover9(expression.second) - + ")" + + ')' ) elif isinstance(expression, ImpExpression): return ( - "(" + '(' + _convert_to_prover9(expression.first) - + " -> " + + ' -> ' + _convert_to_prover9(expression.second) - + ")" + + ')' ) elif isinstance(expression, IffExpression): return ( - "(" + '(' + _convert_to_prover9(expression.first) - + " <-> " + + ' <-> ' + _convert_to_prover9(expression.second) - + ")" + + ')' ) elif isinstance(expression, EqualityExpression): return ( - "(" + '(' + _convert_to_prover9(expression.first) - + " = " + + ' = ' + _convert_to_prover9(expression.second) - + ")" + + ')' ) else: return str(expression) @@ -333,7 +334,7 @@ class Prover9(Prover9Parent, Prover): """ :see: Prover9Parent.prover9_input """ - s = 
"clear(auto_denials).\n" # only one proof required + s = 'clear(auto_denials).\n' # only one proof required return s + Prover9Parent.prover9_input(self, goal, assumptions) def _call_prover9(self, input_str, args=[], verbose=False): @@ -346,11 +347,11 @@ class Prover9(Prover9Parent, Prover): :see: ``config_prover9`` """ if self._prover9_bin is None: - self._prover9_bin = self._find_binary("prover9", verbose) + self._prover9_bin = self._find_binary('prover9', verbose) - updated_input_str = "" + updated_input_str = '' if self._timeout > 0: - updated_input_str += "assign(max_seconds, %d).\n\n" % self._timeout + updated_input_str += 'assign(max_seconds, %d).\n\n' % self._timeout updated_input_str += input_str stdout, returncode = self._call( @@ -358,7 +359,7 @@ class Prover9(Prover9Parent, Prover): ) if returncode not in [0, 2]: - errormsgprefix = "%%ERROR:" + errormsgprefix = '%%ERROR:' if errormsgprefix in stdout: msgstart = stdout.index(errormsgprefix) errormsg = stdout[msgstart:].strip() @@ -381,7 +382,7 @@ class Prover9(Prover9Parent, Prover): :see: ``config_prover9`` """ if self._prooftrans_bin is None: - self._prooftrans_bin = self._find_binary("prooftrans", verbose) + self._prooftrans_bin = self._find_binary('prooftrans', verbose) return self._call(input_str, self._prooftrans_bin, args, verbose) @@ -390,7 +391,7 @@ class Prover9Exception(Exception): def __init__(self, returncode, message): msg = p9_return_codes[returncode] if message: - msg += "\n%s" % message + msg += '\n%s' % message Exception.__init__(self, msg) @@ -409,8 +410,8 @@ class Prover9LimitExceededException(Prover9Exception): def test_config(): - a = Expression.fromstring("(walk(j) & sing(j))") - g = Expression.fromstring("walk(j)") + a = Expression.fromstring('(walk(j) & sing(j))') + g = Expression.fromstring('walk(j)') p = Prover9Command(g, assumptions=[a]) p._executable_path = None p.prover9_search = [] @@ -438,56 +439,56 @@ def test_prove(arguments): alist = [Expression.fromstring(a) for a in assumptions] p = Prover9Command(g, assumptions=alist).prove() for a in alist: - print(" %s" % a) - print("|- %s: %s\n" % (g, p)) + print(' %s' % a) + print('|- %s: %s\n' % (g, p)) arguments = [ - ("(man(x) <-> (not (not man(x))))", []), - ("(not (man(x) & (not man(x))))", []), - ("(man(x) | (not man(x)))", []), - ("(man(x) & (not man(x)))", []), - ("(man(x) -> man(x))", []), - ("(not (man(x) & (not man(x))))", []), - ("(man(x) | (not man(x)))", []), - ("(man(x) -> man(x))", []), - ("(man(x) <-> man(x))", []), - ("(not (man(x) <-> (not man(x))))", []), - ("mortal(Socrates)", ["all x.(man(x) -> mortal(x))", "man(Socrates)"]), - ("((all x.(man(x) -> walks(x)) & man(Socrates)) -> some y.walks(y))", []), - ("(all x.man(x) -> all x.man(x))", []), - ("some x.all y.sees(x,y)", []), + ('(man(x) <-> (not (not man(x))))', []), + ('(not (man(x) & (not man(x))))', []), + ('(man(x) | (not man(x)))', []), + ('(man(x) & (not man(x)))', []), + ('(man(x) -> man(x))', []), + ('(not (man(x) & (not man(x))))', []), + ('(man(x) | (not man(x)))', []), + ('(man(x) -> man(x))', []), + ('(man(x) <-> man(x))', []), + ('(not (man(x) <-> (not man(x))))', []), + ('mortal(Socrates)', ['all x.(man(x) -> mortal(x))', 'man(Socrates)']), + ('((all x.(man(x) -> walks(x)) & man(Socrates)) -> some y.walks(y))', []), + ('(all x.man(x) -> all x.man(x))', []), + ('some x.all y.sees(x,y)', []), ( - "some e3.(walk(e3) & subj(e3, mary))", + 'some e3.(walk(e3) & subj(e3, mary))', [ - "some e1.(see(e1) & subj(e1, john) & some e2.(pred(e1, e2) & walk(e2) & subj(e2, mary)))" + 
'some e1.(see(e1) & subj(e1, john) & some e2.(pred(e1, e2) & walk(e2) & subj(e2, mary)))' ], ), ( - "some x e1.(see(e1) & subj(e1, x) & some e2.(pred(e1, e2) & walk(e2) & subj(e2, mary)))", + 'some x e1.(see(e1) & subj(e1, x) & some e2.(pred(e1, e2) & walk(e2) & subj(e2, mary)))', [ - "some e1.(see(e1) & subj(e1, john) & some e2.(pred(e1, e2) & walk(e2) & subj(e2, mary)))" + 'some e1.(see(e1) & subj(e1, john) & some e2.(pred(e1, e2) & walk(e2) & subj(e2, mary)))' ], ), ] expressions = [ - r"some x y.sees(x,y)", - r"some x.(man(x) & walks(x))", - r"\x.(man(x) & walks(x))", - r"\x y.sees(x,y)", - r"walks(john)", - r"\x.big(x, \y.mouse(y))", - r"(walks(x) & (runs(x) & (threes(x) & fours(x))))", - r"(walks(x) -> runs(x))", - r"some x.(PRO(x) & sees(John, x))", - r"some x.(man(x) & (not walks(x)))", - r"all x.(man(x) -> walks(x))", + r'some x y.sees(x,y)', + r'some x.(man(x) & walks(x))', + r'\x.(man(x) & walks(x))', + r'\x y.sees(x,y)', + r'walks(john)', + r'\x.big(x, \y.mouse(y))', + r'(walks(x) & (runs(x) & (threes(x) & fours(x))))', + r'(walks(x) -> runs(x))', + r'some x.(PRO(x) & sees(John, x))', + r'some x.(man(x) & (not walks(x)))', + r'all x.(man(x) -> walks(x))', ] def spacer(num=45): - print("-" * num) + print('-' * num) def demo(): @@ -504,5 +505,5 @@ def demo(): test_prove(arguments) -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/inference/resolution.py b/nlp_resource_data/nltk/inference/resolution.py index df19776..06761a9 100644 --- a/nlp_resource_data/nltk/inference/resolution.py +++ b/nlp_resource_data/nltk/inference/resolution.py @@ -2,13 +2,14 @@ # # Author: Dan Garrette # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # URL: # For license information, see LICENSE.TXT """ Module for a resolution-based First Order theorem prover. """ +from __future__ import print_function, unicode_literals import operator from collections import defaultdict @@ -31,6 +32,7 @@ from nltk.sem.logic import ( ) from nltk.inference.api import Prover, BaseProverCommand +from nltk.compat import python_2_unicode_compatible class ProverParseError(Exception): @@ -38,7 +40,7 @@ class ProverParseError(Exception): class ResolutionProver(Prover): - ANSWER_KEY = "ANSWER" + ANSWER_KEY = 'ANSWER' _assume_false = True def _prove(self, goal=None, assumptions=None, verbose=False): @@ -63,7 +65,7 @@ class ResolutionProver(Prover): print(ResolutionProverCommand._decorate_clauses(clauses)) except RuntimeError as e: if self._assume_false and str(e).startswith( - "maximum recursion depth exceeded" + 'maximum recursion depth exceeded' ): result = False clauses = [] @@ -157,22 +159,23 @@ class ResolutionProverCommand(BaseProverCommand): """ Decorate the proof output. 
""" - out = "" + out = '' max_clause_len = max([len(str(clause)) for clause in clauses]) max_seq_len = len(str(len(clauses))) for i in range(len(clauses)): - parents = "A" - taut = "" + parents = 'A' + taut = '' if clauses[i].is_tautology(): - taut = "Tautology" + taut = 'Tautology' if clauses[i]._parents: parents = str(clauses[i]._parents) - parents = " " * (max_clause_len - len(str(clauses[i])) + 1) + parents - seq = " " * (max_seq_len - len(str(i + 1))) + str(i + 1) - out += "[%s] %s %s %s\n" % (seq, clauses[i], parents, taut) + parents = ' ' * (max_clause_len - len(str(clauses[i])) + 1) + parents + seq = ' ' * (max_seq_len - len(str(i + 1))) + str(i + 1) + out += '[%s] %s %s %s\n' % (seq, clauses[i], parents, taut) return out +@python_2_unicode_compatible class Clause(list): def __init__(self, data): list.__init__(self, data) @@ -333,7 +336,7 @@ class Clause(list): return Clause([atom.substitute_bindings(bindings) for atom in self]) def __str__(self): - return "{" + ", ".join("%s" % item for item in self) + "}" + return '{' + ', '.join("%s" % item for item in self) + '}' def __repr__(self): return "%s" % self @@ -343,7 +346,7 @@ def _iterate_first(first, second, bindings, used, skipped, finalize_method, debu """ This method facilitates movement through the terms of 'self' """ - debug.line("unify(%s,%s) %s" % (first, second, bindings)) + debug.line('unify(%s,%s) %s' % (first, second, bindings)) if not len(first) or not len(second): # if no more recursions can be performed return finalize_method(first, second, bindings, used, skipped, debug) @@ -387,7 +390,7 @@ def _iterate_second(first, second, bindings, used, skipped, finalize_method, deb """ This method facilitates movement through the terms of 'other' """ - debug.line("unify(%s,%s) %s" % (first, second, bindings)) + debug.line('unify(%s,%s) %s' % (first, second, bindings)) if not len(first) or not len(second): # if no more recursions can be performed return finalize_method(first, second, bindings, used, skipped, debug) @@ -471,10 +474,10 @@ def _unify_terms(a, b, bindings=None, used=None): def _complete_unify_path(first, second, bindings, used, skipped, debug): if used[0] or used[1]: # if bindings were made along the path newclause = Clause(skipped[0] + skipped[1] + first + second) - debug.line(" -> New Clause: %s" % newclause) + debug.line(' -> New Clause: %s' % newclause) return [newclause.substitute_bindings(bindings)] else: # no bindings made means no unification occurred. 
so no result - debug.line(" -> End") + debug.line(' -> End') return [] @@ -528,6 +531,7 @@ def _clausify(expression): raise ProverParseError() +@python_2_unicode_compatible class BindingDict(object): def __init__(self, binding_list=None): """ @@ -571,11 +575,11 @@ class BindingDict(object): self.d[binding.variable] = binding2 else: raise BindingException( - "Variable %s already bound to another " "value" % (variable) + 'Variable %s already bound to another ' 'value' % (variable) ) else: raise BindingException( - "Variable %s already bound to another " "value" % (variable) + 'Variable %s already bound to another ' 'value' % (variable) ) def __getitem__(self, variable): @@ -617,8 +621,8 @@ class BindingDict(object): return len(self.d) def __str__(self): - data_str = ", ".join("%s: %s" % (v, self.d[v]) for v in sorted(self.d.keys())) - return "{" + data_str + "}" + data_str = ', '.join('%s: %s' % (v, self.d[v]) for v in sorted(self.d.keys())) + return '{' + data_str + '}' def __repr__(self): return "%s" % self @@ -681,70 +685,70 @@ class DebugObject(object): def line(self, line): if self.enabled: - print(" " * self.indent + line) + print(' ' * self.indent + line) def testResolutionProver(): - resolution_test(r"man(x)") - resolution_test(r"(man(x) -> man(x))") - resolution_test(r"(man(x) -> --man(x))") - resolution_test(r"-(man(x) and -man(x))") - resolution_test(r"(man(x) or -man(x))") - resolution_test(r"(man(x) -> man(x))") - resolution_test(r"-(man(x) and -man(x))") - resolution_test(r"(man(x) or -man(x))") - resolution_test(r"(man(x) -> man(x))") - resolution_test(r"(man(x) iff man(x))") - resolution_test(r"-(man(x) iff -man(x))") - resolution_test("all x.man(x)") - resolution_test("-all x.some y.F(x,y) & some x.all y.(-F(x,y))") - resolution_test("some x.all y.sees(x,y)") - - p1 = Expression.fromstring(r"all x.(man(x) -> mortal(x))") - p2 = Expression.fromstring(r"man(Socrates)") - c = Expression.fromstring(r"mortal(Socrates)") - print("%s, %s |- %s: %s" % (p1, p2, c, ResolutionProver().prove(c, [p1, p2]))) - - p1 = Expression.fromstring(r"all x.(man(x) -> walks(x))") - p2 = Expression.fromstring(r"man(John)") - c = Expression.fromstring(r"some y.walks(y)") - print("%s, %s |- %s: %s" % (p1, p2, c, ResolutionProver().prove(c, [p1, p2]))) - - p = Expression.fromstring(r"some e1.some e2.(believe(e1,john,e2) & walk(e2,mary))") - c = Expression.fromstring(r"some e0.walk(e0,mary)") - print("%s |- %s: %s" % (p, c, ResolutionProver().prove(c, [p]))) + resolution_test(r'man(x)') + resolution_test(r'(man(x) -> man(x))') + resolution_test(r'(man(x) -> --man(x))') + resolution_test(r'-(man(x) and -man(x))') + resolution_test(r'(man(x) or -man(x))') + resolution_test(r'(man(x) -> man(x))') + resolution_test(r'-(man(x) and -man(x))') + resolution_test(r'(man(x) or -man(x))') + resolution_test(r'(man(x) -> man(x))') + resolution_test(r'(man(x) iff man(x))') + resolution_test(r'-(man(x) iff -man(x))') + resolution_test('all x.man(x)') + resolution_test('-all x.some y.F(x,y) & some x.all y.(-F(x,y))') + resolution_test('some x.all y.sees(x,y)') + + p1 = Expression.fromstring(r'all x.(man(x) -> mortal(x))') + p2 = Expression.fromstring(r'man(Socrates)') + c = Expression.fromstring(r'mortal(Socrates)') + print('%s, %s |- %s: %s' % (p1, p2, c, ResolutionProver().prove(c, [p1, p2]))) + + p1 = Expression.fromstring(r'all x.(man(x) -> walks(x))') + p2 = Expression.fromstring(r'man(John)') + c = Expression.fromstring(r'some y.walks(y)') + print('%s, %s |- %s: %s' % (p1, p2, c, ResolutionProver().prove(c, [p1, 
p2]))) + + p = Expression.fromstring(r'some e1.some e2.(believe(e1,john,e2) & walk(e2,mary))') + c = Expression.fromstring(r'some e0.walk(e0,mary)') + print('%s |- %s: %s' % (p, c, ResolutionProver().prove(c, [p]))) def resolution_test(e): f = Expression.fromstring(e) t = ResolutionProver().prove(f) - print("|- %s: %s" % (f, t)) + print('|- %s: %s' % (f, t)) def test_clausify(): lexpr = Expression.fromstring - print(clausify(lexpr("P(x) | Q(x)"))) - print(clausify(lexpr("(P(x) & Q(x)) | R(x)"))) - print(clausify(lexpr("P(x) | (Q(x) & R(x))"))) - print(clausify(lexpr("(P(x) & Q(x)) | (R(x) & S(x))"))) + print(clausify(lexpr('P(x) | Q(x)'))) + print(clausify(lexpr('(P(x) & Q(x)) | R(x)'))) + print(clausify(lexpr('P(x) | (Q(x) & R(x))'))) + print(clausify(lexpr('(P(x) & Q(x)) | (R(x) & S(x))'))) - print(clausify(lexpr("P(x) | Q(x) | R(x)"))) - print(clausify(lexpr("P(x) | (Q(x) & R(x)) | S(x)"))) + print(clausify(lexpr('P(x) | Q(x) | R(x)'))) + print(clausify(lexpr('P(x) | (Q(x) & R(x)) | S(x)'))) - print(clausify(lexpr("exists x.P(x) | Q(x)"))) + print(clausify(lexpr('exists x.P(x) | Q(x)'))) - print(clausify(lexpr("-(-P(x) & Q(x))"))) - print(clausify(lexpr("P(x) <-> Q(x)"))) - print(clausify(lexpr("-(P(x) <-> Q(x))"))) - print(clausify(lexpr("-(all x.P(x))"))) - print(clausify(lexpr("-(some x.P(x))"))) + print(clausify(lexpr('-(-P(x) & Q(x))'))) + print(clausify(lexpr('P(x) <-> Q(x)'))) + print(clausify(lexpr('-(P(x) <-> Q(x))'))) + print(clausify(lexpr('-(all x.P(x))'))) + print(clausify(lexpr('-(some x.P(x))'))) - print(clausify(lexpr("some x.P(x)"))) - print(clausify(lexpr("some x.all y.P(x,y)"))) - print(clausify(lexpr("all y.some x.P(x,y)"))) - print(clausify(lexpr("all z.all y.some x.P(x,y,z)"))) - print(clausify(lexpr("all x.(all y.P(x,y) -> -all y.(Q(x,y) -> R(x,y)))"))) + print(clausify(lexpr('some x.P(x)'))) + print(clausify(lexpr('some x.all y.P(x,y)'))) + print(clausify(lexpr('all y.some x.P(x,y)'))) + print(clausify(lexpr('all z.all y.some x.P(x,y,z)'))) + print(clausify(lexpr('all x.(all y.P(x,y) -> -all y.(Q(x,y) -> R(x,y)))'))) def demo(): @@ -753,9 +757,9 @@ def demo(): testResolutionProver() print() - p = Expression.fromstring("man(x)") + p = Expression.fromstring('man(x)') print(ResolutionProverCommand(p, [p]).prove()) -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/inference/tableau.py b/nlp_resource_data/nltk/inference/tableau.py index 90c9725..e8cc840 100644 --- a/nlp_resource_data/nltk/inference/tableau.py +++ b/nlp_resource_data/nltk/inference/tableau.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: First-Order Tableau Theorem Prover # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Dan Garrette # # URL: @@ -9,6 +9,7 @@ """ Module for a tableau-based First Order theorem prover. 
""" +from __future__ import print_function, unicode_literals from nltk.internals import Counter @@ -57,7 +58,7 @@ class TableauProver(Prover): result = self._attempt_proof(agenda, set(), set(), debugger) except RuntimeError as e: if self._assume_false and str(e).startswith( - "maximum recursion depth exceeded" + 'maximum recursion depth exceeded' ): result = False else: @@ -65,14 +66,14 @@ class TableauProver(Prover): print(e) else: raise e - return (result, "\n".join(debugger.lines)) + return (result, '\n'.join(debugger.lines)) def _attempt_proof(self, agenda, accessible_vars, atoms, debug): (current, context), category = agenda.pop_first() # if there's nothing left in the agenda, and we haven't closed the path if not current: - debug.line("AGENDA EMPTY") + debug.line('AGENDA EMPTY') return False proof_method = { @@ -107,7 +108,7 @@ class TableauProver(Prover): ): # Check if the branch is closed. Return 'True' if it is if (current, True) in atoms: - debug.line("CLOSED", 1) + debug.line('CLOSED', 1) return True if context: @@ -130,7 +131,7 @@ class TableauProver(Prover): ): # Check if the branch is closed. Return 'True' if it is if (current.term, False) in atoms: - debug.line("CLOSED", 1) + debug.line('CLOSED', 1) return True if context: @@ -153,7 +154,7 @@ class TableauProver(Prover): ): # Check if the branch is closed. Return 'True' if it is if (current, True) in atoms: - debug.line("CLOSED", 1) + debug.line('CLOSED', 1) return True # mark all AllExpressions as 'not exhausted' into the agenda since we are (potentially) adding new accessible vars @@ -167,7 +168,7 @@ class TableauProver(Prover): ): # Check if the branch is closed. Return 'True' if it is if (current.term, False) in atoms: - debug.line("CLOSED", 1) + debug.line('CLOSED', 1) return True # mark all AllExpressions as 'not exhausted' into the agenda since we are (potentially) adding new accessible vars @@ -183,7 +184,7 @@ class TableauProver(Prover): for i, arg in enumerate(args): if not TableauProver.is_atom(arg): ctx = f - nv = Variable("X%s" % _counter.get()) + nv = Variable('X%s' % _counter.get()) for j, a in enumerate(args): ctx = ctx(VariableExpression(nv)) if i == j else ctx(a) if context: @@ -191,7 +192,7 @@ class TableauProver(Prover): ctx = LambdaExpression(nv, ctx) agenda.put(arg, ctx) return self._attempt_proof(agenda, accessible_vars, atoms, debug + 1) - raise Exception("If this method is called, there must be a non-atomic argument") + raise Exception('If this method is called, there must be a non-atomic argument') def _attempt_proof_n_app( self, current, context, agenda, accessible_vars, atoms, debug @@ -200,7 +201,7 @@ class TableauProver(Prover): for i, arg in enumerate(args): if not TableauProver.is_atom(arg): ctx = f - nv = Variable("X%s" % _counter.get()) + nv = Variable('X%s' % _counter.get()) for j, a in enumerate(args): ctx = ctx(VariableExpression(nv)) if i == j else ctx(a) if context: @@ -209,7 +210,7 @@ class TableauProver(Prover): ctx = LambdaExpression(nv, -ctx) agenda.put(-arg, ctx) return self._attempt_proof(agenda, accessible_vars, atoms, debug + 1) - raise Exception("If this method is called, there must be a non-atomic argument") + raise Exception('If this method is called, there must be a non-atomic argument') def _attempt_proof_n_eq( self, current, context, agenda, accessible_vars, atoms, debug @@ -218,7 +219,7 @@ class TableauProver(Prover): # Since 'current' is of type '~(a=b)', the path is closed if 'a' == 'b' ########################################################################### if 
current.term.first == current.term.second: - debug.line("CLOSED", 1) + debug.line('CLOSED', 1) return True agenda[Categories.N_EQ].add((current, context)) @@ -365,7 +366,7 @@ class TableauProver(Prover): if bv_available: variable_to_use = list(bv_available)[0] - debug.line("--> Using '%s'" % variable_to_use, 2) + debug.line('--> Using \'%s\'' % variable_to_use, 2) current._used_vars |= set([variable_to_use]) agenda.put( current.term.replace(current.variable, variable_to_use), context @@ -375,14 +376,14 @@ class TableauProver(Prover): else: # no more available variables to substitute - debug.line("--> Variables Exhausted", 2) + debug.line('--> Variables Exhausted', 2) current._exhausted = True agenda[Categories.ALL].add((current, context)) return self._attempt_proof(agenda, accessible_vars, atoms, debug + 1) else: new_unique_variable = VariableExpression(unique_variable()) - debug.line("--> Using '%s'" % new_unique_variable, 2) + debug.line('--> Using \'%s\'' % new_unique_variable, 2) current._used_vars |= set([new_unique_variable]) agenda.put( current.term.replace(current.variable, new_unique_variable), context @@ -582,20 +583,20 @@ class Debug(object): if isinstance(data, tuple): ex, ctx = data if ctx: - data = "%s, %s" % (ex, ctx) + data = '%s, %s' % (ex, ctx) else: - data = "%s" % ex + data = '%s' % ex if isinstance(ex, AllExpression): try: used_vars = "[%s]" % ( ",".join("%s" % ve.variable.name for ve in ex._used_vars) ) - data += ": %s" % used_vars + data += ': %s' % used_vars except AttributeError: - data += ": []" + data += ': []' - newline = "%s%s" % (" " * (self.indent + indent), data) + newline = '%s%s' % (' ' * (self.indent + indent), data) self.lines.append(newline) if self.verbose: @@ -627,49 +628,49 @@ class Categories(object): def testTableauProver(): - tableau_test("P | -P") - tableau_test("P & -P") - tableau_test("Q", ["P", "(P -> Q)"]) - tableau_test("man(x)") - tableau_test("(man(x) -> man(x))") - tableau_test("(man(x) -> --man(x))") - tableau_test("-(man(x) and -man(x))") - tableau_test("(man(x) or -man(x))") - tableau_test("(man(x) -> man(x))") - tableau_test("-(man(x) and -man(x))") - tableau_test("(man(x) or -man(x))") - tableau_test("(man(x) -> man(x))") - tableau_test("(man(x) iff man(x))") - tableau_test("-(man(x) iff -man(x))") - tableau_test("all x.man(x)") - tableau_test("all x.all y.((x = y) -> (y = x))") - tableau_test("all x.all y.all z.(((x = y) & (y = z)) -> (x = z))") + tableau_test('P | -P') + tableau_test('P & -P') + tableau_test('Q', ['P', '(P -> Q)']) + tableau_test('man(x)') + tableau_test('(man(x) -> man(x))') + tableau_test('(man(x) -> --man(x))') + tableau_test('-(man(x) and -man(x))') + tableau_test('(man(x) or -man(x))') + tableau_test('(man(x) -> man(x))') + tableau_test('-(man(x) and -man(x))') + tableau_test('(man(x) or -man(x))') + tableau_test('(man(x) -> man(x))') + tableau_test('(man(x) iff man(x))') + tableau_test('-(man(x) iff -man(x))') + tableau_test('all x.man(x)') + tableau_test('all x.all y.((x = y) -> (y = x))') + tableau_test('all x.all y.all z.(((x = y) & (y = z)) -> (x = z))') # tableau_test('-all x.some y.F(x,y) & some x.all y.(-F(x,y))') # tableau_test('some x.all y.sees(x,y)') - p1 = "all x.(man(x) -> mortal(x))" - p2 = "man(Socrates)" - c = "mortal(Socrates)" + p1 = 'all x.(man(x) -> mortal(x))' + p2 = 'man(Socrates)' + c = 'mortal(Socrates)' tableau_test(c, [p1, p2]) - p1 = "all x.(man(x) -> walks(x))" - p2 = "man(John)" - c = "some y.walks(y)" + p1 = 'all x.(man(x) -> walks(x))' + p2 = 'man(John)' + c = 'some y.walks(y)' 
tableau_test(c, [p1, p2]) - p = "((x = y) & walks(y))" - c = "walks(x)" + p = '((x = y) & walks(y))' + c = 'walks(x)' tableau_test(c, [p]) - p = "((x = y) & ((y = z) & (z = w)))" - c = "(x = w)" + p = '((x = y) & ((y = z) & (z = w)))' + c = '(x = w)' tableau_test(c, [p]) - p = "some e1.some e2.(believe(e1,john,e2) & walk(e2,mary))" - c = "some e0.walk(e0,mary)" + p = 'some e1.some e2.(believe(e1,john,e2) & walk(e2,mary))' + c = 'some e0.walk(e0,mary)' tableau_test(c, [p]) - c = "(exists x.exists z3.((x = Mary) & ((z3 = John) & sees(z3,x))) <-> exists x.exists z4.((x = John) & ((z4 = Mary) & sees(x,z4))))" + c = '(exists x.exists z3.((x = Mary) & ((z3 = John) & sees(z3,x))) <-> exists x.exists z4.((x = John) & ((z4 = Mary) & sees(x,z4))))' tableau_test(c) @@ -679,19 +680,19 @@ def testTableauProver(): def testHigherOrderTableauProver(): - tableau_test("believe(j, -lie(b))", ["believe(j, -lie(b) & -cheat(b))"]) - tableau_test("believe(j, lie(b) & cheat(b))", ["believe(j, lie(b))"]) + tableau_test('believe(j, -lie(b))', ['believe(j, -lie(b) & -cheat(b))']) + tableau_test('believe(j, lie(b) & cheat(b))', ['believe(j, lie(b))']) tableau_test( - "believe(j, lie(b))", ["lie(b)"] + 'believe(j, lie(b))', ['lie(b)'] ) # how do we capture that John believes all things that are true tableau_test( - "believe(j, know(b, cheat(b)))", - ["believe(j, know(b, lie(b)) & know(b, steals(b) & cheat(b)))"], + 'believe(j, know(b, cheat(b)))', + ['believe(j, know(b, lie(b)) & know(b, steals(b) & cheat(b)))'], ) - tableau_test("P(Q(y), R(y) & R(z))", ["P(Q(x) & Q(y), R(y) & R(z))"]) + tableau_test('P(Q(y), R(y) & R(z))', ['P(Q(x) & Q(y), R(y) & R(z))']) - tableau_test("believe(j, cheat(b) & lie(b))", ["believe(j, lie(b) & cheat(b))"]) - tableau_test("believe(j, -cheat(b) & -lie(b))", ["believe(j, -lie(b) & -cheat(b))"]) + tableau_test('believe(j, cheat(b) & lie(b))', ['believe(j, lie(b) & cheat(b))']) + tableau_test('believe(j, -cheat(b) & -lie(b))', ['believe(j, -lie(b) & -cheat(b))']) def tableau_test(c, ps=None, verbose=False): @@ -700,8 +701,8 @@ def tableau_test(c, ps=None, verbose=False): if not ps: ps = [] print( - "%s |- %s: %s" - % (", ".join(ps), pc, TableauProver().prove(pc, pps, verbose=verbose)) + '%s |- %s: %s' + % (', '.join(ps), pc, TableauProver().prove(pc, pps, verbose=verbose)) ) @@ -710,5 +711,5 @@ def demo(): testHigherOrderTableauProver() -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/internals.py b/nlp_resource_data/nltk/internals.py index ac93c8a..01d0f48 100644 --- a/nlp_resource_data/nltk/internals.py +++ b/nlp_resource_data/nltk/internals.py @@ -1,11 +1,12 @@ # Natural Language Toolkit: Internal utility functions # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Steven Bird # Edward Loper # Nitin Madnani # URL: # For license information, see LICENSE.TXT +from __future__ import print_function import subprocess import os @@ -17,7 +18,16 @@ import types import sys import stat import locale -from xml.etree import ElementTree + +# Use the c version of ElementTree, which is faster, if possible: +try: + from xml.etree import cElementTree as ElementTree +except ImportError: + from xml.etree import ElementTree + +from six import string_types + +from nltk import compat ########################################################################## # Java Via Command-Line @@ -45,15 +55,15 @@ def config_java(bin=None, options=None, verbose=False): """ global _java_bin, _java_options _java_bin = find_binary( - 
"java", + 'java', bin, - env_vars=["JAVAHOME", "JAVA_HOME"], + env_vars=['JAVAHOME', 'JAVA_HOME'], verbose=verbose, - binary_names=["java.exe"], + binary_names=['java.exe'], ) if options is not None: - if isinstance(options, str): + if isinstance(options, string_types): options = options.split() _java_options = list(options) @@ -78,7 +88,7 @@ def java(cmd, classpath=None, stdin=None, stdout=None, stderr=None, blocking=Tru standard input, standard output and standard error file handles, respectively. Valid values are ``subprocess.PIPE``, an existing file descriptor (a positive integer), an existing - file object, 'pipe', 'stdout', 'devnull' and None. ``subprocess.PIPE`` indicates that a + file object, and None. ``subprocess.PIPE`` indicates that a new pipe to the child should be created. With None, no redirection will occur; the child's file handles will be inherited from the parent. Additionally, stderr can be @@ -98,26 +108,21 @@ def java(cmd, classpath=None, stdin=None, stdout=None, stderr=None, blocking=Tru :raise OSError: If the java command returns a nonzero return code. """ - - subprocess_output_dict = { - "pipe": subprocess.PIPE, - "stdout": subprocess.STDOUT, - "devnull": subprocess.DEVNULL, - } - - stdin = subprocess_output_dict.get(stdin, stdin) - stdout = subprocess_output_dict.get(stdout, stdout) - stderr = subprocess_output_dict.get(stderr, stderr) - - if isinstance(cmd, str): - raise TypeError("cmd should be a list of strings") + if stdin == 'pipe': + stdin = subprocess.PIPE + if stdout == 'pipe': + stdout = subprocess.PIPE + if stderr == 'pipe': + stderr = subprocess.PIPE + if isinstance(cmd, string_types): + raise TypeError('cmd should be a list of strings') # Make sure we know where a java binary is. if _java_bin is None: config_java() # Set up the classpath. - if isinstance(classpath, str): + if isinstance(classpath, string_types): classpaths = [classpath] else: classpaths = list(classpath) @@ -125,7 +130,7 @@ def java(cmd, classpath=None, stdin=None, stdout=None, stderr=None, blocking=Tru # Construct the full command string. cmd = list(cmd) - cmd = ["-cp", classpath] + cmd + cmd = ['-cp', classpath] + cmd cmd = [_java_bin] + _java_options + cmd # Call java via a subprocess @@ -137,7 +142,7 @@ def java(cmd, classpath=None, stdin=None, stdout=None, stderr=None, blocking=Tru # Check the return code. if p.returncode != 0: print(_decode_stdoutdata(stderr)) - raise OSError("Java command failed : " + str(cmd)) + raise OSError('Java command failed : ' + str(cmd)) return (stdout, stderr) @@ -151,15 +156,15 @@ if 0: # Read: (a, b) = java( [ - "weka.classifiers.bayes.NaiveBayes", - "-l", - "/tmp/names.model", - "-T", - "/tmp/test.arff", - "-p", - "0", + 'weka.classifiers.bayes.NaiveBayes', + '-l', + '/tmp/names.model', + '-T', + '/tmp/test.arff', + '-p', + '0', ], # , '-distribution'], - classpath="/Users/edloper/Desktop/weka/weka.jar", + classpath='/Users/edloper/Desktop/weka/weka.jar', ) @@ -181,7 +186,7 @@ class ReadError(ValueError): self.position = position def __str__(self): - return "Expected %s at %s" % (self.expected, self.position) + return 'Expected %s at %s' % (self.expected, self.position) _STRING_START_RE = re.compile(r"[uU]?[rR]?(\"\"\"|\'\'\'|\"|\')") @@ -222,17 +227,17 @@ def read_str(s, start_position): # Read the open quote, and any modifiers. m = _STRING_START_RE.match(s, start_position) if not m: - raise ReadError("open quote", start_position) + raise ReadError('open quote', start_position) quotemark = m.group(1) # Find the close quote. 
- _STRING_END_RE = re.compile(r"\\|%s" % quotemark) + _STRING_END_RE = re.compile(r'\\|%s' % quotemark) position = m.end() while True: match = _STRING_END_RE.search(s, position) if not match: - raise ReadError("close quote", position) - if match.group(0) == "\\": + raise ReadError('close quote', position) + if match.group(0) == '\\': position = match.end() + 1 else: break @@ -242,10 +247,10 @@ def read_str(s, start_position): try: return eval(s[start_position : match.end()]), match.end() except ValueError as e: - raise ReadError("invalid string (%s)" % e) + raise ReadError('invalid string (%s)' % e) -_READ_INT_RE = re.compile(r"-?\d+") +_READ_INT_RE = re.compile(r'-?\d+') def read_int(s, start_position): @@ -278,11 +283,11 @@ def read_int(s, start_position): """ m = _READ_INT_RE.match(s, start_position) if not m: - raise ReadError("integer", start_position) + raise ReadError('integer', start_position) return int(m.group()), m.end() -_READ_NUMBER_VALUE = re.compile(r"-?(\d*)([.]?\d*)?") +_READ_NUMBER_VALUE = re.compile(r'-?(\d*)([.]?\d*)?') def read_number(s, start_position): @@ -315,7 +320,7 @@ def read_number(s, start_position): """ m = _READ_NUMBER_VALUE.match(s, start_position) if not m or not (m.group(1) or m.group(2)): - raise ReadError("number", start_position) + raise ReadError('number', start_position) if m.group(2): return float(m.group()), m.end() else: @@ -346,16 +351,17 @@ def overridden(method): :type method: instance method """ - if isinstance(method, types.MethodType) and method.__self__.__class__ is not None: + # [xx] breaks on classic classes! + if isinstance(method, types.MethodType) and compat.get_im_class(method) is not None: name = method.__name__ funcs = [ cls.__dict__[name] - for cls in _mro(method.__self__.__class__) + for cls in _mro(compat.get_im_class(method)) if name in cls.__dict__ ] return len(funcs) > 1 else: - raise TypeError("Expected an instance method.") + raise TypeError('Expected an instance method.') def _mro(cls): @@ -383,22 +389,22 @@ def _mro(cls): def _add_epytext_field(obj, field, message): """Add an epytext @field to a given object's docstring.""" - indent = "" + indent = '' # If we already have a docstring, then add a blank line to separate # it from the new field, and check its indentation. if obj.__doc__: - obj.__doc__ = obj.__doc__.rstrip() + "\n\n" - indents = re.findall(r"(?<=\n)[ ]+(?!\s)", obj.__doc__.expandtabs()) + obj.__doc__ = obj.__doc__.rstrip() + '\n\n' + indents = re.findall(r'(?<=\n)[ ]+(?!\s)', obj.__doc__.expandtabs()) if indents: indent = min(indents) # If we don't have a docstring, add an empty one. else: - obj.__doc__ = "" + obj.__doc__ = '' obj.__doc__ += textwrap.fill( - "@%s: %s" % (field, message), + '@%s: %s' % (field, message), initial_indent=indent, - subsequent_indent=indent + " ", + subsequent_indent=indent + ' ', ) @@ -416,7 +422,7 @@ def deprecated(message): def decorator(func): msg = "Function %s() has been deprecated. %s" % (func.__name__, message) - msg = "\n" + textwrap.fill(msg, initial_indent=" ", subsequent_indent=" ") + msg = '\n' + textwrap.fill(msg, initial_indent=' ', subsequent_indent=' ') def newFunc(*args, **kwargs): warnings.warn(msg, category=DeprecationWarning, stacklevel=2) @@ -428,7 +434,7 @@ def deprecated(message): newFunc.__doc__ = func.__doc__ newFunc.__deprecated__ = True # Add a @deprecated field to the docstring. 
- _add_epytext_field(newFunc, "deprecated", message) + _add_epytext_field(newFunc, 'deprecated', message) return newFunc return decorator @@ -457,22 +463,22 @@ class Deprecated(object): if Deprecated in base.__bases__: dep_cls = base break - assert dep_cls, "Unable to determine which base is deprecated." + assert dep_cls, 'Unable to determine which base is deprecated.' # Construct an appropriate warning. - doc = dep_cls.__doc__ or "".strip() + doc = dep_cls.__doc__ or ''.strip() # If there's a @deprecated field, strip off the field marker. - doc = re.sub(r"\A\s*@deprecated:", r"", doc) + doc = re.sub(r'\A\s*@deprecated:', r'', doc) # Strip off any indentation. - doc = re.sub(r"(?m)^\s*", "", doc) + doc = re.sub(r'(?m)^\s*', '', doc) # Construct a 'name' string. - name = "Class %s" % dep_cls.__name__ + name = 'Class %s' % dep_cls.__name__ if cls != dep_cls: - name += " (base class for %s)" % cls.__name__ + name += ' (base class for %s)' % cls.__name__ # Put it all together. - msg = "%s has been deprecated. %s" % (name, doc) + msg = '%s has been deprecated. %s' % (name, doc) # Wrap it. - msg = "\n" + textwrap.fill(msg, initial_indent=" ", subsequent_indent=" ") + msg = '\n' + textwrap.fill(msg, initial_indent=' ', subsequent_indent=' ') warnings.warn(msg, category=DeprecationWarning, stacklevel=2) # Do the actual work of __new__. return object.__new__(cls) @@ -521,10 +527,10 @@ def find_file_iter( :param verbose: Whether or not to print path when a file is found. """ file_names = [filename] + (file_names or []) - assert isinstance(filename, str) - assert not isinstance(file_names, str) - assert not isinstance(searchpath, str) - if isinstance(env_vars, str): + assert isinstance(filename, string_types) + assert not isinstance(file_names, string_types) + assert not isinstance(searchpath, string_types) + if isinstance(env_vars, string_types): env_vars = env_vars.split() yielded = False @@ -533,20 +539,20 @@ def find_file_iter( path_to_file = os.path.join(filename, alternative) if os.path.isfile(path_to_file): if verbose: - print("[Found %s: %s]" % (filename, path_to_file)) + print('[Found %s: %s]' % (filename, path_to_file)) yielded = True yield path_to_file # Check the bare alternatives if os.path.isfile(alternative): if verbose: - print("[Found %s: %s]" % (filename, alternative)) + print('[Found %s: %s]' % (filename, alternative)) yielded = True yield alternative # Check if the alternative is inside a 'file' directory - path_to_file = os.path.join(filename, "file", alternative) + path_to_file = os.path.join(filename, 'file', alternative) if os.path.isfile(path_to_file): if verbose: - print("[Found %s: %s]" % (filename, path_to_file)) + print('[Found %s: %s]' % (filename, path_to_file)) yielded = True yield path_to_file @@ -561,7 +567,7 @@ def find_file_iter( # Check if the environment variable contains a direct path to the bin if os.path.isfile(env_dir): if verbose: - print("[Found %s: %s]" % (filename, env_dir)) + print('[Found %s: %s]' % (filename, env_dir)) yielded = True yield env_dir # Check if the possible bin names exist inside the environment variable directories @@ -569,18 +575,18 @@ def find_file_iter( path_to_file = os.path.join(env_dir, alternative) if os.path.isfile(path_to_file): if verbose: - print("[Found %s: %s]" % (filename, path_to_file)) + print('[Found %s: %s]' % (filename, path_to_file)) yielded = True yield path_to_file # Check if the alternative is inside a 'file' directory # path_to_file = os.path.join(env_dir, 'file', alternative) # Check if the alternative is inside 
a 'bin' directory - path_to_file = os.path.join(env_dir, "bin", alternative) + path_to_file = os.path.join(env_dir, 'bin', alternative) if os.path.isfile(path_to_file): if verbose: - print("[Found %s: %s]" % (filename, path_to_file)) + print('[Found %s: %s]' % (filename, path_to_file)) yielded = True yield path_to_file @@ -594,11 +600,11 @@ def find_file_iter( # If we're on a POSIX system, then try using the 'which' command # to find the file. - if os.name == "posix": + if os.name == 'posix': for alternative in file_names: try: p = subprocess.Popen( - ["which", alternative], + ['which', alternative], stdout=subprocess.PIPE, stderr=subprocess.PIPE, ) @@ -606,7 +612,7 @@ def find_file_iter( path = _decode_stdoutdata(stdout).strip() if path.endswith(alternative) and os.path.exists(path): if verbose: - print("[Found %s: %s]" % (filename, path)) + print('[Found %s: %s]' % (filename, path)) yielded = True yield path except (KeyboardInterrupt, SystemExit, OSError): @@ -621,15 +627,15 @@ def find_file_iter( "configuration paramaters" % filename ) if env_vars: - msg += " or set the %s environment variable" % env_vars[0] - msg += "." + msg += ' or set the %s environment variable' % env_vars[0] + msg += '.' if searchpath: - msg += "\n\n Searched in:" - msg += "".join("\n - %s" % d for d in searchpath) + msg += '\n\n Searched in:' + msg += ''.join('\n - %s' % d for d in searchpath) if url: - msg += "\n\n For more information on %s, see:\n <%s>" % (filename, url) - div = "=" * 75 - raise LookupError("\n\n%s\n%s\n%s" % (div, msg, div)) + msg += '\n\n For more information on %s, see:\n <%s>' % (filename, url) + div = '=' * 75 + raise LookupError('\n\n%s\n%s\n%s' % (div, msg, div)) def find_file( @@ -713,14 +719,14 @@ def find_jar_iter( :param is_regex: Whether name is a regular expression. """ - assert isinstance(name_pattern, str) - assert not isinstance(searchpath, str) - if isinstance(env_vars, str): + assert isinstance(name_pattern, string_types) + assert not isinstance(searchpath, string_types) + if isinstance(env_vars, string_types): env_vars = env_vars.split() yielded = False # Make sure we check the CLASSPATH first - env_vars = ["CLASSPATH"] + list(env_vars) + env_vars = ['CLASSPATH'] + list(env_vars) # If an explicit location was given, then check it, and yield it if # it's present; otherwise, complain. 
@@ -730,14 +736,14 @@ def find_jar_iter( yield path_to_jar else: raise LookupError( - "Could not find %s jar file at %s" % (name_pattern, path_to_jar) + 'Could not find %s jar file at %s' % (name_pattern, path_to_jar) ) # Check environment variables for env_var in env_vars: if env_var in os.environ: - if env_var == "CLASSPATH": - classpath = os.environ["CLASSPATH"] + if env_var == 'CLASSPATH': + classpath = os.environ['CLASSPATH'] for cp in classpath.split(os.path.pathsep): if os.path.isfile(cp): filename = os.path.basename(cp) @@ -747,7 +753,7 @@ def find_jar_iter( or (not is_regex and filename == name_pattern) ): if verbose: - print("[Found %s: %s]" % (name_pattern, cp)) + print('[Found %s: %s]' % (name_pattern, cp)) yielded = True yield cp # The case where user put directory containing the jar file in the classpath @@ -755,7 +761,7 @@ def find_jar_iter( if not is_regex: if os.path.isfile(os.path.join(cp, name_pattern)): if verbose: - print("[Found %s: %s]" % (name_pattern, cp)) + print('[Found %s: %s]' % (name_pattern, cp)) yielded = True yield os.path.join(cp, name_pattern) else: @@ -764,7 +770,7 @@ def find_jar_iter( if re.match(name_pattern, file_name): if verbose: print( - "[Found %s: %s]" + '[Found %s: %s]' % ( name_pattern, os.path.join(cp, file_name), @@ -792,7 +798,7 @@ def find_jar_iter( or (not is_regex and filename == name_pattern) ): if verbose: - print("[Found %s: %s]" % (name_pattern, path_to_jar)) + print('[Found %s: %s]' % (name_pattern, path_to_jar)) yielded = True yield path_to_jar @@ -804,14 +810,14 @@ def find_jar_iter( if os.path.isfile(path_to_jar): if re.match(name_pattern, filename): if verbose: - print("[Found %s: %s]" % (filename, path_to_jar)) + print('[Found %s: %s]' % (filename, path_to_jar)) yielded = True yield path_to_jar else: path_to_jar = os.path.join(directory, name_pattern) if os.path.isfile(path_to_jar): if verbose: - print("[Found %s: %s]" % (name_pattern, path_to_jar)) + print('[Found %s: %s]' % (name_pattern, path_to_jar)) yielded = True yield path_to_jar @@ -819,18 +825,18 @@ def find_jar_iter( # If nothing was found, raise an error msg = "NLTK was unable to find %s!" % name_pattern if env_vars: - msg += " Set the %s environment variable" % env_vars[0] - msg = textwrap.fill(msg + ".", initial_indent=" ", subsequent_indent=" ") + msg += ' Set the %s environment variable' % env_vars[0] + msg = textwrap.fill(msg + '.', initial_indent=' ', subsequent_indent=' ') if searchpath: - msg += "\n\n Searched in:" - msg += "".join("\n - %s" % d for d in searchpath) + msg += '\n\n Searched in:' + msg += ''.join('\n - %s' % d for d in searchpath) if url: - msg += "\n\n For more information, on %s, see:\n <%s>" % ( + msg += '\n\n For more information, on %s, see:\n <%s>' % ( name_pattern, url, ) - div = "=" * 75 - raise LookupError("\n\n%s\n%s\n%s" % (div, msg, div)) + div = '=' * 75 + raise LookupError('\n\n%s\n%s\n%s' % (div, msg, div)) def find_jar( @@ -853,7 +859,7 @@ def find_jars_within_path(path_to_jars): return [ os.path.join(root, filename) for root, dirnames, filenames in os.walk(path_to_jars) - for filename in fnmatch.filter(filenames, "*.jar") + for filename in fnmatch.filter(filenames, '*.jar') ] @@ -884,7 +890,7 @@ def import_from_stdlib(module): instead (causing the import to fail). 
""" old_path = sys.path - sys.path = [d for d in sys.path if d not in ("", ".")] + sys.path = [d for d in sys.path if d not in ('', '.')] m = __import__(module) sys.path = old_path return m @@ -895,7 +901,7 @@ def import_from_stdlib(module): ########################################################################## - +@compat.python_2_unicode_compatible class ElementWrapper(object): """ A wrapper around ElementTree Element objects whose main purpose is @@ -931,9 +937,9 @@ class ElementWrapper(object): \n"> """ - if isinstance(etree, str): + if isinstance(etree, string_types): etree = ElementTree.fromstring(etree) - self.__dict__["_etree"] = etree + self.__dict__['_etree'] = etree def unwrap(self): """ @@ -946,13 +952,13 @@ class ElementWrapper(object): ##//////////////////////////////////////////////////////////// def __repr__(self): - s = ElementTree.tostring(self._etree, encoding="utf8").decode("utf8") + s = ElementTree.tostring(self._etree, encoding='utf8').decode('utf8') if len(s) > 60: - e = s.rfind("<") + e = s.rfind('<') if (len(s) - e) > 30: e = -20 - s = "%s...%s" % (s[:30], s[e:]) - return "" % s + s = '%s...%s' % (s[:30], s[e:]) + return '' % s def __str__(self): """ @@ -960,7 +966,7 @@ class ElementWrapper(object): the wrapped Element object. """ return ( - ElementTree.tostring(self._etree, encoding="utf8").decode("utf8").rstrip() + ElementTree.tostring(self._etree, encoding='utf8').decode('utf8').rstrip() ) ##//////////////////////////////////////////////////////////// @@ -1061,7 +1067,7 @@ def slice_bounds(sequence, slice_obj, allow_step=False): # Otherwise, make sure that no non-default step value is used. elif slice_obj.step not in (None, 1): raise ValueError( - "slices with steps are not supported by %s" % sequence.__class__.__name__ + 'slices with steps are not supported by %s' % sequence.__class__.__name__ ) # Supply default offsets. @@ -1103,7 +1109,7 @@ def is_writable(path): return False # If we're on a posix system, check its permissions. - if hasattr(os, "getuid"): + if hasattr(os, 'getuid'): statdata = os.stat(path) perm = stat.S_IMODE(statdata.st_mode) # is it world-writable? diff --git a/nlp_resource_data/nltk/jsontags.py b/nlp_resource_data/nltk/jsontags.py index f15fea1..f85f67b 100644 --- a/nlp_resource_data/nltk/jsontags.py +++ b/nlp_resource_data/nltk/jsontags.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: JSON Encoder/Decoder Helpers # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Steven Xu # # URL: @@ -19,20 +19,20 @@ import json json_tags = {} -TAG_PREFIX = "!" +TAG_PREFIX = '!' def register_tag(cls): """ Decorates a class to register it's json tag. 
""" - json_tags[TAG_PREFIX + getattr(cls, "json_tag")] = cls + json_tags[TAG_PREFIX + getattr(cls, 'json_tag')] = cls return cls class JSONTaggedEncoder(json.JSONEncoder): def default(self, obj): - obj_tag = getattr(obj, "json_tag", None) + obj_tag = getattr(obj, 'json_tag', None) if obj_tag is None: return super(JSONTaggedEncoder, self).default(obj) obj_tag = TAG_PREFIX + obj_tag @@ -55,12 +55,12 @@ class JSONTaggedDecoder(json.JSONDecoder): if not isinstance(obj, dict) or len(obj) != 1: return obj obj_tag = next(iter(obj.keys())) - if not obj_tag.startswith("!"): + if not obj_tag.startswith('!'): return obj if obj_tag not in json_tags: - raise ValueError("Unknown tag", obj_tag) + raise ValueError('Unknown tag', obj_tag) obj_cls = json_tags[obj_tag] return obj_cls.decode_json_obj(obj[obj_tag]) -__all__ = ["register_tag", "json_tags", "JSONTaggedEncoder", "JSONTaggedDecoder"] +__all__ = ['register_tag', 'json_tags', 'JSONTaggedEncoder', 'JSONTaggedDecoder'] diff --git a/nlp_resource_data/nltk/lazyimport.py b/nlp_resource_data/nltk/lazyimport.py index 266df76..e51f2c2 100644 --- a/nlp_resource_data/nltk/lazyimport.py +++ b/nlp_resource_data/nltk/lazyimport.py @@ -14,6 +14,7 @@ See the documentation for further information on copyrights, or contact the author. All Rights Reserved. """ +from __future__ import print_function ### Constants @@ -49,7 +50,7 @@ class LazyModule: __lazymodule_init = 0 # Name of the module to load - __lazymodule_name = "" + __lazymodule_name = '' # Flag which indicates whether the module was loaded or not __lazymodule_loaded = 0 @@ -74,9 +75,9 @@ class LazyModule: if globals is None: globals = locals self.__lazymodule_globals = globals - mainname = globals.get("__name__", "") + mainname = globals.get('__name__', '') if mainname: - self.__name__ = mainname + "." + name + self.__name__ = mainname + '.' 
+ name self.__lazymodule_name = name else: self.__name__ = self.__lazymodule_name = name @@ -91,9 +92,9 @@ class LazyModule: if self.__lazymodule_loaded: return self.__lazymodule_locals[name] if _debug: - print("LazyModule: Loading module %r" % name) + print('LazyModule: Loading module %r' % name) self.__lazymodule_locals[name] = module = __import__( - name, self.__lazymodule_locals, self.__lazymodule_globals, "*" + name, self.__lazymodule_locals, self.__lazymodule_globals, '*' ) # Fill namespace with all symbols from original module to @@ -101,10 +102,10 @@ class LazyModule: self.__dict__.update(module.__dict__) # Set import flag - self.__dict__["__lazymodule_loaded"] = 1 + self.__dict__['__lazymodule_loaded'] = 1 if _debug: - print("LazyModule: Module %r loaded" % name) + print('LazyModule: Module %r loaded' % name) return module def __getattr__(self, name): @@ -115,8 +116,8 @@ class LazyModule: raise AttributeError(name) if _debug: print( - "LazyModule: " - "Module load triggered by attribute %r read access" % name + 'LazyModule: ' + 'Module load triggered by attribute %r read access' % name ) module = self.__lazymodule_import() return getattr(module, name) @@ -134,8 +135,8 @@ class LazyModule: return if _debug: print( - "LazyModule: " - "Module load triggered by attribute %r write access" % name + 'LazyModule: ' + 'Module load triggered by attribute %r write access' % name ) module = self.__lazymodule_import() setattr(module, name, value) diff --git a/nlp_resource_data/nltk/lm/__init__.py b/nlp_resource_data/nltk/lm/__init__.py index 5b87ef7..b76799b 100644 --- a/nlp_resource_data/nltk/lm/__init__.py +++ b/nlp_resource_data/nltk/lm/__init__.py @@ -1,6 +1,7 @@ +# -*- coding: utf-8 -*- # Natural Language Toolkit: Language Models # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Authors: Ilia Kurenkov # URL: >> lm.generate(1, random_seed=3) '' >>> lm.generate(5, random_seed=3) - ['', 'a', 'b', 'c', 'd'] + ['', 'a', 'b', 'c', ''] Provide `random_seed` if you want to consistently reproduce the same text all other things being equal. Here we are using it to test the examples. @@ -210,7 +211,7 @@ You can also condition your generation on some preceding text with the `context` argument. >>> lm.generate(5, text_seed=['c'], random_seed=3) - ['', 'c', 'd', 'c', 'd'] + ['', '', 'a', 'b', 'c'] Note that an ngram model is restricted in how much preceding context it can take into account. 
For example, a trigram model can only condition its output on 2 preceding words.

diff --git a/nlp_resource_data/nltk/lm/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/lm/__pycache__/__init__.cpython-37.pyc
GIT binary patch
[binary delta for compiled bytecode cache omitted]

diff --git a/nlp_resource_data/nltk/lm/__pycache__/counter.cpython-37.pyc b/nlp_resource_data/nltk/lm/__pycache__/counter.cpython-37.pyc
GIT binary patch
[binary delta for compiled bytecode cache omitted]

diff --git a/nlp_resource_data/nltk/lm/__pycache__/preprocessing.cpython-37.pyc b/nlp_resource_data/nltk/lm/__pycache__/preprocessing.cpython-37.pyc
GIT binary patch
[binary delta for compiled bytecode cache omitted]

diff --git a/nlp_resource_data/nltk/lm/__pycache__/smoothing.cpython-37.pyc b/nlp_resource_data/nltk/lm/__pycache__/smoothing.cpython-37.pyc
GIT binary patch
[binary delta for compiled bytecode cache omitted]

diff --git a/nlp_resource_data/nltk/parse/__pycache__/generate.cpython-37.pyc b/nlp_resource_data/nltk/parse/__pycache__/generate.cpython-37.pyc
GIT binary patch
[binary delta for compiled bytecode cache omitted]

diff --git a/nlp_resource_data/nltk/parse/__pycache__/nonprojectivedependencyparser.cpython-37.pyc b/nlp_resource_data/nltk/parse/__pycache__/nonprojectivedependencyparser.cpython-37.pyc
GIT binary patch
[binary delta for compiled bytecode cache omitted]

diff --git a/nlp_resource_data/nltk/parse/__pycache__/projectivedependencyparser.cpython-37.pyc b/nlp_resource_data/nltk/parse/__pycache__/projectivedependencyparser.cpython-37.pyc
GIT binary patch
[binary delta for compiled bytecode cache omitted]
zl42}OzSgQYBje|HGs)eB2H56fj2>gwvwHgE*RPA>{G=4zgfZZW@=#4y5?eglRWqRk zx)q<|z0c@lL+{Ls;@k|{lJ(M>-kJZeH%#zo>DK#6);p#DJM^8J=q+5HoBOQs;-zcz zlTkSC^-i@qP1~>x89fvE0dG|e?lhK&YA~7yyWl^_dsp?phTm3W`i+s(pWevoj$CqJ zTgzTIPRyR``Y$6>EPrD3tlbM67_4*vWIqU|4WaJ{9YWF%C>KJQB{hUF5_~2Al8Ls( zk|a~QH^--a`q!fsl?(nc`q$Jx998mR%Ik2U(en`Q)qfwmJc!8~0f9Yktz5NQb1l!S zRwK=#4{-15o8#UDrWB=_gqd2a`6Y*Z4yGtU>_S!Bf(N2uhDVL#+;i^h!kIVKUj5z~ z z)*-&@efRY$o;&-_@cP(HzTH1c) z-v$JC2SGRZ=nehVSN}MKjI6&YO1$n;7Z$h=s!L=w8W!ExZ{K)sWfELdAeTX=L3%+X zZZe=TB**|kOGjAIdW3!;R4bw82w9SB5Q0c`5E<^kN6Sq3{Sgh=n&Y}MWZ8gVWY4aF zOG*zTx~gw2jHu`Jw-gEXKz&z0QZr#U}tqSxO9 zc2i!P|05vW3Hiwa`Dug z)#_}eC(Kv*pAf%84!aYFaDoXxGa20QTA<|-X69r|UZT=GxJS%_Y@%{3Umk2`S#|`? zu+^?p7CHeR7uVGo0Rc(UHc@W0QJ6i~;PaQK*h*|x=vek} GR{amhRlW-V delta 3577 zcmai0O>A6O6`nh9{%1U~<2H8N;7r;&>C4ZI-8OOUCJt%ixQXj=Qs<|hl007KUE5Q? z`RRGjCGloZTMAN;s_069L{Sv6K!^?S7N}xT76`!tSg^Zv!3LoeA+cbCyWyPk{4<`o z!jkXYckj99eCPkHe4+oX z`uzxU$j(R1+(Qy;3Y{%C8@_~ktzo-nRNwpiK#&tR(gQiXUcX#gZq{Y#ZTrr0vt`3d zsi^$SMR7uXnwdYAWCoDNA0X;*>7gCzG==|O6qkrt8&a2x1L|t_PM!$Ow2rwo&xfS? zEc?uefk%+SMdvIZ?5eap-v(bNEEMTUGtMG(!f&p~h6lZ>l-s{=r$8NN($@kzcRDUo z>VDtzPY_$3wB+rU-Efdu-S(@?#7uqO_vFFH={5n$B}emwgS|Gmd> ziR{mbK_e%6Mew`9YsnnZUNHp}@K=@Q_NmFjp`&cbsE!s#w5Z`FiQD$Tdgc^axZ13@ zCjX{BDx5iR5bw<%AgAeU3!2Vq)vq-ho;KWN5Bk)<3WfI5bQuS`u_S>tHvsS0H9Z@y z3C+oxo7vF!Um?j`D(lQPb9j!fOk65SZ}k>y|15pw&=%GrVC2;#Y#F2<<1y5OgXk%w z{6%p8;5)|E!vT(*J~}2DW-P>0B9c^fx#l_`8`_YMdiW@kJPhRU zZ^bZaXGX)9S=@JfhR&iv`S7nqn@q07k#$kPyfFxXz>_ktY10^kP;2-hOhy(456BiU zY&b&fj}eniYcAr8CAkTlY*mE2|(4iPnp-1I6Ug>n4e6WlewE7~vMb}dmT3|9+p z!3q38CuGB@vX@6L z>U&2&ULdg$0-2|rod|6aV_`b>cC*$9g};o1kBHPU&X!Ick=@3`D`%h>=u!iUcs8jPkAFDNjMKAJ5Rfly@SXkte6@i^sGILI$oHH& zQ~Z8E^UY4r&7CV;I{U`_6q-XlEbf1C+y0E!2%~K)Wx5#-BEySny!0*cg8H=d{E4mS zyIHWg;&s2Liu8s$G;$u_*GI-3au=6m*hjm5TBzUvWf)K{07pIr7Z5`B0ow~mIUqn> z=Q<>@*EmlTc-yBkqozm%FOL2r{uo`E86EdlF`{6a9_LkQZ2Bm-3hE6k50fZuSyk8e zJj)6>Ge3g+u6i)$O;A087%nI}3E57IY6l*AisgP0V{}cvr^+vVQ|wWH zeyK3T^i)>6h}jt533|p?#bH8R#_D*1$f!g@FD+bJIF&Eb=Zv~PF<4@6P>^n7VctyN z70FkS=p*&-iGLP|FQz9$=^~=)`5^e>s$H=&spu70G@b47~Cq<8dw$1SYIy%*9ZDn0of+>v*U$p7b5LXjA8$ zC#n%zIF?w?M5rgH;B$0!jb6p5Eue#TZ@CLd@>wu*-i_MqgIRiagBF&+5LiqXHg*oj zav6y?J@Qq0XwlMT>^Jc&77SkKkB&1VdZdk3vlV)N+g`i-s)FQ0fiv?1ah4dPI1_)u zFgZd8Q5eP_Em|1c`OW@jg{_F8aDX31np5iZY-Ma)8y3wSj#c6qB;$2@8uwK^f*$p! z*{4J?_xSGw1SnsmbP$D>BykzoYjg8Jdy*H6BVJ8dk<4No!(hZ$@K zD~gqP1dRP_xH}t+UK$gxG5Bq7EfUR^w>Da8r6+{J^OAg|QeDS4GwSD;znI##1jo>> z_Sbu>tAH)tckfs<*|^e|*Kr^Sr_99Ks5&@)AlNg1##rE%oC5N17@mxC%fDeCoTFQ+ zZJTv<=Ooh|JXAL=^^7#M z>i2KH+J2R&&e1}%Gn}V|ieYF~uH95pfMHTa2&XQ}T%ZI33U8nw1@a+~uYgPpOSBMv z&PAx<4rh?C_Ex3qa{a}U!5C4qOGL{4p2pn`_2Y$-m{xyT_@mgT9#sCrvj>t<*C?AQ z9c#G$O4wXhh;;@-zmD@g4maTlI7X*&ujST!pmu@tGOI(>VQ0EY6fqB-s}nrS0|#=q z(PECQH}AE{=~RLv@!n`Gn=tYs7fp#gPR#HQF-loD?$;WYRW`FYY$CeihNXEDtEBme z=TS~K>M@l(AF}YG-Ks$x4;gLP!iX`ji*no}w-S#HrBkQsI8(cFGL3W|X@>)RG#Bq} ON7|7xuP0MODe*r6hPfC3 diff --git a/nlp_resource_data/nltk/parse/__pycache__/shiftreduce.cpython-37.pyc b/nlp_resource_data/nltk/parse/__pycache__/shiftreduce.cpython-37.pyc index 93f72654dad86b569c795705c54da4da4044e2a9..d241cf5edcb6abf1aa335cae07ea6a3a9e54c649 100644 GIT binary patch delta 2296 zcmah~O>7%Q6y8~{*Y;Ayq;`D}=;_3!J!c;xHifhsp8NJyfEzO2V%gEQ4?fQyY zZ>?gKh7DSn)8Alvcv;^&^rR_Iwrk+qZ$+Q#rhJrmu>8lvPu|2K?VB4`IU<5VpM86LIz@~_(lH8*vk8)g<_I?h(ADLwR z(fyI1Sfd{|uFJE>zRvGzv2`ed+;y*IJ*c8_p4Y@%3cHkGWy>VPomPFQDbz0spJ{@!_UWt z-x)x$idhP0B5lDvQ6&@Zm{>~F74RALht|ZQ3){$?UJr~DP&4MaFU9>*~y>helBR{Et_dA3p! 
zkIsdK*@yGw2`U6<5Y~J9Y0+Q8^Eab~IY-_pGK|H28IRwKzT5W|D~pmF!K2Gm@f zA>W2qkDlt5$@ejW;{+!N#A~4Df-!<30aY8Qra+Yeq7%*&yhcD7!0QM+_qe0$w&SdK zH6?-ME|QYyaC#C&CQ1HCxVJot-?hc^*K9oeyZmt1fu1hC4;+w&;yJ-e?@?AiIliMy zuWNlSiVUl?L09F9Z+ih(#di#+V>`~dV%cu%M~X_$rDUJ1WYt7k&a*xSm&ilf#Lr99 zGtFwLfS(%tUSl`ecncX%A%U=9=#Fww15?>1*-*WO*D?t$+C0XT{`>YnJEF( zGzJLzNaleT6HPQxG%+#r;G>Dg2VeA6M_zn1MtLztc`%-Hwq>^)8k_X&y>svR&Ub$A zUzcuN%FJ|iB^CV3onNs2IC(R(v+^45Y{eFiQWd5Uv{oIp8ZjfhjyPIXH+7+$R&34I zuPL@J^{Wvxiu<#N4Igq6Yp|E=V#IPQOwT*yc9h9MwK7#xmnFpf=hJ6I!hb(Kw0*N`sG}WMo(q0VTk2#CmBX~BcvZigO03)ng>$0yU5YrfmhxOvi)C)VS+c*CSD5H?RRNQ-@fzz z&F&N3!O*U6MY$V4Tv5mJ_r$Abk;o8TV#*rG2DF8xmrfo+VMIIWMyps05<=mI5$SlH zHXlZ%pMrA(F(JnNmvVa!6G?^wI{M!f?!`;L1)t_7#M(G%GUe_?j$M8j@+Zdg!)_=R zp$AXTB0a`uzMKXk4WlI3oZd!%jJ|T&@XgKH1_Mw3#(y`1LWw0s@Jjx=5Iz1cdq);2 z@thnOq*W{H?J=U{!7z$YTrf3E;z5=Zogs?jm6&f1&c?|@?4oY}&5Zxfh_RoGh+C^F&VI!_b{ad4-jXjR5Ek*)0?6knxQsXJQ z|9fG;f7iJE==9bql&9JID$y&W{@p^>zcluJf$RWj!V{#vUjGl`_9UL430C(zYCrSu zpgx1gF9!F<9XFHPm}5xhS2xybp4kNr8f z-o!L)aj1q`&GA;6E&LpQ3X@Avueu&oYRjSSHk^tF)SesDl}Xb@NcyiI?_29|U$bSk z{xuoqv84B?S}Gces~OQFV3F|S7Cuvznq{3el4hEWA6O3;&vP@upZF=EQEz5=&kyZ^ zVxs~Y*=9!M<1B*$$GAm>R}_aZexizxpChNKgj^c2X^~dbde54&rgWnmj_;0({{Zup Bq0;~W diff --git a/nlp_resource_data/nltk/parse/__pycache__/stanford.cpython-37.pyc b/nlp_resource_data/nltk/parse/__pycache__/stanford.cpython-37.pyc index 255f7bdd0e315cbc0da5b1afac4976b30ceaa081..6aa9c927eddf4898aaf313da9ed4d1353464b08f 100644 GIT binary patch delta 2296 zcmbVNO>7%Q6!!QpYkTc%+{BHY2GUj~*)~57A}OV5pw3T4MdPR>v|1syu6LTc*kBOwl`mr979IIN0Pi4)urq9@*)m7By8Kf;#2dGqG| z&Uc_JeUA3Sz6kqnLxH^l`?E2v1dJXd4~)DJ7@$dG#|ANa zjlQdq4WjKbqOV5O!mpU3N&>~R>$QeKb+hhLRyCa^V8)&GdK1^>mzNh{O}Vt?>h9$x z_0wc9Ml?PWe@*EH3k>1$(FxCC6Z~epOy>B{@zwYa;PT1%fn#hZ3iTrt0aU_DNIy|! z1GwDh>BdpwSV8VSX`rpt#+6AnAJDuDiNE=6{jgO-PRTj-__uvWq_86F`Hu>N-69ClT`z#sU-)QYb+~FYSIR3<)iE6ccOz8$Vs5J>7V!04%Z|B=h`-(t@@J z$sq)BfoG69h;RTww9iXO%^_fCumuFXsIad#{cTny2d8_vIWS3b{;h!@WeJU8&v0+# z=`EE;_7cA{vie*Ws33iwH7v(v@DWg>;XEZ7bv)DU%xV(E&+t)o=CJ5;5i9b7tR8+{ zYSc7!eNzppybkB#=j$9$MgKGPyUb`iFg&XxTm)zfZzhCd*ZH-n_x8zn9tOuORM^a4 zbPz^ylaEg?PHc~i+1rHME`&}4j$ZfgOmCznMJCus?FsvrpzisnU-HRb@%`AG>W-MO zSP%GE`NC8XKUJx$fb%!e1!4asn}_^ah9w#z7G*uZQz0yaOx7Gqdfh6yu_drNvx8Y_@y!t`d* giG#N)M7%Q6!xsw&e~o(jvG6*oBl$ZY}+KQ)C5XHnmUP5OVfmsK)WDqx87-Dvb%O? 
z*Gi~BMM{pXL^To!1mb}Bx$l9WOT`Hei1tzmEtTK`65Kdds)WRQW6~rRst~sH&3iL% z-p{;mW7?|Oqv|Tb9WguYWb}-X<94T$)w4qGu)Ca`o&#CYw>=lJ5>|38VkK#2 zHL7=mp0ZTXRoV@D9`v-80X-x10_dGq7WAypd#G%6t@aTsXKh=HtP*{@MP7_(-M?cg z=QI+^-cqfB@=LurFklD|PY*5D9Xj-y`TAnLVX~#6LH?~=CMS6^ zHcz_vrP$DT0gZbQwgYH{6;XcVf|XF+0dN+DUIYT5b%s<wyZ56B{`A$VnLU8g|VO zV}7$?Q>K7rm^7-k>3I#)Ut~poApUX*Lxc)-t97g9F0gKN+J|s}zZ*YP+!{Po45Q}O ze8bR!pW=H7$@0HCW=KDuR)$lXO!-Y^JJEwrm28w8;$Ni3P8K#p8H#{bL}cxO7r6Fqr;Ma`HZ<%}^iKQ}u+XH;g+PoJ7& z$G{rWqbs!9^eGeZWY&+fiS>Y= zDKnQ1vFYvL%glXJ9fsL!>4AFFZ#4Z%ojIn@4#AujXV0J$(aRGkVXABtVN;guS)34& z#!%WCj-7(J4}u5T&j=akALUAiAD^A-n+prnY9$y1zvgyIGA78)R4?C`e+#Pa~Y;uNF&v`#}ujbOm^2u>&|ifPjg!4ulv& zAOEm85DTN8&-k69HXX;wClR6mq3oIt4LcZZ*k+Z6af`yGpbRfm7;n0E&0W$Gf>I1y zq{nM6wAyGjP2*Fgy}Mc|p~trVVY`3hYbDy1#?=f!g>|BsR6@WS}g zGLGKWviyO}HyZ!UWQXqxNE3EFo3BqfkNi8>VM!lx(x7U0G%;)$;ZS=UH zy{H7|!t_63s?@gELt)myfh8DP@7!gO^1%<BddwD$Q)UP=k_m2d=spV2jmHHB{!~d zTMpQ&@3~%H9M?*n>?7w9HBY#kynHJd#f(WVz(zH=UFaqm+Tx|^2-w*gi z7a>Ld$>bZv=liDiU-xm_*0e*_qwxJOoVwMt>3st4au%#2k}63=B8rqG85r3z%xOjT z0(uZ%APSPMG~r|6Q(y}|=3&xnUT&~@m3p4IaeBtJ{iVSgd=8jv+J`+R<9=~h#oZKF zS3yrLOYzHbq^D{=f@@>cRsOAf%}90*JCmg(oE71qp#DZ}*8OBm}(hf@lThoU_I2ZX;gs z%HN)Q?wNb;Ip;fP_7~q}-~S#fUFz>IXz5bB8>+i5~G>3Vb&rHTpPJx^< zQzWNBPMc|xbx${S*w;P7G~nCtGJe*~(q6{P`FS%>a@H&OeP$oiUOWd@Ij`vVoBgzt z_e%bNIY4s3EBk|HYmgRw-jF|R4m0hB797wueqgsCzQZ*9Jf{Vzn|Q6FH{w^!ihs~N z=#QGCOk2~Oey8-R=9KuMSJLKTSPwX5SeI#i1lEJj5UhuI1@_0BVducBnmO+1*R|@% zpD_>B8dC%ItvK|W3AbE73Pn<7D(AR&+}I5RkV}zpgT!hygL(p*x84_e-w}B9-OFd1 zu{a~YWIV-YUP@JsDqWq)h1vXuR|*)eI9-mD!!Pjrb(G{yh;;>YuiOdN!os{pFUZbc!xT*ZgzzEy8|K%3q^w%|b`YN*Jg zqQ32Vj>@2Tcc{EPH0?KrUwW8~-=VT+^b{k1W{$y;1 ze+)gJML37>D1gd6eR*}$TDkUw%3r&>c7;Uo7;eua5bZ9bw2H8Xa0Q|3h9n}-dDwVc zE{~71^YZ!eFR+F7AIICQZxQW@C6CK*Pb@!#=}~#$U?|*V+uwH-oq{e;OLpw*t!XsW zx4G@8tnE00$FWKa?mX2NTs?0tON>zw?;_!yu^pZ|Zv4~3F3Ee(ng>KI#VzcY27o!NjV#HSECv;=$12GLy%qlB!i7COnfE+3m-nCMoEs9v(% z*os8B!(o`-;Wye}ot|N9c$|0^K(WF-qnR`wo=3`m64lQk5Zl*L!lWqUF4PZF%!4|l z8c0gKx8%QOWv=*!ow>PWy_s%_^7zyA_f3vz8*G%WCLn1LR9X>D*L+lpn z_Ju9))lRaXweQy&DOQz*v#+1{1X@rr6O@_&%BXu`%tZlZJ+OT)`sCYZCnpV1RCC>H znt3018x1mpCj^W1v3^_aEo7GgOI$6Q*Z~-oN z@sv~Y{`|`3F%&7B4SZ%&yaHcJ_d?qdui+likgoqA24eGeIxL6MrxEgU{=!5HOQ8&C z1V0v8kj-4(v*Vb@mY`O77)^<>MAmK%A5^}_(Kv%ZEsa#J9{)&&Rxv|!X!W{w5)cc! zuaH#49oDY(j7ao#q;vt7oKHGKC`fL9w{t7hk&`DtU!_QeJnOn(VKC2cgK8E z{$}AD&I4W{opKYo>1{zRN*zmF-@z02$RNImOZj0h#)>XT2XurzN<%7!Gd3jIHoO2;2;)h1hf(_BT(T}VJwh;O0S2} zi))WGf^UK_tr wL|^>~B>#DOqsh))@7nXT zJu`0X2x;lB5D%a-;vo`3NQm+RPh0Vb5HIjl)JH}<^a%t+NPPh*QYFqglX~54#0#mtS%~2fTDrWjAOo4jF&3IWe zD`+k2s-9+QEURwL%bR(j?98A=b9Jv^7TA(=i(Z%6#d6*)d1bTAvhH?!J!X$kzNz>F zsv>(gi|`$x(BG?yZ)Wb1i-Ozd^_%_PK69TpU=9dnQE`e+*E@>SCHKFbH3v~IIc3z# ztRFzV+v!2QNA{!rpwsL0y`z|i9C24M`u|Qjw$=ow2M(h2;r)8x83@ zi5528dc=Woh(>?vGzT)=awD@CMm9Ncwx<&<3O1z=C+L!q_-4<$V;wp4%Iyg3N@P*Q zTX@6r&;QjU<`2_>qo2EmNs`(pS)vj^3E>F-4~ao~NzLdby;$sqQfsR3*O`-4HM3~D z>sHW+>Vd!K4n9t9e%BHMgPG<8icm&aTUap)zy;t2a23vz^#Z{Q1k{uy+Xxz9poH7W zQKD7?znbVET`X(hCpj#E^`H);WGs?`eU^R%&(T7G7Am#6>(Itw+_}*aUZT6Q8d)|} z{?z(#@IRs*sshy4YwYwNXblGul-yCF zijruZKJ>GvK?piY)mm6vG4jw+o;G_p0*VDUF@-q-c0Nj1Tp^%xz*T~dSddZ9Rz-e> zrjSfZ9)nmq;)jXCl+HP24mYqYxhw=A@hQzt}gQ2&? 
zg_beg5?!;jpXoFg|83;T3Fc4%$r?aCs(E{^;#!d~#eY z(sgi`?4tNk+ls2s%@RLv(ef(*#kHRk6Xk`@To37ADxLZDeov*)o{ zB$esKyd!!gS_A6%fN=AkKDz}+78>hdH)2s~sv)8hDs^<#S;zv_!X_Cy>|0l_IR@`TvCLW*N zB%w|*a*DlaKQ=h%D&Z!AS*QeCezc0s2bUIi69$je-HgE}bvI>xh{2+@4l*5&e|u{< z{$yDbziB<5u4cq|eD~aY6E|rqx3xji>j;Tfaf47oo@CXxJqh}cW{$m_LEX^X;hF^x z3FQ(~V4Dt8^k#nB!f(5}Z{~MWA<=kSXFMA3`giNW%&K~ldSD3r+O63Qxn^XzeMuhV z8xk;*8r?n-Kc1am8zPa@U8Xa~pot%e>ISw0Z<399%2_>&qBPy<2x=tVBgn;XzA)0H zRwWuviWh>4vZ+^GI}BxL0ngHD+RBt>Hn)5HtoF{)<}3lvJaOF^`%>i=sB&j$*IUXd zL~8t=MpKWIY$b4$ECde{)v+TR%y@C`6=B9dotr;T9EWccP%XGB*gN{Vh1;~y!2k`v znnO7f@4xt{`Hb&_lm$$Fe)@Tad8Bz3*6GGwj=oY+J#7z2W(%9jmCwkD3U`GLPClk%z^3X&bM~YSIDCbZ%d&5mv+R*eTskHdyhs=31 zpRW6$(}}JnuYoWA4S)d5qVGnVlUztBpcRS)U9>M3+^2^+eHodBR0R%rPbYS3l<1dm TF1B2Gxj+|%;|TG4cgOz=`w5Gm diff --git a/nlp_resource_data/nltk/parse/__pycache__/util.cpython-37.pyc b/nlp_resource_data/nltk/parse/__pycache__/util.cpython-37.pyc index a5cff93c30fbcd72ec7bf1df1d2fc0de3facde6d..5e0696ce41ccb6533c345ed0ace0a05208cdfd0d 100644 GIT binary patch delta 1099 zcmYjP&ui3B5KcDPY&OYm_h+_0JXEx9Q`#;?1dAf5sJ9{%JUo;T^Immpl3gcR=>L zB6zR_Jg5i3e?TcBc=F;=&yt(}f=5rzysg?mzBliic{A_Ld@nXXZq{zpYLhTPli5ske`!26?>riD)Zgb~QVJ%+cbzIwb6h6bxHVzfu5S;^!b-Bx1yp5qQ z?+DlLadTVodPkp^n`%)X!!XJGok&>^PX64PZQn&Rr7}uqUOl7MRaiRy z060gMtF&TN>ep8y=0lNe7w*bX{|HJ6bh)gzy^VvznSlMZW$>bs?y1WEC3__zTobY?4$r$Q@W(s z*2hxYQmm|Zir*0dm702`cBP8Dl!T-NeT2doJ*4)_E&@G);k4!R{8O)&eHh3dm{;Qw gpG4vsNhzI0&{e0@!M~w8YD2d!oAc&sV_^0F0IFN~>;M1& delta 1024 zcmZ8f&1(}u6yL9GHrZ_2d~MpMSgN(@Vw0%VL#2ons|XSW!NVXd+stUw?k3^w2J}?4 zJ&R@Vq97=ERM6mELW9C^SUAK7b|&Fc4G( z>=!Z!X#`z>lP2bU@PfbtcL+WtdGvt?Ap@|`D<^>PSB70J3hUQKYpWly*0%R>t1sMk zT221hSeI?CWg2pMbT;FuCOSd_CH^uuRk|tmA(kPPXXIHqFT=v`K$<$m0^*;Eqt=EVn-*rh=q~+rxhsky`r;Hz&MV8 zSob;|Hi%*!*f#4#M}yRB(R=fpbgDqddC3pwK!yrN!YlAT|XtgMy0Em=9cxSQI2n(3;gSozzMUb4$pVM_+z zF6~Nqtax40)SuC@q{(r)1&$vXH`{(QasofDw0)-;ShnFhkqxFB-rb!X8EM^$HUr;S zF!pwvf!8u4XWjk$)1Xv5fVLhk&bC9CNGc($b-ycr z!m33SH929Gdn~~hRMMHP_mFcA(K5tvQeXhC4LMR{Nd^;#*();JN55sdObMz0=!l*E$+ec65lb@#ik z9s4@N3VeEE`j99|?|3Z3Hq^O0F>oPLl~e%xjIvy3&*PS3$^*Vn`5gOWG|yyHgR7kQ|AaM zhB)(vV?~>BDvY2%bqlz!F&tECL#bS0!}zDN0#oIk6R$Y@mzg`yUBn&03C9W}>_vkS zQxUaH;_?LTA18Q<;4(okk;%1&QMb&$sJ&VJ|H+9J!vMPlhH-_j&Caj^{{HNl&Ox$9 zGX-LTCP<>90~HV`T|gHB@&H!|_FW4}q&1Hl6mm#4QyOWxP*GBal5$$Ptc)qs%8-%) z;xzFmJ|a`!c9-Vvr&ykUd*)p>$Zymqlo`}w0&4ud`qoKmcAP3^#%%pxj)l+DxVMA( zg=s!?_Oq#k3ahAv=Lz=X?9uNe^xw{Yw5+FnH`;=0$kYoY*2I8|D0NjRHo){e6W|%L z_ACKK4WccF_&4Xqk6xfPo_OWx*%A+kUKGmYafTeK_40mNYG%s;g}78jAjfcZnQ%Wk lAyKOF8T0VfaqT5KAzGp$tE_^JujW*ybfk`SUU@-be*>maXNCX( delta 1681 zcmZ`(&2Jk;6yI6zuGentSV`OTBMDB^e5_M!r_D!|rfC~cQ#EQ@%83oa+IY9|;$7Q) z>ohbVMUeahXe5qE95^8)R3DKzQZ5L%4sbv{AtVlP%Zc}9g;OkH$-jN`<~Q$q<~R7| z@&0NilUDFIy7i#)#Vtj7L5qKX8VWb@A3a2H6_*7{$ZW<)Rs%Im*ojcHHMvd%dT7`N zQ`Qt$bM-^T)ji`NVJBU~O&%(?>7`t=uEK_A9w>Ms^`>H_e*A6sO>bW9wcIwS}Y4z?KymuUJm@IU2TS~mFN8X^Y4$@ zY6}Ew_wa#Tgo{o%C}6KE3{L5_btd}B%_k(444)FNR-Ps6Qd090HUBA|l6 z03RC}8aj!hm5FOJO6qe2XZih++oP|LI8M-0+taA~q|+JsL1#&RFn;MZ*?{hGp~4d= zBS)mGH9v4EsmM-s6$Z5!Zk*V>UZi?g&u`_ejDI_f-}iBvC# z;-30Jeovi!QGR0fa&fl&D8GS0##O~l%ZS}m3E(mT#US)u-)+}KB5Ff# z1&X*o$qU8L?p-JP3c<@hF|GmBS$b@EnD);T+#t9~&@ERS{3+C3=Vwaq761Q^2(wZF zuLYILDo#&CW25k&vV#M$as$26RN<$ezbIX)XK^9#706qZrzMR7Py>O=1(XPoZ@|an z<)CDEvaAMfP`e^JYA$GMI%g)*8FR*b-OQVp%pp?;qw`_;%LFrcY3?JICO$*r= zNrC@5SD&Qe#*4)q!M1ucpTfOej2otTWB#jBOoh9sg>{0SLJ|E?(oxte@HZAF`H%A& z-(7foQ)rE#U5DGaW2Z?h%G(W;g54(-+u($u1F(iFxI;jV0y&Q-`Sjx0xofnhg4ro~ zy5a-@P_1oVU7WS=p=6zc4PxCVAaBycu}xcdy|5Vx=G$7-*a`6g@*3^96=#tyj5sbk zjkYUNj$o4ixj1%Fjvx7eJi&jH7aG}J=U1*a_czH74n*kqjY?(1N{iH^okq3YYz9#r zZb~LBA3b`- # Edward 
Loper # URL: diff --git a/nlp_resource_data/nltk/parse/bllip.py b/nlp_resource_data/nltk/parse/bllip.py index 144f040..01934b8 100644 --- a/nlp_resource_data/nltk/parse/bllip.py +++ b/nlp_resource_data/nltk/parse/bllip.py @@ -2,10 +2,12 @@ # # Author: David McClosky # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # URL: # For license information, see LICENSE.TXT +from __future__ import print_function + from nltk.parse.api import ParserI from nltk.tree import Tree @@ -79,7 +81,7 @@ See http://pypi.python.org/pypi/bllipparser/ for more information on BLLIP Parser's Python interface. """ -__all__ = ["BllipParser"] +__all__ = ['BllipParser'] # this block allows this module to be imported even if bllipparser isn't # available @@ -100,7 +102,7 @@ except ImportError as ie: def _ensure_ascii(words): try: for i, word in enumerate(words): - word.decode("ascii") + word.decode('ascii') except UnicodeDecodeError: raise ValueError( "Token %d (%r) is non-ASCII. BLLIP Parser " @@ -260,19 +262,19 @@ def demo(): from nltk.data import find - model_dir = find("models/bllip_wsj_no_aux").path + model_dir = find('models/bllip_wsj_no_aux').path - print("Loading BLLIP Parsing models...") + print('Loading BLLIP Parsing models...') # the easiest way to get started is to use a unified model bllip = BllipParser.from_unified_model_dir(model_dir) - print("Done.") + print('Done.') - sentence1 = "British left waffles on Falklands .".split() - sentence2 = "I saw the man with the telescope .".split() + sentence1 = 'British left waffles on Falklands .'.split() + sentence2 = 'I saw the man with the telescope .'.split() # this sentence is known to fail under the WSJ parsing model - fail1 = "# ! ? : -".split() + fail1 = '# ! ? : -'.split() for sentence in (sentence1, sentence2, fail1): - print("Sentence: %r" % " ".join(sentence)) + print('Sentence: %r' % ' '.join(sentence)) try: tree = next(bllip.parse(sentence)) print(tree) @@ -281,22 +283,22 @@ def demo(): # n-best parsing demo for i, parse in enumerate(bllip.parse(sentence1)): - print("parse %d:\n%s" % (i, parse)) + print('parse %d:\n%s' % (i, parse)) # using external POS tag constraints print( "forcing 'tree' to be 'NN':", - next(bllip.tagged_parse([("A", None), ("tree", "NN")])), + next(bllip.tagged_parse([('A', None), ('tree', 'NN')])), ) print( "forcing 'A' to be 'DT' and 'tree' to be 'NNP':", - next(bllip.tagged_parse([("A", "DT"), ("tree", "NNP")])), + next(bllip.tagged_parse([('A', 'DT'), ('tree', 'NNP')])), ) # constraints don't have to make sense... 
(though on more complicated # sentences, they may cause the parse to fail) print( "forcing 'A' to be 'NNP':", - next(bllip.tagged_parse([("A", "NNP"), ("tree", None)])), + next(bllip.tagged_parse([('A', 'NNP'), ('tree', None)])), ) @@ -307,6 +309,6 @@ def setup_module(module): _ensure_bllip_import_or_error() except ImportError: raise SkipTest( - "doctests from nltk.parse.bllip are skipped because " - "the bllipparser module is not installed" + 'doctests from nltk.parse.bllip are skipped because ' + 'the bllipparser module is not installed' ) diff --git a/nlp_resource_data/nltk/parse/chart.py b/nlp_resource_data/nltk/parse/chart.py index dffd644..f1f68b4 100644 --- a/nlp_resource_data/nltk/parse/chart.py +++ b/nlp_resource_data/nltk/parse/chart.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: A Chart Parser # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # Steven Bird # Jean Mark Gawron @@ -35,16 +35,20 @@ defines three chart parsers: - ``SteppingChartParser`` is a subclass of ``ChartParser`` that can be used to step through the parsing process. """ +from __future__ import print_function, division, unicode_literals import itertools import re import warnings from functools import total_ordering +from six.moves import range + from nltk.tree import Tree from nltk.grammar import PCFG, is_nonterminal, is_terminal from nltk.util import OrderedDict from nltk.internals import raise_unorderable_types +from nltk.compat import python_2_unicode_compatible, unicode_repr from nltk.parse.api import ParserI @@ -90,7 +94,7 @@ class EdgeI(object): def __init__(self): if self.__class__ == EdgeI: - raise TypeError("Edge is an abstract interface") + raise TypeError('Edge is an abstract interface') # //////////////////////////////////////////////////////////// # Span @@ -225,6 +229,7 @@ class EdgeI(object): return self._hash +@python_2_unicode_compatible class TreeEdge(EdgeI): """ An edge that records the fact that a tree is (partially) @@ -345,21 +350,22 @@ class TreeEdge(EdgeI): # String representation def __str__(self): - str = "[%s:%s] " % (self._span[0], self._span[1]) - str += "%-2r ->" % (self._lhs,) + str = '[%s:%s] ' % (self._span[0], self._span[1]) + str += '%-2r ->' % (self._lhs,) for i in range(len(self._rhs)): if i == self._dot: - str += " *" - str += " %s" % repr(self._rhs[i]) + str += ' *' + str += ' %s' % unicode_repr(self._rhs[i]) if len(self._rhs) == self._dot: - str += " *" + str += ' *' return str def __repr__(self): - return "[Edge: %s]" % self + return '[Edge: %s]' % self +@python_2_unicode_compatible class LeafEdge(EdgeI): """ An edge that records the fact that a leaf value is consistent with @@ -419,10 +425,10 @@ class LeafEdge(EdgeI): # String representations def __str__(self): - return "[%s:%s] %s" % (self._index, self._index + 1, repr(self._leaf)) + return '[%s:%s] %s' % (self._index, self._index + 1, unicode_repr(self._leaf)) def __repr__(self): - return "[Edge: %s]" % (self) + return '[Edge: %s]' % (self) ######################################################################## @@ -600,7 +606,7 @@ class Chart(object): # Make sure it's a valid index. for key in restr_keys: if not hasattr(EdgeI, key): - raise ValueError("Bad restriction: %s" % key) + raise ValueError('Bad restriction: %s' % key) # Create the index. index = self._indexes[restr_keys] = {} @@ -778,26 +784,26 @@ class Chart(object): width = 50 // (self.num_leaves() + 1) (start, end) = (edge.start(), edge.end()) - str = "|" + ("." 
+ " " * (width - 1)) * start + str = '|' + ('.' + ' ' * (width - 1)) * start # Zero-width edges are "#" if complete, ">" if incomplete if start == end: if edge.is_complete(): - str += "#" + str += '#' else: - str += ">" + str += '>' # Spanning complete edges are "[===]"; Other edges are # "[---]" if complete, "[--->" if incomplete elif edge.is_complete() and edge.span() == (0, self._num_leaves): - str += "[" + ("=" * width) * (end - start - 1) + "=" * (width - 1) + "]" + str += '[' + ('=' * width) * (end - start - 1) + '=' * (width - 1) + ']' elif edge.is_complete(): - str += "[" + ("-" * width) * (end - start - 1) + "-" * (width - 1) + "]" + str += '[' + ('-' * width) * (end - start - 1) + '-' * (width - 1) + ']' else: - str += "[" + ("-" * width) * (end - start - 1) + "-" * (width - 1) + ">" + str += '[' + ('-' * width) * (end - start - 1) + '-' * (width - 1) + '>' - str += (" " * (width - 1) + ".") * (self._num_leaves - end) - return str + "| %s" % edge + str += (' ' * (width - 1) + '.') * (self._num_leaves - end) + return str + '| %s' % edge def pretty_format_leaves(self, width=None): """ @@ -809,12 +815,12 @@ class Chart(object): width = 50 // (self.num_leaves() + 1) if self._tokens is not None and width > 1: - header = "|." + header = '|.' for tok in self._tokens: - header += tok[: width - 1].center(width - 1) + "." - header += "|" + header += tok[: width - 1].center(width - 1) + '.' + header += '|' else: - header = "" + header = '' return header @@ -835,8 +841,8 @@ class Chart(object): return ( self.pretty_format_leaves(width) - + "\n" - + "\n".join(self.pretty_format_edge(edge, width) for edge in edges) + + '\n' + + '\n'.join(self.pretty_format_edge(edge, width) for edge in edges) ) # //////////////////////////////////////////////////////////// @@ -845,10 +851,10 @@ class Chart(object): def dot_digraph(self): # Header - s = "digraph nltk_chart {\n" + s = 'digraph nltk_chart {\n' # s += ' size="5,5";\n' - s += " rankdir=LR;\n" - s += " node [height=0.1,width=0.1];\n" + s += ' rankdir=LR;\n' + s += ' node [height=0.1,width=0.1];\n' s += ' node [style=filled, color="lightgray"];\n' # Set up the nodes @@ -862,28 +868,28 @@ class Chart(object): s += ' %04d.%04d [label=""];\n' % (x, y) # Add a spacer - s += " x [style=invis]; x->0000.0000 [style=invis];\n" + s += ' x [style=invis]; x->0000.0000 [style=invis];\n' # Declare ranks. for x in range(self.num_leaves() + 1): - s += " {rank=same;" + s += ' {rank=same;' for y in range(self.num_edges() + 1): if y == 0 or ( x <= self._edges[y - 1].start() or x >= self._edges[y - 1].end() ): - s += " %04d.%04d" % (x, y) - s += "}\n" + s += ' %04d.%04d' % (x, y) + s += '}\n' # Add the leaves - s += " edge [style=invis, weight=100];\n" - s += " node [shape=plaintext]\n" - s += " 0000.0000" + s += ' edge [style=invis, weight=100];\n' + s += ' node [shape=plaintext]\n' + s += ' 0000.0000' for x in range(self.num_leaves()): - s += "->%s->%04d.0000" % (self.leaf(x), x + 1) - s += ";\n\n" + s += '->%s->%04d.0000' % (self.leaf(x), x + 1) + s += ';\n\n' # Add the edges - s += " edge [style=solid, weight=1];\n" + s += ' edge [style=solid, weight=1];\n' for y, edge in enumerate(self): for x in range(edge.start()): s += ' %04d.%04d -> %04d.%04d [style="invis"];\n' % ( @@ -906,7 +912,7 @@ class Chart(object): x + 1, y + 1, ) - s += "}\n" + s += '}\n' return s @@ -962,6 +968,7 @@ class ChartRuleI(object): raise NotImplementedError() +@python_2_unicode_compatible class AbstractChartRule(ChartRuleI): """ An abstract base class for chart rules. 
``AbstractChartRule`` @@ -1004,12 +1011,12 @@ class AbstractChartRule(ChartRuleI): yield new_edge else: - raise AssertionError("NUM_EDGES>3 is not currently supported") + raise AssertionError('NUM_EDGES>3 is not currently supported') # Default: return a name based on the class name. def __str__(self): # Add spaces between InitialCapsWords. - return re.sub("([a-z])([A-Z])", r"\1 \2", self.__class__.__name__) + return re.sub('([a-z])([A-Z])', r'\1 \2', self.__class__.__name__) # //////////////////////////////////////////////////////////// @@ -1421,7 +1428,7 @@ class ChartParser(ParserI): print_rule_header = trace > 1 for edge in new_edges: if print_rule_header: - print("%s:" % rule) + print('%s:' % rule) print_rule_header = False print(chart.pretty_format_edge(edge, edge_width)) @@ -1589,7 +1596,7 @@ class SteppingChartParser(ChartParser): added with the current strategy and grammar. """ if self._chart is None: - raise ValueError("Parser must be initialized first") + raise ValueError('Parser must be initialized first') while True: self._restart = False w = 50 // (self._chart.num_leaves() + 1) @@ -1736,7 +1743,7 @@ def demo( print_grammar=False, print_trees=True, trace=2, - sent="I saw John with a dog with my cookie", + sent='I saw John with a dog with my cookie', numparses=5, ): """ @@ -1761,34 +1768,34 @@ def demo( # Ask the user which parser to test, # if the parser wasn't provided as an argument if choice is None: - print(" 1: Top-down chart parser") - print(" 2: Bottom-up chart parser") - print(" 3: Bottom-up left-corner chart parser") - print(" 4: Left-corner chart parser with bottom-up filter") - print(" 5: Stepping chart parser (alternating top-down & bottom-up)") - print(" 6: All parsers") - print("\nWhich parser (1-6)? ", end=" ") + print(' 1: Top-down chart parser') + print(' 2: Bottom-up chart parser') + print(' 3: Bottom-up left-corner chart parser') + print(' 4: Left-corner chart parser with bottom-up filter') + print(' 5: Stepping chart parser (alternating top-down & bottom-up)') + print(' 6: All parsers') + print('\nWhich parser (1-6)? ', end=' ') choice = sys.stdin.readline().strip() print() choice = str(choice) if choice not in "123456": - print("Bad parser number") + print('Bad parser number') return # Keep track of how long each parser takes. times = {} strategies = { - "1": ("Top-down", TD_STRATEGY), - "2": ("Bottom-up", BU_STRATEGY), - "3": ("Bottom-up left-corner", BU_LC_STRATEGY), - "4": ("Filtered left-corner", LC_STRATEGY), + '1': ('Top-down', TD_STRATEGY), + '2': ('Bottom-up', BU_STRATEGY), + '3': ('Bottom-up left-corner', BU_LC_STRATEGY), + '4': ('Filtered left-corner', LC_STRATEGY), } choices = [] if choice in strategies: choices = [choice] - if choice == "6": + if choice == '6': choices = "1234" # Run the requested chart parser(s), except the stepping parser. 
@@ -1803,7 +1810,7 @@ def demo( times[strategies[strategy][0]] = time.time() - t print("Nr edges in chart:", len(chart.edges())) if numparses: - assert len(parses) == numparses, "Not all parses found" + assert len(parses) == numparses, 'Not all parses found' if print_trees: for tree in parses: print(tree) @@ -1819,20 +1826,20 @@ def demo( cp = SteppingChartParser(grammar, trace=trace) cp.initialize(tokens) for i in range(5): - print("*** SWITCH TO TOP DOWN") + print('*** SWITCH TO TOP DOWN') cp.set_strategy(TD_STRATEGY) for j, e in enumerate(cp.step()): if j > 20 or e is None: break - print("*** SWITCH TO BOTTOM UP") + print('*** SWITCH TO BOTTOM UP') cp.set_strategy(BU_STRATEGY) for j, e in enumerate(cp.step()): if j > 20 or e is None: break - times["Stepping"] = time.time() - t + times['Stepping'] = time.time() - t print("Nr edges in chart:", len(cp.chart().edges())) if numparses: - assert len(list(cp.parses())) == numparses, "Not all parses found" + assert len(list(cp.parses())) == numparses, 'Not all parses found' if print_trees: for tree in cp.parses(): print(tree) @@ -1846,11 +1853,11 @@ def demo( print("* Parsing times") print() maxlen = max(len(key) for key in times) - format = "%" + repr(maxlen) + "s parser: %6.3fsec" + format = '%' + repr(maxlen) + 's parser: %6.3fsec' times_items = times.items() for (parser, t) in sorted(times_items, key=lambda a: a[1]): print(format % (parser, t)) -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/parse/corenlp.py b/nlp_resource_data/nltk/parse/corenlp.py index 1ba4801..1c4f785 100644 --- a/nlp_resource_data/nltk/parse/corenlp.py +++ b/nlp_resource_data/nltk/parse/corenlp.py @@ -1,12 +1,14 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: Interface to the CoreNLP REST API. 
# -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Dmitrijs Milajevs # # URL: # For license information, see LICENSE.TXT +from __future__ import unicode_literals + import re import json import time @@ -22,7 +24,7 @@ from nltk.tree import Tree from unittest import skip -_stanford_url = "http://stanfordnlp.github.io/CoreNLP/" +_stanford_url = 'http://stanfordnlp.github.io/CoreNLP/' class CoreNLPServerError(EnvironmentError): @@ -31,7 +33,7 @@ class CoreNLPServerError(EnvironmentError): def try_port(port=0): sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - sock.bind(("", port)) + sock.bind(('', port)) p = sock.getsockname()[1] sock.close() @@ -41,8 +43,8 @@ def try_port(port=0): class CoreNLPServer(object): - _MODEL_JAR_PATTERN = r"stanford-corenlp-(\d+)\.(\d+)\.(\d+)-models\.jar" - _JAR = r"stanford-corenlp-(\d+)\.(\d+)\.(\d+)\.jar" + _MODEL_JAR_PATTERN = r'stanford-corenlp-(\d+)\.(\d+)\.(\d+)-models\.jar' + _JAR = r'stanford-corenlp-(\d+)\.(\d+)\.(\d+)\.jar' def __init__( self, @@ -55,13 +57,13 @@ class CoreNLPServer(object): ): if corenlp_options is None: - corenlp_options = ["-preload", "tokenize,ssplit,pos,lemma,parse,depparse"] + corenlp_options = ['-preload', 'tokenize,ssplit,pos,lemma,parse,depparse'] jars = list( find_jar_iter( self._JAR, path_to_jar, - env_vars=("CORENLP",), + env_vars=('CORENLP',), searchpath=(), url=_stanford_url, verbose=verbose, @@ -81,13 +83,13 @@ class CoreNLPServer(object): else: try_port(port) - self.url = "http://localhost:{}".format(port) + self.url = 'http://localhost:{}'.format(port) model_jar = max( find_jar_iter( self._MODEL_JAR_PATTERN, path_to_models_jar, - env_vars=("CORENLP_MODELS",), + env_vars=('CORENLP_MODELS',), searchpath=(), url=_stanford_url, verbose=verbose, @@ -101,31 +103,29 @@ class CoreNLPServer(object): self._classpath = stanford_jar, model_jar self.corenlp_options = corenlp_options - self.java_options = java_options or ["-mx2g"] - - def start(self, stdout="devnull", stderr="devnull"): - """ Starts the CoreNLP server + self.java_options = java_options or ['-mx2g'] - :param stdout, stderr: Specifies where CoreNLP output is redirected. Valid values are 'devnull', 'stdout', 'pipe' - """ + def start(self): import requests - cmd = ["edu.stanford.nlp.pipeline.StanfordCoreNLPServer"] + cmd = ['edu.stanford.nlp.pipeline.StanfordCoreNLPServer'] if self.corenlp_options: cmd.extend(self.corenlp_options) # Configure java. - default_options = " ".join(_java_options) + default_options = ' '.join(_java_options) config_java(options=self.java_options, verbose=self.verbose) try: + # TODO: it's probably a bad idea to pipe stdout, as it will + # accumulate when lots of text is being parsed. self.popen = java( cmd, classpath=self._classpath, blocking=False, - stdout=stdout, - stderr=stderr, + stdout='pipe', + stderr='pipe', ) finally: # Return java configurations to their default values. @@ -137,31 +137,31 @@ class CoreNLPServer(object): _, stderrdata = self.popen.communicate() raise CoreNLPServerError( returncode, - "Could not start the server. " - "The error was: {}".format(stderrdata.decode("ascii")), + 'Could not start the server. 
' + 'The error was: {}'.format(stderrdata.decode('ascii')), ) for i in range(30): try: - response = requests.get(requests.compat.urljoin(self.url, "live")) + response = requests.get(requests.compat.urljoin(self.url, 'live')) except requests.exceptions.ConnectionError: time.sleep(1) else: if response.ok: break else: - raise CoreNLPServerError("Could not connect to the server.") + raise CoreNLPServerError('Could not connect to the server.') for i in range(60): try: - response = requests.get(requests.compat.urljoin(self.url, "ready")) + response = requests.get(requests.compat.urljoin(self.url, 'ready')) except requests.exceptions.ConnectionError: time.sleep(1) else: if response.ok: break else: - raise CoreNLPServerError("The server is not ready.") + raise CoreNLPServerError('The server is not ready.') def stop(self): self.popen.terminate() @@ -180,13 +180,13 @@ class CoreNLPServer(object): class GenericCoreNLPParser(ParserI, TokenizerI, TaggerI): """Interface to the CoreNLP Parser.""" - def __init__(self, url="http://localhost:9000", encoding="utf8", tagtype=None): + def __init__(self, url='http://localhost:9000', encoding='utf8', tagtype=None): import requests self.url = url self.encoding = encoding - if tagtype not in ["pos", "ner", None]: + if tagtype not in ['pos', 'ner', None]: raise ValueError("tagtype must be either 'pos', 'ner' or None") self.tagtype = tagtype @@ -208,7 +208,7 @@ class GenericCoreNLPParser(ParserI, TokenizerI, TaggerI): :rtype: iter(iter(Tree)) """ # Converting list(list(str)) -> list(str) - sentences = (" ".join(words) for words in sentences) + sentences = (' '.join(words) for words in sentences) return self.raw_parse_sents(sentences, *args, **kwargs) def raw_parse(self, sentence, properties=None, *args, **kwargs): @@ -221,7 +221,7 @@ class GenericCoreNLPParser(ParserI, TokenizerI, TaggerI): :type sentence: str :rtype: iter(Tree) """ - default_properties = {"tokenize.whitespace": "false"} + default_properties = {'tokenize.whitespace': 'false'} default_properties.update(properties or {}) return next( @@ -230,10 +230,10 @@ class GenericCoreNLPParser(ParserI, TokenizerI, TaggerI): ) ) - def api_call(self, data, properties=None, timeout=60): + def api_call(self, data, properties=None): default_properties = { - "outputFormat": "json", - "annotators": "tokenize,pos,lemma,ssplit,{parser_annotator}".format( + 'outputFormat': 'json', + 'annotators': 'tokenize,pos,lemma,ssplit,{parser_annotator}'.format( parser_annotator=self.parser_annotator ), } @@ -242,9 +242,9 @@ class GenericCoreNLPParser(ParserI, TokenizerI, TaggerI): response = self.session.post( self.url, - params={"properties": json.dumps(default_properties)}, + params={'properties': json.dumps(default_properties)}, data=data.encode(self.encoding), - timeout=timeout, + timeout=60, ) response.raise_for_status() @@ -266,7 +266,7 @@ class GenericCoreNLPParser(ParserI, TokenizerI, TaggerI): """ default_properties = { # Only splits on '\n', never inside the sentence. 
- "ssplit.eolonly": "true" + 'ssplit.ssplit.eolonly': 'true' } default_properties.update(properties or {}) @@ -281,8 +281,8 @@ class GenericCoreNLPParser(ParserI, TokenizerI, TaggerI): tree = self.make_tree(parse) yield iter([tree]) """ - parsed_data = self.api_call("\n".join(sentences), properties=default_properties) - for parsed_sent in parsed_data["sentences"]: + parsed_data = self.api_call('\n'.join(sentences), properties=default_properties) + for parsed_sent in parsed_data['sentences']: tree = self.make_tree(parsed_sent) yield iter([tree]) @@ -297,7 +297,7 @@ class GenericCoreNLPParser(ParserI, TokenizerI, TaggerI): """ parsed_data = self.api_call(text, *args, **kwargs) - for parse in parsed_data["sentences"]: + for parse in parsed_data['sentences']: yield self.make_tree(parse) def tokenize(self, text, properties=None): @@ -319,15 +319,15 @@ class GenericCoreNLPParser(ParserI, TokenizerI, TaggerI): ['The', 'color', 'of', 'the', 'wall', 'is', 'blue', '.'] """ - default_properties = {"annotators": "tokenize,ssplit"} + default_properties = {'annotators': 'tokenize,ssplit'} default_properties.update(properties or {}) result = self.api_call(text, properties=default_properties) - for sentence in result["sentences"]: - for token in sentence["tokens"]: - yield token["originalText"] or token["word"] + for sentence in result['sentences']: + for token in sentence['tokens']: + yield token['originalText'] or token['word'] def tag_sents(self, sentences): """ @@ -335,13 +335,13 @@ class GenericCoreNLPParser(ParserI, TokenizerI, TaggerI): Takes multiple sentences as a list where each sentence is a list of tokens. - + :param sentences: Input sentences to tag :type sentences: list(list(str)) :rtype: list(list(tuple(str, str)) """ # Converting list(list(str)) -> list(str) - sentences = (" ".join(words) for words in sentences) + sentences = (' '.join(words) for words in sentences) return [sentences[0] for sentences in self.raw_tag_sents(sentences)] def tag(self, sentence): @@ -370,27 +370,27 @@ class GenericCoreNLPParser(ParserI, TokenizerI, TaggerI): Tag multiple sentences. Takes multiple sentences as a list where each sentence is a string. - + :param sentences: Input sentences to tag :type sentences: list(str) :rtype: list(list(list(tuple(str, str))) """ default_properties = { - "ssplit.isOneSentence": "true", - "annotators": "tokenize,ssplit,", + 'ssplit.isOneSentence': 'true', + 'annotators': 'tokenize,ssplit,', } # Supports only 'pos' or 'ner' tags. 
- assert self.tagtype in ["pos", "ner"] - default_properties["annotators"] += self.tagtype + assert self.tagtype in ['pos', 'ner'] + default_properties['annotators'] += self.tagtype for sentence in sentences: tagged_data = self.api_call(sentence, properties=default_properties) yield [ [ - (token["word"], token[self.tagtype]) - for token in tagged_sentence["tokens"] + (token['word'], token[self.tagtype]) + for token in tagged_sentence['tokens'] ] - for tagged_sentence in tagged_data["sentences"] + for tagged_sentence in tagged_data['sentences'] ] @@ -539,11 +539,11 @@ class CoreNLPParser(GenericCoreNLPParser): """ - _OUTPUT_FORMAT = "penn" - parser_annotator = "parse" + _OUTPUT_FORMAT = 'penn' + parser_annotator = 'parse' def make_tree(self, result): - return Tree.fromstring(result["parse"]) + return Tree.fromstring(result['parse']) class CoreNLPDependencyParser(GenericCoreNLPParser): @@ -710,44 +710,44 @@ class CoreNLPDependencyParser(GenericCoreNLPParser): """ - _OUTPUT_FORMAT = "conll2007" - parser_annotator = "depparse" + _OUTPUT_FORMAT = 'conll2007' + parser_annotator = 'depparse' def make_tree(self, result): return DependencyGraph( ( - " ".join(n_items[1:]) # NLTK expects an iterable of strings... + ' '.join(n_items[1:]) # NLTK expects an iterable of strings... for n_items in sorted(transform(result)) ), - cell_separator=" ", # To make sure that a non-breaking space is kept inside of a token. + cell_separator=' ', # To make sure that a non-breaking space is kept inside of a token. ) def transform(sentence): - for dependency in sentence["basicDependencies"]: + for dependency in sentence['basicDependencies']: - dependent_index = dependency["dependent"] - token = sentence["tokens"][dependent_index - 1] + dependent_index = dependency['dependent'] + token = sentence['tokens'][dependent_index - 1] # Return values that we don't know as '_'. Also, consider tag and ctag # to be equal. yield ( dependent_index, - "_", - token["word"], - token["lemma"], - token["pos"], - token["pos"], - "_", - str(dependency["governor"]), - dependency["dep"], - "_", - "_", + '_', + token['word'], + token['lemma'], + token['pos'], + token['pos'], + '_', + str(dependency['governor']), + dependency['dep'], + '_', + '_', ) -@skip("Skipping all CoreNLP tests.") +@skip('Skipping all CoreNLP tests.') def setup_module(module): from nose import SkipTest @@ -756,18 +756,18 @@ def setup_module(module): try: server = CoreNLPServer(port=9000) except LookupError as e: - raise SkipTest("Could not instantiate CoreNLPServer.") + raise SkipTest('Could not instantiate CoreNLPServer.') try: server.start() except CoreNLPServerError as e: raise SkipTest( - "Skipping CoreNLP tests because the server could not be started. " - "Make sure that the 9000 port is free. " - "{}".format(e.strerror) + 'Skipping CoreNLP tests because the server could not be started. ' + 'Make sure that the 9000 port is free. 
' + '{}'.format(e.strerror) ) -@skip("Skipping all CoreNLP tests.") +@skip('Skipping all CoreNLP tests.') def teardown_module(module): server.stop() diff --git a/nlp_resource_data/nltk/parse/dependencygraph.py b/nlp_resource_data/nltk/parse/dependencygraph.py index 4d3f7c5..8c6156b 100644 --- a/nlp_resource_data/nltk/parse/dependencygraph.py +++ b/nlp_resource_data/nltk/parse/dependencygraph.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Dependency Grammars # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Jason Narad # Steven Bird (modifications) # @@ -13,6 +13,7 @@ Tools for reading and writing dependency trees. The input is assumed to be in Malt-TAB format (http://stp.lingfil.uu.se/~nivre/research/MaltXML.html). """ +from __future__ import print_function, unicode_literals from collections import defaultdict from itertools import chain @@ -20,13 +21,18 @@ from pprint import pformat import subprocess import warnings +from six import string_types + from nltk.tree import Tree +from nltk.compat import python_2_unicode_compatible + ################################################################# # DependencyGraph Class ################################################################# +@python_2_unicode_compatible class DependencyGraph(object): """ A container for the nodes and labelled edges of a dependency structure. @@ -38,7 +44,7 @@ class DependencyGraph(object): cell_extractor=None, zero_based=False, cell_separator=None, - top_relation_label="ROOT", + top_relation_label='ROOT', ): """Dependency graph. @@ -59,19 +65,19 @@ class DependencyGraph(object): """ self.nodes = defaultdict( lambda: { - "address": None, - "word": None, - "lemma": None, - "ctag": None, - "tag": None, - "feats": None, - "head": None, - "deps": defaultdict(list), - "rel": None, + 'address': None, + 'word': None, + 'lemma': None, + 'ctag': None, + 'tag': None, + 'feats': None, + 'head': None, + 'deps': defaultdict(list), + 'rel': None, } ) - self.nodes[0].update({"ctag": "TOP", "tag": "TOP", "address": 0}) + self.nodes[0].update({'ctag': 'TOP', 'tag': 'TOP', 'address': 0}) self.root = None @@ -98,21 +104,21 @@ class DependencyGraph(object): """ for node in self.nodes.values(): new_deps = [] - for dep in node["deps"]: + for dep in node['deps']: if dep in originals: new_deps.append(redirect) else: new_deps.append(dep) - node["deps"] = new_deps + node['deps'] = new_deps def add_arc(self, head_address, mod_address): """ Adds an arc from the node specified by head_address to the node specified by the mod address. 
""" - relation = self.nodes[mod_address]["rel"] - self.nodes[head_address]["deps"].setdefault(relation, []) - self.nodes[head_address]["deps"][relation].append(mod_address) + relation = self.nodes[mod_address]['rel'] + self.nodes[head_address]['deps'].setdefault(relation, []) + self.nodes[head_address]['deps'][relation].append(mod_address) # self.nodes[head_address]['deps'].append(mod_address) def connect_graph(self): @@ -122,10 +128,10 @@ class DependencyGraph(object): """ for node1 in self.nodes.values(): for node2 in self.nodes.values(): - if node1["address"] != node2["address"] and node2["rel"] != "TOP": - relation = node2["rel"] - node1["deps"].setdefault(relation, []) - node1["deps"][relation].append(node2["address"]) + if node1['address'] != node2['address'] and node2['rel'] != 'TOP': + relation = node2['rel'] + node1['deps'].setdefault(relation, []) + node1['deps'][relation].append(node2['address']) # node1['deps'].append(node2['address']) def get_by_address(self, node_address): @@ -163,23 +169,23 @@ class DependencyGraph(object): """ # Start the digraph specification - s = "digraph G{\n" - s += "edge [dir=forward]\n" - s += "node [shape=plaintext]\n" + s = 'digraph G{\n' + s += 'edge [dir=forward]\n' + s += 'node [shape=plaintext]\n' # Draw the remaining nodes - for node in sorted(self.nodes.values(), key=lambda v: v["address"]): + for node in sorted(self.nodes.values(), key=lambda v: v['address']): s += '\n%s [label="%s (%s)"]' % ( - node["address"], - node["address"], - node["word"], + node['address'], + node['address'], + node['word'], ) - for rel, deps in node["deps"].items(): + for rel, deps in node['deps'].items(): for dep in deps: if rel is not None: - s += '\n%s -> %s [label="%s"]' % (node["address"], dep, rel) + s += '\n%s -> %s [label="%s"]' % (node['address'], dep, rel) else: - s += "\n%s -> %s " % (node["address"], dep) + s += '\n%s -> %s ' % (node['address'], dep) s += "\n}" return s @@ -200,19 +206,19 @@ class DependencyGraph(object): try: process = subprocess.Popen( - ["dot", "-Tsvg"], + ['dot', '-Tsvg'], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True, ) except OSError: - raise Exception("Cannot find the dot binary from Graphviz package") + raise Exception('Cannot find the dot binary from Graphviz package') out, err = process.communicate(dot_string) if err: raise Exception( - "Cannot create svg representation by running dot from string: {}" - "".format(dot_string) + 'Cannot create svg representation by running dot from string: {}' + ''.format(dot_string) ) return out @@ -224,7 +230,7 @@ class DependencyGraph(object): @staticmethod def load( - filename, zero_based=False, cell_separator=None, top_relation_label="ROOT" + filename, zero_based=False, cell_separator=None, top_relation_label='ROOT' ): """ :param filename: a name of a file in Malt-TAB format @@ -246,7 +252,7 @@ class DependencyGraph(object): cell_separator=cell_separator, top_relation_label=top_relation_label, ) - for tree_str in infile.read().split("\n\n") + for tree_str in infile.read().split('\n\n') ] def left_children(self, node_index): @@ -254,8 +260,8 @@ class DependencyGraph(object): Returns the number of left children under the node specified by the given address. 
""" - children = chain.from_iterable(self.nodes[node_index]["deps"].values()) - index = self.nodes[node_index]["address"] + children = chain.from_iterable(self.nodes[node_index]['deps'].values()) + index = self.nodes[node_index]['address'] return sum(1 for c in children if c < index) def right_children(self, node_index): @@ -263,13 +269,13 @@ class DependencyGraph(object): Returns the number of right children under the node specified by the given address. """ - children = chain.from_iterable(self.nodes[node_index]["deps"].values()) - index = self.nodes[node_index]["address"] + children = chain.from_iterable(self.nodes[node_index]['deps'].values()) + index = self.nodes[node_index]['address'] return sum(1 for c in children if c > index) def add_node(self, node): - if not self.contains_address(node["address"]): - self.nodes[node["address"]].update(node) + if not self.contains_address(node['address']): + self.nodes[node['address']].update(node) def _parse( self, @@ -277,7 +283,7 @@ class DependencyGraph(object): cell_extractor=None, zero_based=False, cell_separator=None, - top_relation_label="ROOT", + top_relation_label='ROOT', ): """Parse a sentence. @@ -295,11 +301,11 @@ class DependencyGraph(object): def extract_3_cells(cells, index): word, tag, head = cells - return index, word, word, tag, tag, "", head, "" + return index, word, word, tag, tag, '', head, '' def extract_4_cells(cells, index): word, tag, head, rel = cells - return index, word, word, tag, tag, "", head, rel + return index, word, word, tag, tag, '', head, rel def extract_7_cells(cells, index): line_index, word, lemma, tag, _, head, rel = cells @@ -308,7 +314,7 @@ class DependencyGraph(object): except ValueError: # index can't be parsed as an integer, use default pass - return index, word, lemma, tag, tag, "", head, rel + return index, word, lemma, tag, tag, '', head, rel def extract_10_cells(cells, index): line_index, word, lemma, ctag, tag, feats, head, rel, _, _ = cells @@ -326,8 +332,8 @@ class DependencyGraph(object): 10: extract_10_cells, } - if isinstance(input_, str): - input_ = (line for line in input_.split("\n")) + if isinstance(input_, string_types): + input_ = (line for line in input_.split('\n')) lines = (l.rstrip() for l in input_) lines = (l for l in lines if l) @@ -345,8 +351,8 @@ class DependencyGraph(object): cell_extractor = extractors[cell_number] except KeyError: raise ValueError( - "Number of tab-delimited fields ({0}) not supported by " - "CoNLL(10) or Malt-Tab(4) format".format(cell_number) + 'Number of tab-delimited fields ({0}) not supported by ' + 'CoNLL(10) or Malt-Tab(4) format'.format(cell_number) ) try: @@ -359,7 +365,7 @@ class DependencyGraph(object): # extractor and doesn't accept or return an index. word, lemma, ctag, tag, feats, head, rel = cell_extractor(cells) - if head == "_": + if head == '_': continue head = int(head) @@ -368,24 +374,24 @@ class DependencyGraph(object): self.nodes[index].update( { - "address": index, - "word": word, - "lemma": lemma, - "ctag": ctag, - "tag": tag, - "feats": feats, - "head": head, - "rel": rel, + 'address': index, + 'word': word, + 'lemma': lemma, + 'ctag': ctag, + 'tag': tag, + 'feats': feats, + 'head': head, + 'rel': rel, } ) # Make sure that the fake root node has labeled dependencies. 
if (cell_number == 3) and (head == 0): rel = top_relation_label - self.nodes[head]["deps"][rel].append(index) + self.nodes[head]['deps'][rel].append(index) - if self.nodes[0]["deps"][top_relation_label]: - root_address = self.nodes[0]["deps"][top_relation_label][0] + if self.nodes[0]['deps'][top_relation_label]: + root_address = self.nodes[0]['deps'][top_relation_label][0] self.root = self.nodes[root_address] self.top_relation_label = top_relation_label else: @@ -394,9 +400,9 @@ class DependencyGraph(object): ) def _word(self, node, filter=True): - w = node["word"] + w = node['word'] if filter: - if w != ",": + if w != ',': return w return w @@ -407,8 +413,8 @@ class DependencyGraph(object): :return: either a word (if the indexed node is a leaf) or a ``Tree``. """ node = self.get_by_address(i) - word = node["word"] - deps = sorted(chain.from_iterable(node["deps"].values())) + word = node['word'] + deps = sorted(chain.from_iterable(node['deps'].values())) if deps: return Tree(word, [self._tree(dep) for dep in deps]) @@ -422,8 +428,8 @@ class DependencyGraph(object): """ node = self.root - word = node["word"] - deps = sorted(chain.from_iterable(node["deps"].values())) + word = node['word'] + deps = sorted(chain.from_iterable(node['deps'].values())) return Tree(word, [self._tree(dep) for dep in deps]) def triples(self, node=None): @@ -435,22 +441,22 @@ class DependencyGraph(object): if not node: node = self.root - head = (node["word"], node["ctag"]) - for i in sorted(chain.from_iterable(node["deps"].values())): + head = (node['word'], node['ctag']) + for i in sorted(chain.from_iterable(node['deps'].values())): dep = self.get_by_address(i) - yield (head, dep["rel"], (dep["word"], dep["ctag"])) + yield (head, dep['rel'], (dep['word'], dep['ctag'])) for triple in self.triples(node=dep): yield triple def _hd(self, i): try: - return self.nodes[i]["head"] + return self.nodes[i]['head'] except IndexError: return None def _rel(self, i): try: - return self.nodes[i]["rel"] + return self.nodes[i]['rel'] except IndexError: return None @@ -484,8 +490,8 @@ class DependencyGraph(object): distances = {} for node in self.nodes.values(): - for dep in node["deps"]: - key = tuple([node["address"], dep]) + for dep in node['deps']: + key = tuple([node['address'], dep]) distances[key] = 1 for _ in self.nodes: @@ -506,13 +512,13 @@ class DependencyGraph(object): return False # return []? 
def get_cycle_path(self, curr_node, goal_node_index): - for dep in curr_node["deps"]: + for dep in curr_node['deps']: if dep == goal_node_index: - return [curr_node["address"]] - for dep in curr_node["deps"]: + return [curr_node['address']] + for dep in curr_node['deps']: path = self.get_cycle_path(self.get_by_address(dep), goal_node_index) if len(path) > 0: - path.insert(0, curr_node["address"]) + path.insert(0, curr_node['address']) return path return [] @@ -526,23 +532,23 @@ class DependencyGraph(object): """ if style == 3: - template = "{word}\t{tag}\t{head}\n" + template = '{word}\t{tag}\t{head}\n' elif style == 4: - template = "{word}\t{tag}\t{head}\t{rel}\n" + template = '{word}\t{tag}\t{head}\t{rel}\n' elif style == 10: template = ( - "{i}\t{word}\t{lemma}\t{ctag}\t{tag}\t{feats}\t{head}\t{rel}\t_\t_\n" + '{i}\t{word}\t{lemma}\t{ctag}\t{tag}\t{feats}\t{head}\t{rel}\t_\t_\n' ) else: raise ValueError( - "Number of tab-delimited fields ({0}) not supported by " - "CoNLL(10) or Malt-Tab(4) format".format(style) + 'Number of tab-delimited fields ({0}) not supported by ' + 'CoNLL(10) or Malt-Tab(4) format'.format(style) ) - return "".join( + return ''.join( template.format(i=i, **node) for i, node in sorted(self.nodes.items()) - if node["tag"] != "TOP" + if node['tag'] != 'TOP' ) def nx_graph(self): @@ -555,7 +561,7 @@ class DependencyGraph(object): ] self.nx_labels = {} for n in nx_nodelist: - self.nx_labels[n] = self.nodes[n]["word"] + self.nx_labels[n] = self.nodes[n]['word'] g = networkx.MultiDiGraph() g.add_nodes_from(nx_nodelist) @@ -616,7 +622,7 @@ Nov. NNP 9 VMOD networkx.draw_networkx_labels(g, pos, dg.nx_labels) pylab.xticks([]) pylab.yticks([]) - pylab.savefig("tree.png") + pylab.savefig('tree.png') pylab.show() @@ -633,11 +639,11 @@ def conll_demo(): def conll_file_demo(): - print("Mass conll_read demo...") - graphs = [DependencyGraph(entry) for entry in conll_data2.split("\n\n") if entry] + print('Mass conll_read demo...') + graphs = [DependencyGraph(entry) for entry in conll_data2.split('\n\n') if entry] for graph in graphs: tree = graph.tree() - print("\n") + print('\n') tree.pprint() @@ -645,11 +651,11 @@ def cycle_finding_demo(): dg = DependencyGraph(treebank_data) print(dg.contains_cycle()) cyclic_dg = DependencyGraph() - cyclic_dg.add_node({"word": None, "deps": [1], "rel": "TOP", "address": 0}) - cyclic_dg.add_node({"word": None, "deps": [2], "rel": "NTOP", "address": 1}) - cyclic_dg.add_node({"word": None, "deps": [4], "rel": "NTOP", "address": 2}) - cyclic_dg.add_node({"word": None, "deps": [1], "rel": "NTOP", "address": 3}) - cyclic_dg.add_node({"word": None, "deps": [3], "rel": "NTOP", "address": 4}) + cyclic_dg.add_node({'word': None, 'deps': [1], 'rel': 'TOP', 'address': 0}) + cyclic_dg.add_node({'word': None, 'deps': [2], 'rel': 'NTOP', 'address': 1}) + cyclic_dg.add_node({'word': None, 'deps': [4], 'rel': 'NTOP', 'address': 2}) + cyclic_dg.add_node({'word': None, 'deps': [1], 'rel': 'NTOP', 'address': 3}) + cyclic_dg.add_node({'word': None, 'deps': [3], 'rel': 'NTOP', 'address': 4}) print(cyclic_dg.contains_cycle()) @@ -775,5 +781,5 @@ conll_data2 = """1 Cathy Cathy N N eigen|ev|ne 16 . . 
Punc Punc punt 15 punct _ _ """ -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/parse/earleychart.py b/nlp_resource_data/nltk/parse/earleychart.py index d6cc14e..fdb8136 100644 --- a/nlp_resource_data/nltk/parse/earleychart.py +++ b/nlp_resource_data/nltk/parse/earleychart.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: An Incremental Earley Chart Parser # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Peter Ljunglöf # Rob Speer # Edward Loper @@ -25,8 +25,9 @@ This is appealing for, say, speech recognizer hypothesis filtering. The main parser class is ``EarleyChartParser``, which is a top-down algorithm, originally formulated by Jay Earley (1970). """ +from __future__ import print_function, division -from time import perf_counter +from six.moves import range from nltk.parse.chart import ( Chart, @@ -99,7 +100,7 @@ class IncrementalChart(Chart): # Make sure it's a valid index. for key in restr_keys: if not hasattr(EdgeI, key): - raise ValueError("Bad restriction: %s" % key) + raise ValueError('Bad restriction: %s' % key) # Create the index. index = self._indexes[restr_keys] = tuple({} for x in self._positions()) @@ -149,7 +150,7 @@ class FeatureIncrementalChart(IncrementalChart, FeatureChart): # Make sure it's a valid index. for key in restr_keys: if not hasattr(EdgeI, key): - raise ValueError("Bad restriction: %s" % key) + raise ValueError('Bad restriction: %s' % key) # Create the index. index = self._indexes[restr_keys] = tuple({} for x in self._positions()) @@ -510,7 +511,7 @@ def demo( print_grammar=False, print_trees=True, trace=2, - sent="I saw John with a dog with my cookie", + sent='I saw John with a dog with my cookie', numparses=5, ): """ @@ -534,14 +535,14 @@ def demo( # Do the parsing. earley = EarleyChartParser(grammar, trace=trace) - t = perf_counter() + t = time.clock() chart = earley.chart_parse(tokens) parses = list(chart.parses(grammar.start())) - t = perf_counter() - t + t = time.clock() - t # Print results. if numparses: - assert len(parses) == numparses, "Not all parses found" + assert len(parses) == numparses, 'Not all parses found' if print_trees: for tree in parses: print(tree) @@ -551,5 +552,5 @@ def demo( print("Time:", t) -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/parse/evaluate.py b/nlp_resource_data/nltk/parse/evaluate.py index 672ad6d..d79ad46 100644 --- a/nlp_resource_data/nltk/parse/evaluate.py +++ b/nlp_resource_data/nltk/parse/evaluate.py @@ -2,10 +2,12 @@ # # Author: Long Duong # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # URL: # For license information, see LICENSE.TXT +from __future__ import division + import unicodedata diff --git a/nlp_resource_data/nltk/parse/featurechart.py b/nlp_resource_data/nltk/parse/featurechart.py index a06c50f..ee9e274 100644 --- a/nlp_resource_data/nltk/parse/featurechart.py +++ b/nlp_resource_data/nltk/parse/featurechart.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: Chart Parser for Feature-Based Grammars # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Rob Speer # Peter Ljunglöf # URL: @@ -11,8 +11,11 @@ Extension of chart parsing implementation to handle grammars with feature structures as nodes. 
""" -from time import perf_counter +from __future__ import print_function, unicode_literals +from six.moves import range + +from nltk.compat import python_2_unicode_compatible from nltk.featstruct import FeatStruct, unify, TYPE, find_variables from nltk.sem import logic from nltk.tree import Tree @@ -44,6 +47,7 @@ from nltk.parse.chart import ( # //////////////////////////////////////////////////////////// +@python_2_unicode_compatible class FeatureTreeEdge(TreeEdge): """ A specialized tree edge that allows shared variable bindings @@ -145,12 +149,12 @@ class FeatureTreeEdge(TreeEdge): def __str__(self): if self.is_complete(): - return super().__str__() + return TreeEdge.__unicode__(self) else: - bindings = "{%s}" % ", ".join( - "%s: %r" % item for item in sorted(self._bindings.items()) + bindings = '{%s}' % ', '.join( + '%s: %r' % item for item in sorted(self._bindings.items()) ) - return "%s %s" % (super().__str__(), bindings) + return '%s %s' % (TreeEdge.__unicode__(self), bindings) # //////////////////////////////////////////////////////////// @@ -197,7 +201,7 @@ class FeatureChart(Chart): # Make sure it's a valid index. for key in restr_keys: if not hasattr(EdgeI, key): - raise ValueError("Bad restriction: %s" % key) + raise ValueError('Bad restriction: %s' % key) # Create the index. index = self._indexes[restr_keys] = {} @@ -583,7 +587,7 @@ class InstantiateVarsChart(FeatureChart): return dict( (var, logic.unique_variable()) for var in edge.lhs().variables() - if var.name.startswith("@") + if var.name.startswith('@') ) @@ -626,7 +630,7 @@ def demo( print_sentence=True, trace=1, parser=FeatureChartParser, - sent="I saw John with a dog with my cookie", + sent='I saw John with a dog with my cookie', ): import sys, time @@ -639,12 +643,12 @@ def demo( if print_sentence: print("Sentence:", sent) tokens = sent.split() - t = perf_counter() + t = time.clock() cp = parser(grammar, trace=trace) chart = cp.chart_parse(tokens) trees = list(chart.parses(grammar.start())) if print_times: - print("Time: %s" % (perf_counter() - t)) + print("Time: %s" % (time.clock() - t)) if print_trees: for tree in trees: print(tree) @@ -655,22 +659,22 @@ def demo( def run_profile(): import profile - profile.run("for i in range(1): demo()", "/tmp/profile.out") + profile.run('for i in range(1): demo()', '/tmp/profile.out') import pstats - p = pstats.Stats("/tmp/profile.out") - p.strip_dirs().sort_stats("time", "cum").print_stats(60) - p.strip_dirs().sort_stats("cum", "time").print_stats(60) + p = pstats.Stats('/tmp/profile.out') + p.strip_dirs().sort_stats('time', 'cum').print_stats(60) + p.strip_dirs().sort_stats('cum', 'time').print_stats(60) -if __name__ == "__main__": +if __name__ == '__main__': from nltk.data import load demo() print() - grammar = load("grammars/book_grammars/feat0.fcfg") + grammar = load('grammars/book_grammars/feat0.fcfg') cp = FeatureChartParser(grammar, trace=2) - sent = "Kim likes children" + sent = 'Kim likes children' tokens = sent.split() trees = cp.parse(tokens) for tree in trees: diff --git a/nlp_resource_data/nltk/parse/generate.py b/nlp_resource_data/nltk/parse/generate.py index 4549b8d..e0a7cb2 100644 --- a/nlp_resource_data/nltk/parse/generate.py +++ b/nlp_resource_data/nltk/parse/generate.py @@ -1,12 +1,13 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: Generating from a CFG # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Steven Bird # Peter Ljunglöf # URL: # For license information, see LICENSE.TXT # +from __future__ import 
print_function import itertools import sys @@ -78,12 +79,12 @@ demo_grammar = """ def demo(N=23): from nltk.grammar import CFG - print("Generating the first %d sentences for demo grammar:" % (N,)) + print('Generating the first %d sentences for demo grammar:' % (N,)) print(demo_grammar) grammar = CFG.fromstring(demo_grammar) for n, sent in enumerate(generate(grammar, n=N), 1): - print("%3d. %s" % (n, " ".join(sent))) + print('%3d. %s' % (n, ' '.join(sent))) -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/parse/malt.py b/nlp_resource_data/nltk/parse/malt.py index 523901e..2523927 100644 --- a/nlp_resource_data/nltk/parse/malt.py +++ b/nlp_resource_data/nltk/parse/malt.py @@ -4,16 +4,20 @@ # Author: Dan Garrette # Contributor: Liling Tan, Mustufain, osamamukhtar11 # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # URL: # For license information, see LICENSE.TXT +from __future__ import print_function, unicode_literals + import os import sys import tempfile import subprocess import inspect +from six import text_type + from nltk.data import ZipFilePathPointer from nltk.internals import find_dir, find_file, find_jars_within_path @@ -27,34 +31,34 @@ def malt_regex_tagger(): _tagger = RegexpTagger( [ - (r"\.$", "."), - (r"\,$", ","), - (r"\?$", "?"), # fullstop, comma, Qmark - (r"\($", "("), - (r"\)$", ")"), # round brackets - (r"\[$", "["), - (r"\]$", "]"), # square brackets - (r"^-?[0-9]+(.[0-9]+)?$", "CD"), # cardinal numbers - (r"(The|the|A|a|An|an)$", "DT"), # articles - (r"(He|he|She|she|It|it|I|me|Me|You|you)$", "PRP"), # pronouns - (r"(His|his|Her|her|Its|its)$", "PRP$"), # possesive - (r"(my|Your|your|Yours|yours)$", "PRP$"), # possesive - (r"(on|On|in|In|at|At|since|Since)$", "IN"), # time prepopsitions - (r"(for|For|ago|Ago|before|Before)$", "IN"), # time prepopsitions - (r"(till|Till|until|Until)$", "IN"), # time prepopsitions - (r"(by|By|beside|Beside)$", "IN"), # space prepopsitions - (r"(under|Under|below|Below)$", "IN"), # space prepopsitions - (r"(over|Over|above|Above)$", "IN"), # space prepopsitions - (r"(across|Across|through|Through)$", "IN"), # space prepopsitions - (r"(into|Into|towards|Towards)$", "IN"), # space prepopsitions - (r"(onto|Onto|from|From)$", "IN"), # space prepopsitions - (r".*able$", "JJ"), # adjectives - (r".*ness$", "NN"), # nouns formed from adjectives - (r".*ly$", "RB"), # adverbs - (r".*s$", "NNS"), # plural nouns - (r".*ing$", "VBG"), # gerunds - (r".*ed$", "VBD"), # past tense verbs - (r".*", "NN"), # nouns (default) + (r'\.$', '.'), + (r'\,$', ','), + (r'\?$', '?'), # fullstop, comma, Qmark + (r'\($', '('), + (r'\)$', ')'), # round brackets + (r'\[$', '['), + (r'\]$', ']'), # square brackets + (r'^-?[0-9]+(.[0-9]+)?$', 'CD'), # cardinal numbers + (r'(The|the|A|a|An|an)$', 'DT'), # articles + (r'(He|he|She|she|It|it|I|me|Me|You|you)$', 'PRP'), # pronouns + (r'(His|his|Her|her|Its|its)$', 'PRP$'), # possesive + (r'(my|Your|your|Yours|yours)$', 'PRP$'), # possesive + (r'(on|On|in|In|at|At|since|Since)$', 'IN'), # time prepopsitions + (r'(for|For|ago|Ago|before|Before)$', 'IN'), # time prepopsitions + (r'(till|Till|until|Until)$', 'IN'), # time prepopsitions + (r'(by|By|beside|Beside)$', 'IN'), # space prepopsitions + (r'(under|Under|below|Below)$', 'IN'), # space prepopsitions + (r'(over|Over|above|Above)$', 'IN'), # space prepopsitions + (r'(across|Across|through|Through)$', 'IN'), # space prepopsitions + (r'(into|Into|towards|Towards)$', 'IN'), # space prepopsitions + 
(r'(onto|Onto|from|From)$', 'IN'), # space prepopsitions + (r'.*able$', 'JJ'), # adjectives + (r'.*ness$', 'NN'), # nouns formed from adjectives + (r'.*ly$', 'RB'), # adverbs + (r'.*s$', 'NNS'), # plural nouns + (r'.*ing$', 'VBG'), # gerunds + (r'.*ed$', 'VBD'), # past tense verbs + (r'.*', 'NN'), # nouns (default) ] ) return _tagger.tag @@ -67,16 +71,16 @@ def find_maltparser(parser_dirname): if os.path.exists(parser_dirname): # If a full path is given. _malt_dir = parser_dirname else: # Try to find path to maltparser directory in environment variables. - _malt_dir = find_dir(parser_dirname, env_vars=("MALT_PARSER",)) + _malt_dir = find_dir(parser_dirname, env_vars=('MALT_PARSER',)) # Checks that that the found directory contains all the necessary .jar - malt_dependencies = ["", "", ""] + malt_dependencies = ['', '', ''] _malt_jars = set(find_jars_within_path(_malt_dir)) _jars = set(os.path.split(jar)[1] for jar in _malt_jars) - malt_dependencies = set(["log4j.jar", "libsvm.jar", "liblinear-1.8.jar"]) + malt_dependencies = set(['log4j.jar', 'libsvm.jar', 'liblinear-1.8.jar']) assert malt_dependencies.issubset(_jars) assert any( - filter(lambda i: i.startswith("maltparser-") and i.endswith(".jar"), _jars) + filter(lambda i: i.startswith('maltparser-') and i.endswith('.jar'), _jars) ) return list(_malt_jars) @@ -86,11 +90,11 @@ def find_malt_model(model_filename): A module to find pre-trained MaltParser model. """ if model_filename is None: - return "malt_temp.mco" + return 'malt_temp.mco' elif os.path.exists(model_filename): # If a full path is given. return model_filename else: # Try to find path to malt model in environment variables. - return find_file(model_filename, env_vars=("MALT_MODEL",), verbose=False) + return find_file(model_filename, env_vars=('MALT_MODEL',), verbose=False) class MaltParser(ParserI): @@ -149,13 +153,13 @@ class MaltParser(ParserI): ) # Initialize model. self.model = find_malt_model(model_filename) - self._trained = self.model != "malt_temp.mco" + self._trained = self.model != 'malt_temp.mco' # Set the working_dir parameters i.e. `-w` from MaltParser's option. self.working_dir = tempfile.gettempdir() # Initialize POS tagger. self.tagger = tagger if tagger is not None else malt_regex_tagger() - def parse_tagged_sents(self, sentences, verbose=False, top_relation_label="null"): + def parse_tagged_sents(self, sentences, verbose=False, top_relation_label='null'): """ Use MaltParser to parse multiple POS tagged sentences. Takes multiple sentences where each sentence is a list of (word, tag) tuples. @@ -170,17 +174,17 @@ class MaltParser(ParserI): raise Exception("Parser has not been trained. Call train() first.") with tempfile.NamedTemporaryFile( - prefix="malt_input.conll.", dir=self.working_dir, mode="w", delete=False + prefix='malt_input.conll.', dir=self.working_dir, mode='w', delete=False ) as input_file: with tempfile.NamedTemporaryFile( - prefix="malt_output.conll.", + prefix='malt_output.conll.', dir=self.working_dir, - mode="w", + mode='w', delete=False, ) as output_file: # Convert list of sentences to CONLL format. for line in taggedsents_to_conll(sentences): - input_file.write(str(line)) + input_file.write(text_type(line)) input_file.close() # Generate command to run maltparser. @@ -199,15 +203,15 @@ class MaltParser(ParserI): ret = self._execute(cmd, verbose) # Run command. os.chdir(_current_path) # Change back to current path. 
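The fallback tagger wired in above is just an ordered regexp cascade; a trimmed, standalone sketch of the same idea (patterns abbreviated, sentence invented):

from nltk.tag import RegexpTagger

# Abbreviated version of the cascade above; the first matching pattern wins,
# and the catch-all '.*' tags everything else as a noun.
tagger = RegexpTagger([
    (r'\.$', '.'),
    (r'^-?[0-9]+(.[0-9]+)?$', 'CD'),   # cardinal numbers
    (r'(The|the|A|a|An|an)$', 'DT'),   # articles
    (r'.*ing$', 'VBG'),                # gerunds
    (r'.*', 'NN'),                     # default
])
print(tagger.tag('The 2 dogs keep running .'.split()))
# [('The', 'DT'), ('2', 'CD'), ('dogs', 'NN'), ('keep', 'NN'), ('running', 'VBG'), ('.', '.')]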
- if ret != 0: + if ret is not 0: raise Exception( "MaltParser parsing (%s) failed with exit " - "code %d" % (" ".join(cmd), ret) + "code %d" % (' '.join(cmd), ret) ) # Must return iter(iter(Tree)) with open(output_file.name) as infile: - for tree_str in infile.read().split("\n\n"): + for tree_str in infile.read().split('\n\n'): yield ( iter( [ @@ -221,7 +225,7 @@ class MaltParser(ParserI): os.remove(input_file.name) os.remove(output_file.name) - def parse_sents(self, sentences, verbose=False, top_relation_label="null"): + def parse_sents(self, sentences, verbose=False, top_relation_label='null'): """ Use MaltParser to parse multiple sentences. Takes a list of sentences, where each sentence is a list of words. @@ -247,26 +251,26 @@ class MaltParser(ParserI): :type outputfilename: str """ - cmd = ["java"] + cmd = ['java'] cmd += self.additional_java_args # Adds additional java arguments # Joins classpaths with ";" if on Windows and on Linux/Mac use ":" - classpaths_separator = ";" if sys.platform.startswith("win") else ":" + classpaths_separator = ';' if sys.platform.startswith('win') else ':' cmd += [ - "-cp", + '-cp', classpaths_separator.join(self.malt_jars), ] # Adds classpaths for jars - cmd += ["org.maltparser.Malt"] # Adds the main function. + cmd += ['org.maltparser.Malt'] # Adds the main function. # Adds the model file. if os.path.exists(self.model): # when parsing - cmd += ["-c", os.path.split(self.model)[-1]] + cmd += ['-c', os.path.split(self.model)[-1]] else: # when learning - cmd += ["-c", self.model] + cmd += ['-c', self.model] - cmd += ["-i", inputfilename] - if mode == "parse": - cmd += ["-o", outputfilename] - cmd += ["-m", mode] # mode use to generate parses. + cmd += ['-i', inputfilename] + if mode == 'parse': + cmd += ['-o', outputfilename] + cmd += ['-m', mode] # mode use to generate parses. return cmd @staticmethod @@ -285,10 +289,10 @@ class MaltParser(ParserI): # Write the conll_str to malt_train.conll file in /tmp/ with tempfile.NamedTemporaryFile( - prefix="malt_train.conll.", dir=self.working_dir, mode="w", delete=False + prefix='malt_train.conll.', dir=self.working_dir, mode='w', delete=False ) as input_file: - input_str = "\n".join(dg.to_conll(10) for dg in depgraphs) - input_file.write(str(input_str)) + input_str = '\n'.join(dg.to_conll(10) for dg in depgraphs) + input_file.write(text_type(input_str)) # Trains the model with the malt_train.conll self.train_from_file(input_file.name, verbose=verbose) # Removes the malt_train.conll once training finishes. @@ -305,11 +309,11 @@ class MaltParser(ParserI): # then we need to do some extra massaging if isinstance(conll_file, ZipFilePathPointer): with tempfile.NamedTemporaryFile( - prefix="malt_train.conll.", dir=self.working_dir, mode="w", delete=False + prefix='malt_train.conll.', dir=self.working_dir, mode='w', delete=False ) as input_file: with conll_file.open() as conll_input_file: conll_str = conll_input_file.read() - input_file.write(str(conll_str)) + input_file.write(text_type(conll_str)) return self.train_from_file(input_file.name, verbose=verbose) # Generate command to run maltparser. @@ -318,14 +322,14 @@ class MaltParser(ParserI): if ret != 0: raise Exception( "MaltParser training (%s) failed with exit " - "code %d" % (" ".join(cmd), ret) + "code %d" % (' '.join(cmd), ret) ) self._trained = True if __name__ == '__main__': - """ - A demonstration function to show how NLTK users can use the malt parser API. + ''' + A demostration function to show how NLTK users can use the malt parser API. 
>>> from nltk import pos_tag >>> assert 'MALT_PARSER' in os.environ, str( @@ -360,9 +364,9 @@ if __name__ == '__main__': >>> # Parse a single sentence. >>> parsed_sent1 = mp.parse_one(sent1) >>> parsed_sent2 = mp.parse_one(sent2) - >>> print(parsed_sent1.tree()) + >>> print (parsed_sent1.tree()) (sees John Mary .) - >>> print(parsed_sent2.tree()) + >>> print (parsed_sent2.tree()) (walks John (dog a) .) >>> >>> # Parsing multiple sentences. @@ -389,7 +393,7 @@ if __name__ == '__main__': (shot I (elephant an) (in (pajamas my)) .) >>> print(next(next(parsed_sents)).tree()) (flies Time (like banana) .) - """ - + ''' import doctest + doctest.testmod() diff --git a/nlp_resource_data/nltk/parse/nonprojectivedependencyparser.py b/nlp_resource_data/nltk/parse/nonprojectivedependencyparser.py index 2b901dd..9b8bddc 100644 --- a/nlp_resource_data/nltk/parse/nonprojectivedependencyparser.py +++ b/nlp_resource_data/nltk/parse/nonprojectivedependencyparser.py @@ -1,15 +1,18 @@ # Natural Language Toolkit: Dependency Grammars # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Jason Narad # # URL: # For license information, see LICENSE.TXT # +from __future__ import print_function import math import logging +from six.moves import range + from nltk.parse.dependencygraph import DependencyGraph logger = logging.getLogger(__name__) @@ -33,7 +36,7 @@ class DependencyScorerI(object): def __init__(self): if self.__class__ == DependencyScorerI: - raise TypeError("DependencyScorerI is an abstract interface") + raise TypeError('DependencyScorerI is an abstract interface') def train(self, graphs): """ @@ -115,17 +118,17 @@ class NaiveBayesDependencyScorer(DependencyScorerI): for graph in graphs: for head_node in graph.nodes.values(): for child_index, child_node in graph.nodes.items(): - if child_index in head_node["deps"]: + if child_index in head_node['deps']: label = "T" else: label = "F" labeled_examples.append( ( dict( - a=head_node["word"], - b=head_node["tag"], - c=child_node["word"], - d=child_node["tag"], + a=head_node['word'], + b=head_node['tag'], + c=child_node['word'], + d=child_node['tag'], ), label, ) @@ -152,10 +155,10 @@ class NaiveBayesDependencyScorer(DependencyScorerI): edges.append( ( dict( - a=head_node["word"], - b=head_node["tag"], - c=child_node["word"], - d=child_node["tag"], + a=head_node['word'], + b=head_node['tag'], + c=child_node['word'], + d=child_node['tag'], ) ) ) @@ -165,7 +168,7 @@ class NaiveBayesDependencyScorer(DependencyScorerI): row = [] count = 0 for pdist in self.classifier.prob_classify_many(edges): - logger.debug("%.4f %.4f", pdist.prob("T"), pdist.prob("F")) + logger.debug('%.4f %.4f', pdist.prob('T'), pdist.prob('F')) # smoothing in case the probability = 0 row.append([math.log(pdist.prob("T") + 0.00000000001)]) count += 1 @@ -182,7 +185,7 @@ class NaiveBayesDependencyScorer(DependencyScorerI): # A short class necessary to show parsing example from paper class DemoScorer(DependencyScorerI): def train(self, graphs): - print("Training...") + print('Training...') def score(self, graph): # scores for Keith Hall 'K-best Spanning Tree Parsing' paper @@ -254,7 +257,7 @@ class ProbabilisticNonprojectiveParser(object): """ Creates a new non-projective parser. """ - logging.debug("initializing prob. nonprojective...") + logging.debug('initializing prob. 
nonprojective...') def train(self, graphs, dependency_scorer): """ @@ -296,12 +299,12 @@ class ProbabilisticNonprojectiveParser(object): :type g_graph, b_graph, c_graph: DependencyGraph :param g_graph, b_graph, c_graph: Graphs which need to be updated. """ - logger.debug("Collapsing nodes...") + logger.debug('Collapsing nodes...') # Collapse all cycle nodes into v_n+1 in G_Graph for cycle_node_index in cycle_path: g_graph.remove_by_address(cycle_node_index) g_graph.add_node(new_node) - g_graph.redirect_arcs(cycle_path, new_node["address"]) + g_graph.redirect_arcs(cycle_path, new_node['address']) def update_edge_scores(self, new_node, cycle_path): """ @@ -313,12 +316,12 @@ class ProbabilisticNonprojectiveParser(object): :type cycle_path: A list of integers. :param cycle_path: A list of node addresses that belong to the cycle. """ - logger.debug("cycle %s", cycle_path) + logger.debug('cycle %s', cycle_path) cycle_path = self.compute_original_indexes(cycle_path) - logger.debug("old cycle %s", cycle_path) - logger.debug("Prior to update: %s", self.scores) + logger.debug('old cycle %s', cycle_path) + logger.debug('Prior to update: %s', self.scores) for i, row in enumerate(self.scores): for j, column in enumerate(self.scores[i]): @@ -326,7 +329,7 @@ class ProbabilisticNonprojectiveParser(object): if j in cycle_path and i not in cycle_path and self.scores[i][j]: subtract_val = self.compute_max_subtract_score(j, cycle_path) - logger.debug("%s - %s", self.scores[i][j], subtract_val) + logger.debug('%s - %s', self.scores[i][j], subtract_val) new_vals = [] for cur_val in self.scores[i][j]: @@ -339,7 +342,7 @@ class ProbabilisticNonprojectiveParser(object): if i in cycle_path and j in cycle_path: self.scores[i][j] = [] - logger.debug("After update: %s", self.scores) + logger.debug('After update: %s', self.scores) def compute_original_indexes(self, new_indexes): """ @@ -398,18 +401,19 @@ class ProbabilisticNonprojectiveParser(object): the node that is arced to. 
""" originals = self.compute_original_indexes([node_index]) - logger.debug("originals: %s", originals) + logger.debug('originals: %s', originals) max_arc = None max_score = None for row_index in range(len(self.scores)): for col_index in range(len(self.scores[row_index])): + # print self.scores[row_index][col_index] if col_index in originals and ( max_score is None or self.scores[row_index][col_index] > max_score ): max_score = self.scores[row_index][col_index] max_arc = row_index - logger.debug("%s, %s", row_index, col_index) + logger.debug('%s, %s', row_index, col_index) logger.debug(max_score) @@ -456,15 +460,16 @@ class ProbabilisticNonprojectiveParser(object): g_graph = DependencyGraph() for index, token in enumerate(tokens): g_graph.nodes[index + 1].update( - {"word": token, "tag": tags[index], "rel": "NTOP", "address": index + 1} + {'word': token, 'tag': tags[index], 'rel': 'NTOP', 'address': index + 1} ) + # print (g_graph.nodes) # Fully connect non-root nodes in g_graph g_graph.connect_graph() original_graph = DependencyGraph() for index, token in enumerate(tokens): original_graph.nodes[index + 1].update( - {"word": token, "tag": tags[index], "rel": "NTOP", "address": index + 1} + {'word': token, 'tag': tags[index], 'rel': 'NTOP', 'address': index + 1} ) b_graph = DependencyGraph() @@ -472,32 +477,32 @@ class ProbabilisticNonprojectiveParser(object): for index, token in enumerate(tokens): c_graph.nodes[index + 1].update( - {"word": token, "tag": tags[index], "rel": "NTOP", "address": index + 1} + {'word': token, 'tag': tags[index], 'rel': 'NTOP', 'address': index + 1} ) # Assign initial scores to g_graph edges self.initialize_edge_scores(g_graph) logger.debug(self.scores) # Initialize a list of unvisited vertices (by node address) - unvisited_vertices = [vertex["address"] for vertex in c_graph.nodes.values()] + unvisited_vertices = [vertex['address'] for vertex in c_graph.nodes.values()] # Iterate over unvisited vertices nr_vertices = len(tokens) betas = {} while unvisited_vertices: # Mark current node as visited current_vertex = unvisited_vertices.pop(0) - logger.debug("current_vertex: %s", current_vertex) + logger.debug('current_vertex: %s', current_vertex) # Get corresponding node n_i to vertex v_i current_node = g_graph.get_by_address(current_vertex) - logger.debug("current_node: %s", current_node) + logger.debug('current_node: %s', current_node) # Get best in-edge node b for current node best_in_edge = self.best_incoming_arc(current_vertex) betas[current_vertex] = self.original_best_arc(current_vertex) - logger.debug("best in arc: %s --> %s", best_in_edge, current_vertex) + logger.debug('best in arc: %s --> %s', best_in_edge, current_vertex) # b_graph = Union(b_graph, b) for new_vertex in [current_vertex, best_in_edge]: b_graph.nodes[new_vertex].update( - {"word": "TEMP", "rel": "NTOP", "address": new_vertex} + {'word': 'TEMP', 'rel': 'NTOP', 'address': new_vertex} ) b_graph.add_arc(best_in_edge, current_vertex) # Beta(current node) = b - stored for parse recovery @@ -505,17 +510,17 @@ class ProbabilisticNonprojectiveParser(object): cycle_path = b_graph.contains_cycle() if cycle_path: # Create a new node v_n+1 with address = len(nodes) + 1 - new_node = {"word": "NONE", "rel": "NTOP", "address": nr_vertices + 1} + new_node = {'word': 'NONE', 'rel': 'NTOP', 'address': nr_vertices + 1} # c_graph = Union(c_graph, v_n+1) c_graph.add_node(new_node) # Collapse all nodes in cycle C into v_n+1 self.update_edge_scores(new_node, cycle_path) self.collapse_nodes(new_node, cycle_path, 
g_graph, b_graph, c_graph) for cycle_index in cycle_path: - c_graph.add_arc(new_node["address"], cycle_index) + c_graph.add_arc(new_node['address'], cycle_index) # self.replaced_by[cycle_index] = new_node['address'] - self.inner_nodes[new_node["address"]] = cycle_path + self.inner_nodes[new_node['address']] = cycle_path # Add v_n+1 to list of unvisited vertices unvisited_vertices.insert(0, nr_vertices + 1) @@ -527,30 +532,30 @@ class ProbabilisticNonprojectiveParser(object): for cycle_node_address in cycle_path: b_graph.remove_by_address(cycle_node_address) - logger.debug("g_graph: %s", g_graph) - logger.debug("b_graph: %s", b_graph) - logger.debug("c_graph: %s", c_graph) - logger.debug("Betas: %s", betas) - logger.debug("replaced nodes %s", self.inner_nodes) + logger.debug('g_graph: %s', g_graph) + logger.debug('b_graph: %s', b_graph) + logger.debug('c_graph: %s', c_graph) + logger.debug('Betas: %s', betas) + logger.debug('replaced nodes %s', self.inner_nodes) # Recover parse tree - logger.debug("Final scores: %s", self.scores) + logger.debug('Final scores: %s', self.scores) - logger.debug("Recovering parse...") + logger.debug('Recovering parse...') for i in range(len(tokens) + 1, nr_vertices + 1): betas[betas[i][1]] = betas[i] - logger.debug("Betas: %s", betas) + logger.debug('Betas: %s', betas) for node in original_graph.nodes.values(): # TODO: It's dangerous to assume that deps it a dictionary # because it's a default dictionary. Ideally, here we should not # be concerned how dependencies are stored inside of a dependency # graph. - node["deps"] = {} + node['deps'] = {} for i in range(1, len(tokens) + 1): original_graph.add_arc(betas[i][0], betas[i][1]) - logger.debug("Done.") + logger.debug('Done.') yield original_graph @@ -598,21 +603,21 @@ class NonprojectiveDependencyParser(object): for index, token in enumerate(tokens): self._graph.nodes[index] = { - "word": token, - "deps": [], - "rel": "NTOP", - "address": index, + 'word': token, + 'deps': [], + 'rel': 'NTOP', + 'address': index, } for head_node in self._graph.nodes.values(): deps = [] for dep_node in self._graph.nodes.values(): if ( - self._grammar.contains(head_node["word"], dep_node["word"]) - and head_node["word"] != dep_node["word"] + self._grammar.contains(head_node['word'], dep_node['word']) + and head_node['word'] != dep_node['word'] ): - deps.append(dep_node["address"]) - head_node["deps"] = deps + deps.append(dep_node['address']) + head_node['deps'] = deps # Create lattice of possible heads roots = [] @@ -692,13 +697,13 @@ class NonprojectiveDependencyParser(object): head_address = head_index + 1 node = graph.nodes[address] - node.update({"word": token, "address": address}) + node.update({'word': token, 'address': address}) if head_address == 0: - rel = "ROOT" + rel = 'ROOT' else: - rel = "" - graph.nodes[head_index + 1]["deps"][rel].append(address) + rel = '' + graph.nodes[head_index + 1]['deps'][rel].append(address) # TODO: check for cycles yield graph @@ -718,18 +723,18 @@ def demo(): def hall_demo(): npp = ProbabilisticNonprojectiveParser() npp.train([], DemoScorer()) - for parse_graph in npp.parse(["v1", "v2", "v3"], [None, None, None]): + for parse_graph in npp.parse(['v1', 'v2', 'v3'], [None, None, None]): print(parse_graph) def nonprojective_conll_parse_demo(): from nltk.parse.dependencygraph import conll_data2 - graphs = [DependencyGraph(entry) for entry in conll_data2.split("\n\n") if entry] + graphs = [DependencyGraph(entry) for entry in conll_data2.split('\n\n') if entry] npp = 
ProbabilisticNonprojectiveParser() npp.train(graphs, NaiveBayesDependencyScorer()) for parse_graph in npp.parse( - ["Cathy", "zag", "hen", "zwaaien", "."], ["N", "V", "Pron", "Adj", "N", "Punc"] + ['Cathy', 'zag', 'hen', 'zwaaien', '.'], ['N', 'V', 'Pron', 'Adj', 'N', 'Punc'] ): print(parse_graph) @@ -751,23 +756,23 @@ def rule_based_demo(): ndp = NonprojectiveDependencyParser(grammar) graphs = ndp.parse( [ - "the", - "man", - "in", - "the", - "corner", - "taught", - "his", - "dachshund", - "to", - "play", - "golf", + 'the', + 'man', + 'in', + 'the', + 'corner', + 'taught', + 'his', + 'dachshund', + 'to', + 'play', + 'golf', ] ) - print("Graphs:") + print('Graphs:') for graph in graphs: print(graph) -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/parse/pchart.py b/nlp_resource_data/nltk/parse/pchart.py index 2b14eab..924d9a6 100644 --- a/nlp_resource_data/nltk/parse/pchart.py +++ b/nlp_resource_data/nltk/parse/pchart.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Probabilistic Chart Parsers # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # Steven Bird # URL: @@ -29,6 +29,7 @@ The ``BottomUpProbabilisticChartParser`` constructor has an optional argument beam_size. If non-zero, this controls the size of the beam (aka the edge queue). This option is most useful with InsideChartParser. """ +from __future__ import print_function, unicode_literals ##////////////////////////////////////////////////////// ## Bottom-Up PCFG Chart Parser @@ -44,6 +45,7 @@ from nltk.grammar import Nonterminal, PCFG from nltk.parse.api import ParserI from nltk.parse.chart import Chart, LeafEdge, TreeEdge, AbstractChartRule +from nltk.compat import python_2_unicode_compatible # Probabilistic edges class ProbabilisticLeafEdge(LeafEdge): @@ -128,6 +130,7 @@ class ProbabilisticFundamentalRule(AbstractChartRule): yield new_edge +@python_2_unicode_compatible class SingleEdgeProbabilisticFundamentalRule(AbstractChartRule): NUM_EDGES = 1 @@ -151,7 +154,7 @@ class SingleEdgeProbabilisticFundamentalRule(AbstractChartRule): yield new_edge def __str__(self): - return "Fundamental Rule" + return 'Fundamental Rule' class BottomUpProbabilisticChartParser(ParserI): @@ -236,7 +239,7 @@ class BottomUpProbabilisticChartParser(ParserI): for edge in bu_init.apply(chart, grammar): if self._trace > 1: print( - " %-50s [%s]" + ' %-50s [%s]' % (chart.pretty_format_edge(edge, width=2), edge.prob()) ) queue.append(edge) @@ -253,7 +256,7 @@ class BottomUpProbabilisticChartParser(ParserI): edge = queue.pop() if self._trace > 0: print( - " %-50s [%s]" + ' %-50s [%s]' % (chart.pretty_format_edge(edge, width=2), edge.prob()) ) @@ -322,7 +325,7 @@ class BottomUpProbabilisticChartParser(ParserI): split = len(queue) - self.beam_size if self._trace > 2: for edge in queue[:split]: - print(" %-50s [DISCARDED]" % chart.pretty_format_edge(edge, 2)) + print(' %-50s [DISCARDED]' % chart.pretty_format_edge(edge, 2)) del queue[:split] @@ -380,7 +383,7 @@ class InsideChartParser(BottomUpProbabilisticChartParser): # bestp.get(elt,0)) # # self._bestp = bestp -# for (k,v) in self._bestp.items(): print(k,v) +# for (k,v) in self._bestp.items(): print k,v # # def _sortkey(self, edge): # return edge.structure()[PROB] * self._bestp[edge.lhs()] @@ -482,23 +485,23 @@ def demo(choice=None, draw_parses=None, print_parses=None): ) demos = [ - ("I saw John with my telescope", toy_pcfg1), - ("the boy saw Jack with Bob under the table with a telescope", toy_pcfg2), + ('I 
saw John with my telescope', toy_pcfg1), + ('the boy saw Jack with Bob under the table with a telescope', toy_pcfg2), ] if choice is None: # Ask the user which demo they want to use. print() for i in range(len(demos)): - print("%3s: %s" % (i + 1, demos[i][0])) - print(" %r" % demos[i][1]) + print('%3s: %s' % (i + 1, demos[i][0])) + print(' %r' % demos[i][1]) print() - print("Which demo (%d-%d)? " % (1, len(demos)), end=" ") + print('Which demo (%d-%d)? ' % (1, len(demos)), end=' ') choice = int(sys.stdin.readline().strip()) - 1 try: sent, grammar = demos[choice] except: - print("Bad sentence number") + print('Bad sentence number') return # Tokenize the sentence. @@ -519,7 +522,7 @@ def demo(choice=None, draw_parses=None, print_parses=None): num_parses = [] all_parses = {} for parser in parsers: - print("\ns: %s\nparser: %s\ngrammar: %s" % (sent, parser, grammar)) + print('\ns: %s\nparser: %s\ngrammar: %s' % (sent, parser, grammar)) parser.trace(3) t = time.time() parses = list(parser.parse(tokens)) @@ -532,11 +535,11 @@ def demo(choice=None, draw_parses=None, print_parses=None): # Print some summary statistics print() - print(" Parser Beam | Time (secs) # Parses Average P(parse)") - print("------------------------+------------------------------------------") + print(' Parser Beam | Time (secs) # Parses Average P(parse)') + print('------------------------+------------------------------------------') for i in range(len(parsers)): print( - "%18s %4d |%11.4f%11d%19.14f" + '%18s %4d |%11.4f%11d%19.14f' % ( parsers[i].__class__.__name__, parsers[i].beam_size, @@ -550,29 +553,29 @@ def demo(choice=None, draw_parses=None, print_parses=None): p = reduce(lambda a, b: a + b.prob(), parses, 0) / len(parses) else: p = 0 - print("------------------------+------------------------------------------") - print("%18s |%11s%11d%19.14f" % ("(All Parses)", "n/a", len(parses), p)) + print('------------------------+------------------------------------------') + print('%18s |%11s%11d%19.14f' % ('(All Parses)', 'n/a', len(parses), p)) if draw_parses is None: # Ask the user if we should draw the parses. print() - print("Draw parses (y/n)? ", end=" ") - draw_parses = sys.stdin.readline().strip().lower().startswith("y") + print('Draw parses (y/n)? ', end=' ') + draw_parses = sys.stdin.readline().strip().lower().startswith('y') if draw_parses: from nltk.draw.tree import draw_trees - print(" please wait...") + print(' please wait...') draw_trees(*parses) if print_parses is None: # Ask the user if we should print the parses. print() - print("Print parses (y/n)? ", end=" ") - print_parses = sys.stdin.readline().strip().lower().startswith("y") + print('Print parses (y/n)? 
', end=' ') + print_parses = sys.stdin.readline().strip().lower().startswith('y') if print_parses: for parse in parses: print(parse) -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/parse/projectivedependencyparser.py b/nlp_resource_data/nltk/parse/projectivedependencyparser.py index b4d56cf..d29ee8c 100644 --- a/nlp_resource_data/nltk/parse/projectivedependencyparser.py +++ b/nlp_resource_data/nltk/parse/projectivedependencyparser.py @@ -1,11 +1,12 @@ # Natural Language Toolkit: Dependency Grammars # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Jason Narad # # URL: # For license information, see LICENSE.TXT # +from __future__ import print_function, unicode_literals from collections import defaultdict from itertools import chain @@ -18,7 +19,7 @@ from nltk.grammar import ( ) from nltk.parse.dependencygraph import DependencyGraph from nltk.internals import raise_unorderable_types - +from nltk.compat import python_2_unicode_compatible ################################################################# # Dependency Span @@ -26,6 +27,7 @@ from nltk.internals import raise_unorderable_types @total_ordering +@python_2_unicode_compatible class DependencySpan(object): """ A contiguous span over some part of the input string representing @@ -59,7 +61,7 @@ class DependencySpan(object): :return: A concise string representatino of the ``DependencySpan``. :rtype: str. """ - return "Span %d-%d; Head Index: %d" % ( + return 'Span %d-%d; Head Index: %d' % ( self._start_index, self._end_index, self._head_index, @@ -70,13 +72,13 @@ class DependencySpan(object): :return: A verbose string representation of the ``DependencySpan``. :rtype: str """ - str = "Span %d-%d; Head Index: %d" % ( + str = 'Span %d-%d; Head Index: %d' % ( self._start_index, self._end_index, self._head_index, ) for i in range(len(self._arcs)): - str += "\n%d <- %d, %s" % (i, self._arcs[i], self._tags[i]) + str += '\n%d <- %d, %s' % (i, self._arcs[i], self._tags[i]) return str def __eq__(self, other): @@ -104,6 +106,7 @@ class DependencySpan(object): ################################################################# +@python_2_unicode_compatible class ChartCell(object): """ A cell from the parse chart formed when performing the CYK algorithm. @@ -137,14 +140,14 @@ class ChartCell(object): :return: A verbose string representation of this ``ChartCell``. :rtype: str. """ - return "CC[%d,%d]: %s" % (self._x, self._y, self._entries) + return 'CC[%d,%d]: %s' % (self._x, self._y, self._entries) def __repr__(self): """ :return: A concise string representation of this ``ChartCell``. :rtype: str. 
""" - return "%s" % self + return '%s' % self ################################################################# @@ -192,7 +195,7 @@ class ProjectiveDependencyParser(object): for j in range(0, len(self._tokens) + 1): chart[i].append(ChartCell(i, j)) if i == j + 1: - chart[i][j].add(DependencySpan(i - 1, i, i - 1, [-1], ["null"])) + chart[i][j].add(DependencySpan(i - 1, i, i - 1, [-1], ['null'])) for i in range(1, len(self._tokens) + 1): for j in range(i - 2, -1, -1): @@ -209,17 +212,17 @@ class ProjectiveDependencyParser(object): # malt_format += '%s\t%s\t%d\t%s\n' % (tokens[i], 'null', parse._arcs[i] + 1, 'null') # conll_format += '\t%d\t%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n' % (i+1, tokens[i], tokens[i], 'null', 'null', 'null', parse._arcs[i] + 1, 'null', '-', '-') # Modify to comply with the new Dependency Graph requirement (at least must have an root elements) - conll_format += "\t%d\t%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n" % ( + conll_format += '\t%d\t%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n' % ( i + 1, tokens[i], tokens[i], - "null", - "null", - "null", + 'null', + 'null', + 'null', parse._arcs[i] + 1, - "ROOT", - "-", - "-", + 'ROOT', + '-', + '-', ) dg = DependencyGraph(conll_format) # if self.meets_arity(dg): @@ -239,7 +242,7 @@ class ProjectiveDependencyParser(object): """ spans = [] if span1._start_index == span2._start_index: - print("Error: Mismatched spans - replace this with thrown error") + print('Error: Mismatched spans - replace this with thrown error') if span1._start_index > span2._start_index: temp_span = span1 span1 = span2 @@ -250,7 +253,7 @@ class ProjectiveDependencyParser(object): if self._grammar.contains( self._tokens[span1._head_index], self._tokens[span2._head_index] ): - # print('Performing rightward cover %d to %d' % (span1._head_index, span2._head_index)) + # print 'Performing rightward cover %d to %d' % (span1._head_index, span2._head_index) new_arcs[span2._head_index - span1._start_index] = span1._head_index spans.append( DependencySpan( @@ -266,7 +269,7 @@ class ProjectiveDependencyParser(object): if self._grammar.contains( self._tokens[span2._head_index], self._tokens[span1._head_index] ): - # print('performing leftward cover %d to %d' % (span2._head_index, span1._head_index)) + # print 'performing leftward cover %d to %d' % (span2._head_index, span1._head_index) new_arcs[span1._head_index - span1._start_index] = span2._head_index spans.append( DependencySpan( @@ -340,7 +343,7 @@ class ProbabilisticProjectiveDependencyParser(object): ) else: print( - "No tag found for input token '%s', parse is impossible." + 'No tag found for input token \'%s\', parse is impossible.' % tokens[i - 1] ) return [] @@ -358,25 +361,25 @@ class ProbabilisticProjectiveDependencyParser(object): conll_format = "" malt_format = "" for i in range(len(tokens)): - malt_format += "%s\t%s\t%d\t%s\n" % ( + malt_format += '%s\t%s\t%d\t%s\n' % ( tokens[i], - "null", + 'null', parse._arcs[i] + 1, - "null", + 'null', ) # conll_format += '\t%d\t%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n' % (i+1, tokens[i], tokens[i], parse._tags[i], parse._tags[i], 'null', parse._arcs[i] + 1, 'null', '-', '-') # Modify to comply with recent change in dependency graph such that there must be a ROOT element. 
- conll_format += "\t%d\t%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n" % ( + conll_format += '\t%d\t%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n' % ( i + 1, tokens[i], tokens[i], parse._tags[i], parse._tags[i], - "null", + 'null', parse._arcs[i] + 1, - "ROOT", - "-", - "-", + 'ROOT', + '-', + '-', ) dg = DependencyGraph(conll_format) score = self.compute_prob(dg) @@ -398,7 +401,7 @@ class ProbabilisticProjectiveDependencyParser(object): """ spans = [] if span1._start_index == span2._start_index: - print("Error: Mismatched spans - replace this with thrown error") + print('Error: Mismatched spans - replace this with thrown error') if span1._start_index > span2._start_index: temp_span = span1 span1 = span2 @@ -453,7 +456,7 @@ class ProbabilisticProjectiveDependencyParser(object): for dg in graphs: for node_index in range(1, len(dg.nodes)): # children = dg.nodes[node_index]['deps'] - children = list(chain(*dg.nodes[node_index]["deps"].values())) + children = list(chain(*dg.nodes[node_index]['deps'].values())) nr_left_children = dg.left_children(node_index) nr_right_children = dg.right_children(node_index) @@ -461,34 +464,34 @@ class ProbabilisticProjectiveDependencyParser(object): for child_index in range( 0 - (nr_left_children + 1), nr_right_children + 2 ): - head_word = dg.nodes[node_index]["word"] - head_tag = dg.nodes[node_index]["tag"] + head_word = dg.nodes[node_index]['word'] + head_tag = dg.nodes[node_index]['tag'] if head_word in tags: tags[head_word].add(head_tag) else: tags[head_word] = set([head_tag]) - child = "STOP" - child_tag = "STOP" - prev_word = "START" - prev_tag = "START" + child = 'STOP' + child_tag = 'STOP' + prev_word = 'START' + prev_tag = 'START' if child_index < 0: array_index = child_index + nr_left_children if array_index >= 0: - child = dg.nodes[children[array_index]]["word"] - child_tag = dg.nodes[children[array_index]]["tag"] + child = dg.nodes[children[array_index]]['word'] + child_tag = dg.nodes[children[array_index]]['tag'] if child_index != -1: - prev_word = dg.nodes[children[array_index + 1]]["word"] - prev_tag = dg.nodes[children[array_index + 1]]["tag"] - if child != "STOP": + prev_word = dg.nodes[children[array_index + 1]]['word'] + prev_tag = dg.nodes[children[array_index + 1]]['tag'] + if child != 'STOP': productions.append(DependencyProduction(head_word, [child])) - head_event = "(head (%s %s) (mods (%s, %s, %s) left))" % ( + head_event = '(head (%s %s) (mods (%s, %s, %s) left))' % ( child, child_tag, prev_tag, head_word, head_tag, ) - mod_event = "(mods (%s, %s, %s) left))" % ( + mod_event = '(mods (%s, %s, %s) left))' % ( prev_tag, head_word, head_tag, @@ -498,21 +501,21 @@ class ProbabilisticProjectiveDependencyParser(object): elif child_index > 0: array_index = child_index + nr_left_children - 1 if array_index < nr_children: - child = dg.nodes[children[array_index]]["word"] - child_tag = dg.nodes[children[array_index]]["tag"] + child = dg.nodes[children[array_index]]['word'] + child_tag = dg.nodes[children[array_index]]['tag'] if child_index != 1: - prev_word = dg.nodes[children[array_index - 1]]["word"] - prev_tag = dg.nodes[children[array_index - 1]]["tag"] - if child != "STOP": + prev_word = dg.nodes[children[array_index - 1]]['word'] + prev_tag = dg.nodes[children[array_index - 1]]['tag'] + if child != 'STOP': productions.append(DependencyProduction(head_word, [child])) - head_event = "(head (%s %s) (mods (%s, %s, %s) right))" % ( + head_event = '(head (%s %s) (mods (%s, %s, %s) right))' % ( child, child_tag, prev_tag, head_word, head_tag, ) - mod_event = 
"(mods (%s, %s, %s) right))" % ( + mod_event = '(mods (%s, %s, %s) right))' % ( prev_tag, head_word, head_tag, @@ -535,34 +538,34 @@ class ProbabilisticProjectiveDependencyParser(object): prob = 1.0 for node_index in range(1, len(dg.nodes)): # children = dg.nodes[node_index]['deps'] - children = list(chain(*dg.nodes[node_index]["deps"].values())) + children = list(chain(*dg.nodes[node_index]['deps'].values())) nr_left_children = dg.left_children(node_index) nr_right_children = dg.right_children(node_index) nr_children = nr_left_children + nr_right_children for child_index in range(0 - (nr_left_children + 1), nr_right_children + 2): - head_word = dg.nodes[node_index]["word"] - head_tag = dg.nodes[node_index]["tag"] - child = "STOP" - child_tag = "STOP" - prev_word = "START" - prev_tag = "START" + head_word = dg.nodes[node_index]['word'] + head_tag = dg.nodes[node_index]['tag'] + child = 'STOP' + child_tag = 'STOP' + prev_word = 'START' + prev_tag = 'START' if child_index < 0: array_index = child_index + nr_left_children if array_index >= 0: - child = dg.nodes[children[array_index]]["word"] - child_tag = dg.nodes[children[array_index]]["tag"] + child = dg.nodes[children[array_index]]['word'] + child_tag = dg.nodes[children[array_index]]['tag'] if child_index != -1: - prev_word = dg.nodes[children[array_index + 1]]["word"] - prev_tag = dg.nodes[children[array_index + 1]]["tag"] - head_event = "(head (%s %s) (mods (%s, %s, %s) left))" % ( + prev_word = dg.nodes[children[array_index + 1]]['word'] + prev_tag = dg.nodes[children[array_index + 1]]['tag'] + head_event = '(head (%s %s) (mods (%s, %s, %s) left))' % ( child, child_tag, prev_tag, head_word, head_tag, ) - mod_event = "(mods (%s, %s, %s) left))" % ( + mod_event = '(mods (%s, %s, %s) left))' % ( prev_tag, head_word, head_tag, @@ -579,19 +582,19 @@ class ProbabilisticProjectiveDependencyParser(object): elif child_index > 0: array_index = child_index + nr_left_children - 1 if array_index < nr_children: - child = dg.nodes[children[array_index]]["word"] - child_tag = dg.nodes[children[array_index]]["tag"] + child = dg.nodes[children[array_index]]['word'] + child_tag = dg.nodes[children[array_index]]['tag'] if child_index != 1: - prev_word = dg.nodes[children[array_index - 1]]["word"] - prev_tag = dg.nodes[children[array_index - 1]]["tag"] - head_event = "(head (%s %s) (mods (%s, %s, %s) right))" % ( + prev_word = dg.nodes[children[array_index - 1]]['word'] + prev_tag = dg.nodes[children[array_index - 1]]['tag'] + head_event = '(head (%s %s) (mods (%s, %s, %s) right))' % ( child, child_tag, prev_tag, head_word, head_tag, ) - mod_event = "(mods (%s, %s, %s) right))" % ( + mod_event = '(mods (%s, %s, %s) right))' % ( prev_tag, head_word, head_tag, @@ -633,7 +636,7 @@ def projective_rule_parse_demo(): ) print(grammar) pdp = ProjectiveDependencyParser(grammar) - trees = pdp.parse(["the", "cats", "scratch", "the", "walls"]) + trees = pdp.parse(['the', 'cats', 'scratch', 'the', 'walls']) for tree in trees: print(tree) @@ -646,9 +649,9 @@ def arity_parse_demo(): created by a ``ProjectiveDependencyParser``. """ print() - print("A grammar with no arity constraints. Each DependencyProduction") - print("specifies a relationship between one head word and only one") - print("modifier word.") + print('A grammar with no arity constraints. 
Each DependencyProduction') + print('specifies a relationship between one head word and only one') + print('modifier word.') grammar = DependencyGrammar.fromstring( """ 'fell' -> 'price' | 'stock' @@ -660,18 +663,18 @@ def arity_parse_demo(): print(grammar) print() - print("For the sentence 'The price of the stock fell', this grammar") - print("will produce the following three parses:") + print('For the sentence \'The price of the stock fell\', this grammar') + print('will produce the following three parses:') pdp = ProjectiveDependencyParser(grammar) - trees = pdp.parse(["the", "price", "of", "the", "stock", "fell"]) + trees = pdp.parse(['the', 'price', 'of', 'the', 'stock', 'fell']) for tree in trees: print(tree) print() - print("By contrast, the following grammar contains a ") - print("DependencyProduction that specifies a relationship") - print("between a single head word, 'price', and two modifier") - print("words, 'of' and 'the'.") + print('By contrast, the following grammar contains a ') + print('DependencyProduction that specifies a relationship') + print('between a single head word, \'price\', and two modifier') + print('words, \'of\' and \'the\'.') grammar = DependencyGrammar.fromstring( """ 'fell' -> 'price' | 'stock' @@ -684,10 +687,10 @@ def arity_parse_demo(): print() print( - "This constrains the number of possible parses to just one:" + 'This constrains the number of possible parses to just one:' ) # unimplemented, soon to replace pdp = ProjectiveDependencyParser(grammar) - trees = pdp.parse(["the", "price", "of", "the", "stock", "fell"]) + trees = pdp.parse(['the', 'price', 'of', 'the', 'stock', 'fell']) for tree in trees: print(tree) @@ -699,17 +702,17 @@ def projective_prob_parse_demo(): """ from nltk.parse.dependencygraph import conll_data2 - graphs = [DependencyGraph(entry) for entry in conll_data2.split("\n\n") if entry] + graphs = [DependencyGraph(entry) for entry in conll_data2.split('\n\n') if entry] ppdp = ProbabilisticProjectiveDependencyParser() - print("Training Probabilistic Projective Dependency Parser...") + print('Training Probabilistic Projective Dependency Parser...') ppdp.train(graphs) - sent = ["Cathy", "zag", "hen", "wild", "zwaaien", "."] - print("Parsing '", " ".join(sent), "'...") - print("Parse:") + sent = ['Cathy', 'zag', 'hen', 'wild', 'zwaaien', '.'] + print('Parsing \'', " ".join(sent), '\'...') + print('Parse:') for tree in ppdp.parse(sent): print(tree) -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/parse/recursivedescent.py b/nlp_resource_data/nltk/parse/recursivedescent.py index 8496d4c..a9ab322 100644 --- a/nlp_resource_data/nltk/parse/recursivedescent.py +++ b/nlp_resource_data/nltk/parse/recursivedescent.py @@ -1,13 +1,15 @@ # Natural Language Toolkit: Recursive Descent Parser # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # Steven Bird # URL: # For license information, see LICENSE.TXT +from __future__ import print_function, unicode_literals from nltk.grammar import Nonterminal from nltk.tree import Tree, ImmutableTree +from nltk.compat import unicode_repr from nltk.parse.api import ParserI @@ -278,17 +280,17 @@ class RecursiveDescentParser(ParserI): """ if treeloc == (): - print("*", end=" ") + print("*", end=' ') if isinstance(tree, Tree): if len(tree) == 0: - print(repr(Nonterminal(tree.label())), end=" ") + print(unicode_repr(Nonterminal(tree.label())), end=' ') for i in range(len(tree)): if treeloc is not None and i == 
treeloc[0]: self._trace_fringe(tree[i], treeloc[1:]) else: self._trace_fringe(tree[i]) else: - print(repr(tree), end=" ") + print(unicode_repr(tree), end=' ') def _trace_tree(self, tree, frontier, operation): """ @@ -299,48 +301,48 @@ class RecursiveDescentParser(ParserI): :rtype: None """ if self._trace == 2: - print(" %c [" % operation, end=" ") + print(' %c [' % operation, end=' ') else: - print(" [", end=" ") + print(' [', end=' ') if len(frontier) > 0: self._trace_fringe(tree, frontier[0]) else: self._trace_fringe(tree) - print("]") + print(']') def _trace_start(self, tree, frontier, text): - print("Parsing %r" % " ".join(text)) + print('Parsing %r' % " ".join(text)) if self._trace > 2: - print("Start:") + print('Start:') if self._trace > 1: - self._trace_tree(tree, frontier, " ") + self._trace_tree(tree, frontier, ' ') def _trace_expand(self, tree, frontier, production): if self._trace > 2: - print("Expand: %s" % production) + print('Expand: %s' % production) if self._trace > 1: - self._trace_tree(tree, frontier, "E") + self._trace_tree(tree, frontier, 'E') def _trace_match(self, tree, frontier, tok): if self._trace > 2: - print("Match: %r" % tok) + print('Match: %r' % tok) if self._trace > 1: - self._trace_tree(tree, frontier, "M") + self._trace_tree(tree, frontier, 'M') def _trace_succeed(self, tree, frontier): if self._trace > 2: - print("GOOD PARSE:") + print('GOOD PARSE:') if self._trace == 1: - print("Found a parse:\n%s" % tree) + print('Found a parse:\n%s' % tree) if self._trace > 1: - self._trace_tree(tree, frontier, "+") + self._trace_tree(tree, frontier, '+') def _trace_backtrack(self, tree, frontier, toks=None): if self._trace > 2: if toks: - print("Backtrack: %r match failed" % toks[0]) + print('Backtrack: %r match failed' % toks[0]) else: - print("Backtrack") + print('Backtrack') ##////////////////////////////////////////////////////// @@ -678,11 +680,11 @@ def demo(): for prod in grammar.productions(): print(prod) - sent = "I saw a man in the park".split() + sent = 'I saw a man in the park'.split() parser = parse.RecursiveDescentParser(grammar, trace=2) for p in parser.parse(sent): print(p) -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/parse/shiftreduce.py b/nlp_resource_data/nltk/parse/shiftreduce.py index 5991465..a3514db 100644 --- a/nlp_resource_data/nltk/parse/shiftreduce.py +++ b/nlp_resource_data/nltk/parse/shiftreduce.py @@ -1,13 +1,15 @@ # Natural Language Toolkit: Shift-Reduce Parser # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # Steven Bird # URL: # For license information, see LICENSE.TXT +from __future__ import print_function, unicode_literals from nltk.grammar import Nonterminal from nltk.tree import Tree +from nltk.compat import unicode_repr from nltk.parse.api import ParserI @@ -86,7 +88,7 @@ class ShiftReduceParser(ParserI): # Trace output. if self._trace: - print("Parsing %r" % " ".join(tokens)) + print('Parsing %r' % " ".join(tokens)) self._trace_stack(stack, remaining_text) # iterate through the text, pushing the token onto @@ -214,7 +216,7 @@ class ShiftReduceParser(ParserI): # 3: display which tokens & productions are shifed/reduced self._trace = trace - def _trace_stack(self, stack, remaining_text, marker=" "): + def _trace_stack(self, stack, remaining_text, marker=' '): """ Print trace output displaying the given stack and text. @@ -223,13 +225,13 @@ class ShiftReduceParser(ParserI): stack. 
This is used with trace level 2 to print 'S' before shifted stacks and 'R' before reduced stacks. """ - s = " " + marker + " [ " + s = ' ' + marker + ' [ ' for elt in stack: if isinstance(elt, Tree): - s += repr(Nonterminal(elt.label())) + " " + s += unicode_repr(Nonterminal(elt.label())) + ' ' else: - s += repr(elt) + " " - s += "* " + " ".join(remaining_text) + "]" + s += unicode_repr(elt) + ' ' + s += '* ' + ' '.join(remaining_text) + ']' print(s) def _trace_shift(self, stack, remaining_text): @@ -239,9 +241,9 @@ class ShiftReduceParser(ParserI): :rtype: None """ if self._trace > 2: - print("Shift %r:" % stack[-1]) + print('Shift %r:' % stack[-1]) if self._trace == 2: - self._trace_stack(stack, remaining_text, "S") + self._trace_stack(stack, remaining_text, 'S') elif self._trace > 0: self._trace_stack(stack, remaining_text) @@ -254,9 +256,9 @@ class ShiftReduceParser(ParserI): """ if self._trace > 2: rhs = " ".join(production.rhs()) - print("Reduce %r <- %s" % (production.lhs(), rhs)) + print('Reduce %r <- %s' % (production.lhs(), rhs)) if self._trace == 2: - self._trace_stack(stack, remaining_text, "R") + self._trace_stack(stack, remaining_text, 'R') elif self._trace > 1: self._trace_stack(stack, remaining_text) @@ -277,7 +279,7 @@ class ShiftReduceParser(ParserI): rhs1 = productions[i].rhs() rhs2 = productions[j].rhs() if rhs1[: len(rhs2)] == rhs2: - print("Warning: %r will never be used" % productions[i]) + print('Warning: %r will never be used' % productions[i]) ##////////////////////////////////////////////////////// @@ -468,12 +470,12 @@ def demo(): """ ) - sent = "I saw a man in the park".split() + sent = 'I saw a man in the park'.split() parser = parse.ShiftReduceParser(grammar, trace=2) for p in parser.parse(sent): print(p) -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/parse/stanford.py b/nlp_resource_data/nltk/parse/stanford.py index 4350b35..8943df1 100644 --- a/nlp_resource_data/nltk/parse/stanford.py +++ b/nlp_resource_data/nltk/parse/stanford.py @@ -1,18 +1,22 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: Interface to the Stanford Parser # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Steven Xu # # URL: # For license information, see LICENSE.TXT +from __future__ import unicode_literals + import tempfile import os import warnings from unittest import skip from subprocess import PIPE +from six import text_type + from nltk.internals import ( find_jar_iter, config_java, @@ -25,15 +29,15 @@ from nltk.parse.api import ParserI from nltk.parse.dependencygraph import DependencyGraph from nltk.tree import Tree -_stanford_url = "https://nlp.stanford.edu/software/lex-parser.shtml" +_stanford_url = 'https://nlp.stanford.edu/software/lex-parser.shtml' class GenericStanfordParser(ParserI): """Interface to the Stanford Parser""" - _MODEL_JAR_PATTERN = r"stanford-parser-(\d+)(\.(\d+))+-models\.jar" - _JAR = r"stanford-parser\.jar" - _MAIN_CLASS = "edu.stanford.nlp.parser.lexparser.LexicalizedParser" + _MODEL_JAR_PATTERN = r'stanford-parser-(\d+)(\.(\d+))+-models\.jar' + _JAR = r'stanford-parser\.jar' + _MAIN_CLASS = 'edu.stanford.nlp.parser.lexparser.LexicalizedParser' _USE_STDIN = False _DOUBLE_SPACED_OUTPUT = False @@ -42,11 +46,11 @@ class GenericStanfordParser(ParserI): self, path_to_jar=None, path_to_models_jar=None, - model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz", - encoding="utf8", + model_path='edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz', + 
encoding='utf8', verbose=False, - java_options="-mx4g", - corenlp_options="", + java_options='-mx4g', + corenlp_options='', ): # find the most recent code and model jar @@ -54,7 +58,7 @@ class GenericStanfordParser(ParserI): find_jar_iter( self._JAR, path_to_jar, - env_vars=("STANFORD_PARSER", "STANFORD_CORENLP"), + env_vars=('STANFORD_PARSER', 'STANFORD_CORENLP'), searchpath=(), url=_stanford_url, verbose=verbose, @@ -67,7 +71,7 @@ class GenericStanfordParser(ParserI): find_jar_iter( self._MODEL_JAR_PATTERN, path_to_models_jar, - env_vars=("STANFORD_MODELS", "STANFORD_CORENLP"), + env_vars=('STANFORD_MODELS', 'STANFORD_CORENLP'), searchpath=(), url=_stanford_url, verbose=verbose, @@ -93,17 +97,17 @@ class GenericStanfordParser(ParserI): cur_trees = [] blank = False for line in output_.splitlines(False): - if line == "": + if line == '': if blank: res.append(iter(cur_trees)) cur_trees = [] blank = False elif self._DOUBLE_SPACED_OUTPUT: - cur_trees.append(self._make_tree("\n".join(cur_lines))) + cur_trees.append(self._make_tree('\n'.join(cur_lines))) cur_lines = [] blank = True else: - res.append(iter([self._make_tree("\n".join(cur_lines))])) + res.append(iter([self._make_tree('\n'.join(cur_lines))])) cur_lines = [] else: cur_lines.append(line) @@ -125,19 +129,19 @@ class GenericStanfordParser(ParserI): """ cmd = [ self._MAIN_CLASS, - "-model", + '-model', self.model_path, - "-sentences", - "newline", - "-outputFormat", + '-sentences', + 'newline', + '-outputFormat', self._OUTPUT_FORMAT, - "-tokenized", - "-escaper", - "edu.stanford.nlp.process.PTBEscapingProcessor", + '-tokenized', + '-escaper', + 'edu.stanford.nlp.process.PTBEscapingProcessor', ] return self._parse_trees_output( self._execute( - cmd, "\n".join(" ".join(sentence) for sentence in sentences), verbose + cmd, '\n'.join(' '.join(sentence) for sentence in sentences), verbose ) ) @@ -165,15 +169,15 @@ class GenericStanfordParser(ParserI): """ cmd = [ self._MAIN_CLASS, - "-model", + '-model', self.model_path, - "-sentences", - "newline", - "-outputFormat", + '-sentences', + 'newline', + '-outputFormat', self._OUTPUT_FORMAT, ] return self._parse_trees_output( - self._execute(cmd, "\n".join(sentences), verbose) + self._execute(cmd, '\n'.join(sentences), verbose) ) def tagged_parse(self, sentence, verbose=False): @@ -198,29 +202,29 @@ class GenericStanfordParser(ParserI): :type sentences: list(list(tuple(str, str))) :rtype: iter(iter(Tree)) """ - tag_separator = "/" + tag_separator = '/' cmd = [ self._MAIN_CLASS, - "-model", + '-model', self.model_path, - "-sentences", - "newline", - "-outputFormat", + '-sentences', + 'newline', + '-outputFormat', self._OUTPUT_FORMAT, - "-tokenized", - "-tagSeparator", + '-tokenized', + '-tagSeparator', tag_separator, - "-tokenizerFactory", - "edu.stanford.nlp.process.WhitespaceTokenizer", - "-tokenizerMethod", - "newCoreLabelTokenizerFactory", + '-tokenizerFactory', + 'edu.stanford.nlp.process.WhitespaceTokenizer', + '-tokenizerMethod', + 'newCoreLabelTokenizerFactory', ] # We don't need to escape slashes as "splitting is done on the last instance of the character in the token" return self._parse_trees_output( self._execute( cmd, - "\n".join( - " ".join(tag_separator.join(tagged) for tagged in sentence) + '\n'.join( + ' '.join(tag_separator.join(tagged) for tagged in sentence) for sentence in sentences ), verbose, @@ -229,19 +233,19 @@ class GenericStanfordParser(ParserI): def _execute(self, cmd, input_, verbose=False): encoding = self._encoding - cmd.extend(["-encoding", encoding]) + 
cmd.extend(['-encoding', encoding]) if self.corenlp_options: cmd.append(self.corenlp_options) - default_options = " ".join(_java_options) + default_options = ' '.join(_java_options) # Configure java. config_java(options=self.java_options, verbose=verbose) # Windows is incompatible with NamedTemporaryFile() without passing in delete=False. - with tempfile.NamedTemporaryFile(mode="wb", delete=False) as input_file: + with tempfile.NamedTemporaryFile(mode='wb', delete=False) as input_file: # Write the actual sentences to the temporary input file - if isinstance(input_, str) and encoding: + if isinstance(input_, text_type) and encoding: input_ = input_.encode(encoding) input_file.write(input_) input_file.flush() @@ -262,8 +266,8 @@ class GenericStanfordParser(ParserI): cmd, classpath=self._classpath, stdout=PIPE, stderr=PIPE ) - stdout = stdout.replace(b"\xc2\xa0", b" ") - stdout = stdout.replace(b"\x00\xa0", b" ") + stdout = stdout.replace(b'\xc2\xa0', b' ') + stdout = stdout.replace(b'\x00\xa0', b' ') stdout = stdout.decode(encoding) os.unlink(input_file.name) @@ -325,7 +329,7 @@ class StanfordParser(GenericStanfordParser): [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])]), Tree('.', ['.'])])])] """ - _OUTPUT_FORMAT = "penn" + _OUTPUT_FORMAT = 'penn' def __init__(self, *args, **kwargs): warnings.warn( @@ -391,7 +395,7 @@ class StanfordDependencyParser(GenericStanfordParser): """ - _OUTPUT_FORMAT = "conll2007" + _OUTPUT_FORMAT = 'conll2007' def __init__(self, *args, **kwargs): warnings.warn( @@ -404,11 +408,11 @@ class StanfordDependencyParser(GenericStanfordParser): super(StanfordDependencyParser, self).__init__(*args, **kwargs) def _make_tree(self, result): - return DependencyGraph(result, top_relation_label="root") + return DependencyGraph(result, top_relation_label='root') class StanfordNeuralDependencyParser(GenericStanfordParser): - """ + ''' >>> from nltk.parse.stanford import StanfordNeuralDependencyParser >>> dep_parser=StanfordNeuralDependencyParser(java_options='-mx4g') @@ -437,12 +441,12 @@ class StanfordNeuralDependencyParser(GenericStanfordParser): ... ))], []) # doctest: +NORMALIZE_WHITESPACE [Tree('dog', ['I', "'m", 'a']), Tree('cat', ['This', 'is', Tree('friends', ['my', "'"]), Tree('tabby', ['-LRB-', 'the', '-RRB-'])])] - """ + ''' - _OUTPUT_FORMAT = "conll" - _MAIN_CLASS = "edu.stanford.nlp.pipeline.StanfordCoreNLP" - _JAR = r"stanford-corenlp-(\d+)(\.(\d+))+\.jar" - _MODEL_JAR_PATTERN = r"stanford-corenlp-(\d+)(\.(\d+))+-models\.jar" + _OUTPUT_FORMAT = 'conll' + _MAIN_CLASS = 'edu.stanford.nlp.pipeline.StanfordCoreNLP' + _JAR = r'stanford-corenlp-(\d+)(\.(\d+))+\.jar' + _MODEL_JAR_PATTERN = r'stanford-corenlp-(\d+)(\.(\d+))+-models\.jar' _USE_STDIN = True _DOUBLE_SPACED_OUTPUT = True @@ -455,22 +459,22 @@ class StanfordNeuralDependencyParser(GenericStanfordParser): ) super(StanfordNeuralDependencyParser, self).__init__(*args, **kwargs) - self.corenlp_options += "-annotators tokenize,ssplit,pos,depparse" + self.corenlp_options += '-annotators tokenize,ssplit,pos,depparse' def tagged_parse_sents(self, sentences, verbose=False): - """ + ''' Currently unimplemented because the neural dependency parser (and the StanfordCoreNLP pipeline class) doesn't support passing in pre- tagged tokens. - """ + ''' raise NotImplementedError( - "tagged_parse[_sents] is not supported by " - "StanfordNeuralDependencyParser; use " - "parse[_sents] or raw_parse[_sents] instead." 
+ 'tagged_parse[_sents] is not supported by ' + 'StanfordNeuralDependencyParser; use ' + 'parse[_sents] or raw_parse[_sents] instead.' ) def _make_tree(self, result): - return DependencyGraph(result, top_relation_label="ROOT") + return DependencyGraph(result, top_relation_label='ROOT') @skip("doctests from nltk.parse.stanford are skipped because it's deprecated") @@ -479,10 +483,10 @@ def setup_module(module): try: StanfordParser( - model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz" + model_path='edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz' ) StanfordNeuralDependencyParser() except LookupError: raise SkipTest( - "doctests from nltk.parse.stanford are skipped because one of the stanford parser or CoreNLP jars doesn't exist" + 'doctests from nltk.parse.stanford are skipped because one of the stanford parser or CoreNLP jars doesn\'t exist' ) diff --git a/nlp_resource_data/nltk/parse/transitionparser.py b/nlp_resource_data/nltk/parse/transitionparser.py index 6615288..a60bc37 100644 --- a/nlp_resource_data/nltk/parse/transitionparser.py +++ b/nlp_resource_data/nltk/parse/transitionparser.py @@ -2,10 +2,13 @@ # # Author: Long Duong # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # URL: # For license information, see LICENSE.TXT +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function import tempfile import pickle @@ -52,11 +55,11 @@ class Configuration(object): def __str__(self): return ( - "Stack : " + 'Stack : ' + str(self.stack) - + " Buffer : " + + ' Buffer : ' + str(self.buffer) - + " Arcs : " + + ' Arcs : ' + str(self.arcs) ) @@ -67,10 +70,10 @@ class Configuration(object): """ if feat is None: return False - if feat == "": + if feat == '': return False if flag is False: - if feat == "_": + if feat == '_': return False return True @@ -88,28 +91,28 @@ class Configuration(object): # Stack 0 stack_idx0 = self.stack[len(self.stack) - 1] token = self._tokens[stack_idx0] - if self._check_informative(token["word"], True): - result.append("STK_0_FORM_" + token["word"]) - if "lemma" in token and self._check_informative(token["lemma"]): - result.append("STK_0_LEMMA_" + token["lemma"]) - if self._check_informative(token["tag"]): - result.append("STK_0_POS_" + token["tag"]) - if "feats" in token and self._check_informative(token["feats"]): - feats = token["feats"].split("|") + if self._check_informative(token['word'], True): + result.append('STK_0_FORM_' + token['word']) + if 'lemma' in token and self._check_informative(token['lemma']): + result.append('STK_0_LEMMA_' + token['lemma']) + if self._check_informative(token['tag']): + result.append('STK_0_POS_' + token['tag']) + if 'feats' in token and self._check_informative(token['feats']): + feats = token['feats'].split("|") for feat in feats: - result.append("STK_0_FEATS_" + feat) + result.append('STK_0_FEATS_' + feat) # Stack 1 if len(self.stack) > 1: stack_idx1 = self.stack[len(self.stack) - 2] token = self._tokens[stack_idx1] - if self._check_informative(token["tag"]): - result.append("STK_1_POS_" + token["tag"]) + if self._check_informative(token['tag']): + result.append('STK_1_POS_' + token['tag']) # Left most, right most dependency of stack[0] left_most = 1000000 right_most = -1 - dep_left_most = "" - dep_right_most = "" + dep_left_most = '' + dep_right_most = '' for (wi, r, wj) in self.arcs: if wi == stack_idx0: if (wj > wi) and (wj > right_most): @@ -119,48 +122,48 @@ class Configuration(object): left_most = wj dep_left_most = r if 
self._check_informative(dep_left_most): - result.append("STK_0_LDEP_" + dep_left_most) + result.append('STK_0_LDEP_' + dep_left_most) if self._check_informative(dep_right_most): - result.append("STK_0_RDEP_" + dep_right_most) + result.append('STK_0_RDEP_' + dep_right_most) # Check Buffered 0 if len(self.buffer) > 0: # Buffer 0 buffer_idx0 = self.buffer[0] token = self._tokens[buffer_idx0] - if self._check_informative(token["word"], True): - result.append("BUF_0_FORM_" + token["word"]) - if "lemma" in token and self._check_informative(token["lemma"]): - result.append("BUF_0_LEMMA_" + token["lemma"]) - if self._check_informative(token["tag"]): - result.append("BUF_0_POS_" + token["tag"]) - if "feats" in token and self._check_informative(token["feats"]): - feats = token["feats"].split("|") + if self._check_informative(token['word'], True): + result.append('BUF_0_FORM_' + token['word']) + if 'lemma' in token and self._check_informative(token['lemma']): + result.append('BUF_0_LEMMA_' + token['lemma']) + if self._check_informative(token['tag']): + result.append('BUF_0_POS_' + token['tag']) + if 'feats' in token and self._check_informative(token['feats']): + feats = token['feats'].split("|") for feat in feats: - result.append("BUF_0_FEATS_" + feat) + result.append('BUF_0_FEATS_' + feat) # Buffer 1 if len(self.buffer) > 1: buffer_idx1 = self.buffer[1] token = self._tokens[buffer_idx1] - if self._check_informative(token["word"], True): - result.append("BUF_1_FORM_" + token["word"]) - if self._check_informative(token["tag"]): - result.append("BUF_1_POS_" + token["tag"]) + if self._check_informative(token['word'], True): + result.append('BUF_1_FORM_' + token['word']) + if self._check_informative(token['tag']): + result.append('BUF_1_POS_' + token['tag']) if len(self.buffer) > 2: buffer_idx2 = self.buffer[2] token = self._tokens[buffer_idx2] - if self._check_informative(token["tag"]): - result.append("BUF_2_POS_" + token["tag"]) + if self._check_informative(token['tag']): + result.append('BUF_2_POS_' + token['tag']) if len(self.buffer) > 3: buffer_idx3 = self.buffer[3] token = self._tokens[buffer_idx3] - if self._check_informative(token["tag"]): - result.append("BUF_3_POS_" + token["tag"]) + if self._check_informative(token['tag']): + result.append('BUF_3_POS_' + token['tag']) # Left most, right most dependency of stack[0] left_most = 1000000 right_most = -1 - dep_left_most = "" - dep_right_most = "" + dep_left_most = '' + dep_right_most = '' for (wi, r, wj) in self.arcs: if wi == buffer_idx0: if (wj > wi) and (wj > right_most): @@ -170,9 +173,9 @@ class Configuration(object): left_most = wj dep_left_most = r if self._check_informative(dep_left_most): - result.append("BUF_0_LDEP_" + dep_left_most) + result.append('BUF_0_LDEP_' + dep_left_most) if self._check_informative(dep_right_most): - result.append("BUF_0_RDEP_" + dep_right_most) + result.append('BUF_0_RDEP_' + dep_right_most) return result @@ -184,10 +187,10 @@ class Transition(object): """ # Define set of transitions - LEFT_ARC = "LEFTARC" - RIGHT_ARC = "RIGHTARC" - SHIFT = "SHIFT" - REDUCE = "REDUCE" + LEFT_ARC = 'LEFTARC' + RIGHT_ARC = 'RIGHTARC' + SHIFT = 'SHIFT' + REDUCE = 'REDUCE' def __init__(self, alg_option): """ @@ -290,8 +293,8 @@ class TransitionParser(ParserI): Class for transition based parser. 
Implement 2 algorithms which are "arc-standard" and "arc-eager" """ - ARC_STANDARD = "arc-standard" - ARC_EAGER = "arc-eager" + ARC_STANDARD = 'arc-standard' + ARC_EAGER = 'arc-eager' def __init__(self, algorithm): """ @@ -313,11 +316,11 @@ class TransitionParser(ParserI): p_node = depgraph.nodes[idx_parent] c_node = depgraph.nodes[idx_child] - if c_node["word"] is None: + if c_node['word'] is None: return None # Root word - if c_node["head"] == p_node["address"]: - return c_node["rel"] + if c_node['head'] == p_node['address']: + return c_node['rel'] else: return None @@ -333,8 +336,8 @@ class TransitionParser(ParserI): unsorted_result.append(self._dictionary[feature]) # Default value of each feature is 1.0 - return " ".join( - str(featureID) + ":1.0" for featureID in sorted(unsorted_result) + return ' '.join( + str(featureID) + ':1.0' for featureID in sorted(unsorted_result) ) def _is_projective(self, depgraph): @@ -342,9 +345,9 @@ class TransitionParser(ParserI): for key in depgraph.nodes: node = depgraph.nodes[key] - if "head" in node: - childIdx = node["address"] - parentIdx = node["head"] + if 'head' in node: + childIdx = node['address'] + parentIdx = node['head'] if parentIdx is not None: arc_list.append((parentIdx, childIdx)) @@ -370,8 +373,8 @@ class TransitionParser(ParserI): self._transition.setdefault(key, len(self._transition) + 1) self._match_transition[self._transition[key]] = key - input_str = str(self._transition[key]) + " " + binary_features + "\n" - input_file.write(input_str.encode("utf-8")) + input_str = str(self._transition[key]) + ' ' + binary_features + '\n' + input_file.write(input_str.encode('utf-8')) def _create_training_examples_arc_std(self, depgraphs, input_file): """ @@ -398,7 +401,7 @@ class TransitionParser(ParserI): # Left-arc operation rel = self._get_dep_relation(b0, s0, depgraph) if rel is not None: - key = Transition.LEFT_ARC + ":" + rel + key = Transition.LEFT_ARC + ':' + rel self._write_to_file(key, binary_features, input_file) operation.left_arc(conf, rel) training_seq.append(key) @@ -419,7 +422,7 @@ class TransitionParser(ParserI): precondition = False if precondition: - key = Transition.RIGHT_ARC + ":" + rel + key = Transition.RIGHT_ARC + ':' + rel self._write_to_file(key, binary_features, input_file) operation.right_arc(conf, rel) training_seq.append(key) @@ -460,7 +463,7 @@ class TransitionParser(ParserI): # Left-arc operation rel = self._get_dep_relation(b0, s0, depgraph) if rel is not None: - key = Transition.LEFT_ARC + ":" + rel + key = Transition.LEFT_ARC + ':' + rel self._write_to_file(key, binary_features, input_file) operation.left_arc(conf, rel) training_seq.append(key) @@ -469,7 +472,7 @@ class TransitionParser(ParserI): # Right-arc operation rel = self._get_dep_relation(s0, b0, depgraph) if rel is not None: - key = Transition.RIGHT_ARC + ":" + rel + key = Transition.RIGHT_ARC + ':' + rel self._write_to_file(key, binary_features, input_file) operation.right_arc(conf, rel) training_seq.append(key) @@ -509,7 +512,7 @@ class TransitionParser(ParserI): try: input_file = tempfile.NamedTemporaryFile( - prefix="transition_parse.train", dir=tempfile.gettempdir(), delete=False + prefix='transition_parse.train', dir=tempfile.gettempdir(), delete=False ) if self._algorithm == self.ARC_STANDARD: @@ -525,7 +528,7 @@ class TransitionParser(ParserI): # Todo : because of probability = True => very slow due to # cross-validation. 
Need to improve the speed here model = svm.SVC( - kernel="poly", + kernel='poly', degree=2, coef0=0, gamma=0.2, @@ -536,7 +539,7 @@ class TransitionParser(ParserI): model.fit(x_train, y_train) # Save the model to file name (as pickle) - pickle.dump(model, open(modelfile, "wb")) + pickle.dump(model, open(modelfile, 'wb')) finally: remove(input_file.name) @@ -550,7 +553,7 @@ class TransitionParser(ParserI): """ result = [] # First load the model - model = pickle.load(open(modelFile, "rb")) + model = pickle.load(open(modelFile, 'rb')) operation = Transition(self._algorithm) for depgraph in depgraphs: @@ -635,13 +638,13 @@ class TransitionParser(ParserI): new_depgraph = deepcopy(depgraph) for key in new_depgraph.nodes: node = new_depgraph.nodes[key] - node["rel"] = "" + node['rel'] = '' # With the default, all the token depend on the Root - node["head"] = 0 + node['head'] = 0 for (head, rel, child) in conf.arcs: c_node = new_depgraph.nodes[child] - c_node["head"] = head - c_node["rel"] = rel + c_node['head'] = head + c_node['rel'] = rel result.append(new_depgraph) return result diff --git a/nlp_resource_data/nltk/parse/util.py b/nlp_resource_data/nltk/parse/util.py index 34630a0..6ebe146 100644 --- a/nlp_resource_data/nltk/parse/util.py +++ b/nlp_resource_data/nltk/parse/util.py @@ -2,7 +2,7 @@ # # Author: Ewan Klein # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # URL: # For license information, see LICENSE.TXT @@ -10,6 +10,7 @@ """ Utility functions for parsers. """ +from __future__ import print_function from nltk.grammar import CFG, FeatureGrammar, PCFG from nltk.data import load @@ -97,7 +98,7 @@ def taggedsent_to_conll(sentence): :return: a generator yielding a single sentence in CONLL format. """ for (i, (word, tag)) in enumerate(sentence, start=1): - input_str = [str(i), word, "_", tag, tag, "_", "0", "a", "_", "_"] + input_str = [str(i), word, '_', tag, tag, '_', '0', 'a', '_', '_'] input_str = "\t".join(input_str) + "\n" yield input_str @@ -138,7 +139,7 @@ def taggedsents_to_conll(sentences): for sentence in sentences: for input_str in taggedsent_to_conll(sentence): yield input_str - yield "\n\n" + yield '\n\n' ###################################################################### @@ -169,8 +170,8 @@ class TestGrammar(object): according to the grammar, then the value of ``trees`` will be None. 
""" for test in self.suite: - print(test["doc"] + ":", end=" ") - for key in ["accept", "reject"]: + print(test['doc'] + ":", end=' ') + for key in ['accept', 'reject']: for sent in test[key]: tokens = sent.split() trees = list(self.cp.parse(tokens)) @@ -179,7 +180,7 @@ class TestGrammar(object): print(sent) for tree in trees: print(tree) - if key == "accept": + if key == 'accept': if trees == []: raise ValueError("Sentence '%s' failed to parse'" % sent) else: @@ -212,14 +213,14 @@ def extract_test_sentences(string, comment_chars="#%;", encoding=None): if encoding is not None: string = string.decode(encoding) sentences = [] - for sentence in string.split("\n"): - if sentence == "" or sentence[0] in comment_chars: + for sentence in string.split('\n'): + if sentence == '' or sentence[0] in comment_chars: continue - split_info = sentence.split(":", 1) + split_info = sentence.split(':', 1) result = None if len(split_info) == 2: - if split_info[0] in ["True", "true", "False", "false"]: - result = split_info[0] in ["True", "true"] + if split_info[0] in ['True', 'true', 'False', 'false']: + result = split_info[0] in ['True', 'true'] sentence = split_info[1] else: result = int(split_info[0]) diff --git a/nlp_resource_data/nltk/parse/viterbi.py b/nlp_resource_data/nltk/parse/viterbi.py index bcb9687..7f6217e 100644 --- a/nlp_resource_data/nltk/parse/viterbi.py +++ b/nlp_resource_data/nltk/parse/viterbi.py @@ -1,13 +1,15 @@ # Natural Language Toolkit: Viterbi Probabilistic Parser # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # Steven Bird # URL: # For license information, see LICENSE.TXT +from __future__ import print_function, unicode_literals from functools import reduce from nltk.tree import Tree, ProbabilisticTree +from nltk.compat import python_2_unicode_compatible from nltk.parse.api import ParserI @@ -16,6 +18,7 @@ from nltk.parse.api import ParserI ##////////////////////////////////////////////////////// +@python_2_unicode_compatible class ViterbiParser(ParserI): """ A bottom-up ``PCFG`` parser that uses dynamic programming to find @@ -121,7 +124,7 @@ class ViterbiParser(ParserI): # Initialize the constituents dictionary with the words from # the text. if self._trace: - print(("Inserting tokens into the most likely" + " constituents table...")) + print(('Inserting tokens into the most likely' + ' constituents table...')) for index in range(len(tokens)): token = tokens[index] constituents[index, index + 1, token] = token @@ -134,8 +137,8 @@ class ViterbiParser(ParserI): if self._trace: print( ( - "Finding the most likely constituents" - + " spanning %d text elements..." % length + 'Finding the most likely constituents' + + ' spanning %d text elements...' % length ) ) for start in range(len(tokens) - length + 1): @@ -207,9 +210,9 @@ class ViterbiParser(ParserI): if self._trace > 1: if c is None or c != tree: if c is None or c.prob() < tree.prob(): - print(" Insert:", end=" ") + print(' Insert:', end=' ') else: - print(" Discard:", end=" ") + print(' Discard:', end=' ') self._trace_production(production, p, span, len(tokens)) if c is None or c.prob() < tree.prob(): constituents[span[0], span[1], production.lhs()] = tree @@ -305,22 +308,22 @@ class ViterbiParser(ParserI): :rtype: None """ - str = "|" + "." * span[0] - str += "=" * (span[1] - span[0]) - str += "." * (width - span[1]) + "| " - str += "%s" % production + str = '|' + '.' * span[0] + str += '=' * (span[1] - span[0]) + str += '.' 
* (width - span[1]) + '| ' + str += '%s' % production if self._trace > 2: - str = "%-40s %12.10f " % (str, p) + str = '%-40s %12.10f ' % (str, p) print(str) def _trace_lexical_insertion(self, token, index, width): - str = " Insert: |" + "." * index + "=" + "." * (width - index - 1) + "| " - str += "%s" % (token,) + str = ' Insert: |' + '.' * index + '=' + '.' * (width - index - 1) + '| ' + str += '%s' % (token,) print(str) def __repr__(self): - return "" % self._grammar + return '' % self._grammar ##////////////////////////////////////////////////////// @@ -342,22 +345,22 @@ def demo(): # Define two demos. Each demo has a sentence and a grammar. demos = [ - ("I saw the man with my telescope", toy_pcfg1), - ("the boy saw Jack with Bob under the table with a telescope", toy_pcfg2), + ('I saw the man with my telescope', toy_pcfg1), + ('the boy saw Jack with Bob under the table with a telescope', toy_pcfg2), ] # Ask the user which demo they want to use. print() for i in range(len(demos)): - print("%3s: %s" % (i + 1, demos[i][0])) - print(" %r" % demos[i][1]) + print('%3s: %s' % (i + 1, demos[i][0])) + print(' %r' % demos[i][1]) print() - print("Which demo (%d-%d)? " % (1, len(demos)), end=" ") + print('Which demo (%d-%d)? ' % (1, len(demos)), end=' ') try: snum = int(sys.stdin.readline().strip()) - 1 sent, grammar = demos[snum] except: - print("Bad sentence number") + print('Bad sentence number') return # Tokenize the sentence. @@ -366,7 +369,7 @@ def demo(): parser = ViterbiParser(grammar) all_parses = {} - print("\nsent: %s\nparser: %s\ngrammar: %s" % (sent, parser, grammar)) + print('\nsent: %s\nparser: %s\ngrammar: %s' % (sent, parser, grammar)) parser.trace(3) t = time.time() parses = parser.parse_all(tokens) @@ -380,33 +383,33 @@ def demo(): # Print some summary statistics print() - print("Time (secs) # Parses Average P(parse)") - print("-----------------------------------------") - print("%11.4f%11d%19.14f" % (time, num_parses, average)) + print('Time (secs) # Parses Average P(parse)') + print('-----------------------------------------') + print('%11.4f%11d%19.14f' % (time, num_parses, average)) parses = all_parses.keys() if parses: p = reduce(lambda a, b: a + b.prob(), parses, 0) / len(parses) else: p = 0 - print("------------------------------------------") - print("%11s%11d%19.14f" % ("n/a", len(parses), p)) + print('------------------------------------------') + print('%11s%11d%19.14f' % ('n/a', len(parses), p)) # Ask the user if we should draw the parses. print() - print("Draw parses (y/n)? ", end=" ") - if sys.stdin.readline().strip().lower().startswith("y"): + print('Draw parses (y/n)? ', end=' ') + if sys.stdin.readline().strip().lower().startswith('y'): from nltk.draw.tree import draw_trees - print(" please wait...") + print(' please wait...') draw_trees(*parses) # Ask the user if we should print the parses. print() - print("Print parses (y/n)? ", end=" ") - if sys.stdin.readline().strip().lower().startswith("y"): + print('Print parses (y/n)? 
', end=' ') + if sys.stdin.readline().strip().lower().startswith('y'): for parse in parses: print(parse) -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/probability.py b/nlp_resource_data/nltk/probability.py index 5a59c3f..a83af71 100644 --- a/nlp_resource_data/nltk/probability.py +++ b/nlp_resource_data/nltk/probability.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: Probability and Statistics # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # Steven Bird (additions) # Trevor Cohn (additions) @@ -37,6 +37,7 @@ implementation of the ``ConditionalProbDistI`` interface is ``ConditionalProbDist``, a derived distribution. """ +from __future__ import print_function, unicode_literals, division import math import random @@ -46,16 +47,19 @@ from collections import defaultdict, Counter from functools import reduce from abc import ABCMeta, abstractmethod +from six import itervalues, text_type, add_metaclass + +from nltk import compat from nltk.internals import raise_unorderable_types -_NINF = float("-1e300") +_NINF = float('-1e300') ##////////////////////////////////////////////////////// ## Frequency Distributions ##////////////////////////////////////////////////////// - +@compat.python_2_unicode_compatible class FreqDist(Counter): """ A frequency distribution for the outcomes of an experiment. A @@ -242,7 +246,7 @@ class FreqDist(Counter): """ if len(self) == 0: raise ValueError( - "A FreqDist must have at least one sample before max is defined." + 'A FreqDist must have at least one sample before max is defined.' ) return self.most_common(1)[0][0] @@ -260,19 +264,19 @@ class FreqDist(Counter): :type title: bool """ try: - import matplotlib.pyplot as plt + from matplotlib import pylab except ImportError: raise ValueError( - "The plot function requires matplotlib to be installed." - "See http://matplotlib.org/" + 'The plot function requires matplotlib to be installed.' + 'See http://matplotlib.org/' ) if len(args) == 0: args = [len(self)] samples = [item for item, _ in self.most_common(*args)] - cumulative = _get_kwarg(kwargs, "cumulative", False) - percents = _get_kwarg(kwargs, "percents", False) + cumulative = _get_kwarg(kwargs, 'cumulative', False) + percents = _get_kwarg(kwargs, 'percents', False) if cumulative: freqs = list(self._cumulative_frequencies(samples)) ylabel = "Cumulative Counts" @@ -284,24 +288,17 @@ class FreqDist(Counter): ylabel = "Counts" # percents = [f * 100 for f in freqs] only in ProbDist? 
- ax = plt.gca() - ax.grid(True, color="silver") - + pylab.grid(True, color="silver") if "linewidth" not in kwargs: kwargs["linewidth"] = 2 if "title" in kwargs: - ax.set_title(kwargs["title"]) + pylab.title(kwargs["title"]) del kwargs["title"] - - ax.plot(freqs, **kwargs) - ax.set_xticks(range(len(samples))) - ax.set_xticklabels([str(s) for s in samples], rotation=90) - ax.set_xlabel("Samples") - ax.set_ylabel(ylabel) - - plt.show() - - return ax + pylab.plot(freqs, **kwargs) + pylab.xticks(range(len(samples)), [text_type(s) for s in samples], rotation=90) + pylab.xlabel("Samples") + pylab.ylabel(ylabel) + pylab.show() def tabulate(self, *args, **kwargs): """ @@ -319,21 +316,21 @@ class FreqDist(Counter): args = [len(self)] samples = [item for item, _ in self.most_common(*args)] - cumulative = _get_kwarg(kwargs, "cumulative", False) + cumulative = _get_kwarg(kwargs, 'cumulative', False) if cumulative: freqs = list(self._cumulative_frequencies(samples)) else: freqs = [self[sample] for sample in samples] # percents = [f * 100 for f in freqs] only in ProbDist? - width = max(len("{}".format(s)) for s in samples) + width = max(len("%s" % s) for s in samples) width = max(width, max(len("%d" % f) for f in freqs)) for i in range(len(samples)): - print("%*s" % (width, samples[i]), end=" ") + print("%*s" % (width, samples[i]), end=' ') print() for i in range(len(samples)): - print("%*d" % (width, freqs[i]), end=" ") + print("%*d" % (width, freqs[i]), end=' ') print() def copy(self): @@ -387,47 +384,16 @@ class FreqDist(Counter): return self.__class__(super(FreqDist, self).__and__(other)) def __le__(self, other): - """ - Returns True if this frequency distribution is a subset of the other - and for no key the value exceeds the value of the same key from - the other frequency distribution. - - The <= operator forms partial order and satisfying the axioms - reflexivity, antisymmetry and transitivity. 
- - >>> FreqDist('a') <= FreqDist('a') - True - >>> a = FreqDist('abc') - >>> b = FreqDist('aabc') - >>> (a <= b, b <= a) - (True, False) - >>> FreqDist('a') <= FreqDist('abcd') - True - >>> FreqDist('abc') <= FreqDist('xyz') - False - >>> FreqDist('xyz') <= FreqDist('abc') - False - >>> c = FreqDist('a') - >>> d = FreqDist('aa') - >>> e = FreqDist('aaa') - >>> c <= d and d <= e and c <= e - True - """ if not isinstance(other, FreqDist): raise_unorderable_types("<=", self, other) return set(self).issubset(other) and all( self[key] <= other[key] for key in self ) - def __ge__(self, other): - if not isinstance(other, FreqDist): - raise_unorderable_types(">=", self, other) - return set(self).issuperset(other) and all( - self[key] >= other[key] for key in other - ) - + # @total_ordering doesn't work here, since the class inherits from a builtin class + __ge__ = lambda self, other: not self <= other or self == other __lt__ = lambda self, other: self <= other and not self == other - __gt__ = lambda self, other: self >= other and not self == other + __gt__ = lambda self, other: not self <= other def __repr__(self): """ @@ -455,10 +421,10 @@ class FreqDist(Counter): :type maxlen: int :rtype: string """ - items = ["{0!r}: {1!r}".format(*item) for item in self.most_common(maxlen)] + items = ['{0!r}: {1!r}'.format(*item) for item in self.most_common(maxlen)] if len(self) > maxlen: - items.append("...") - return "FreqDist({{{0}}})".format(", ".join(items)) + items.append('...') + return 'FreqDist({{{0}}})'.format(', '.join(items)) def __str__(self): """ @@ -466,16 +432,7 @@ class FreqDist(Counter): :rtype: string """ - return "" % (len(self), self.N()) - - def __iter__(self): - """ - Return an iterator which yields tokens ordered by frequency. - - :rtype: iterator - """ - for token, _ in self.most_common(self.B()): - yield token + return '' % (len(self), self.N()) ##////////////////////////////////////////////////////// @@ -483,7 +440,8 @@ class FreqDist(Counter): ##////////////////////////////////////////////////////// -class ProbDistI(metaclass=ABCMeta): +@add_metaclass(ABCMeta) +class ProbDistI(object): """ A probability distribution for the outcomes of an experiment. A probability distribution specifies how likely it is that an @@ -586,7 +544,7 @@ class ProbDistI(metaclass=ABCMeta): return random.choice(list(self.samples())) - +@compat.python_2_unicode_compatible class UniformProbDist(ProbDistI): """ A probability distribution that assigns equal probability to each @@ -606,7 +564,7 @@ class UniformProbDist(ProbDistI): """ if len(samples) == 0: raise ValueError( - "A Uniform probability distribution must " + "have at least one sample." + 'A Uniform probability distribution must ' + 'have at least one sample.' ) self._sampleset = set(samples) self._prob = 1.0 / len(self._sampleset) @@ -622,10 +580,10 @@ class UniformProbDist(ProbDistI): return self._samples def __repr__(self): - return "" % len(self._sampleset) - + return '' % len(self._sampleset) +@compat.python_2_unicode_compatible class RandomProbDist(ProbDistI): """ Generates a random probability distribution whereby each sample @@ -636,7 +594,7 @@ class RandomProbDist(ProbDistI): def __init__(self, samples): if len(samples) == 0: raise ValueError( - "A probability distribution must " + "have at least one sample." + 'A probability distribution must ' + 'have at least one sample.' 
) self._probs = self.unirand(samples) self._samples = list(self._probs.keys()) @@ -664,7 +622,7 @@ class RandomProbDist(ProbDistI): return dict((s, randrow[i]) for i, s in enumerate(samples)) def max(self): - if not hasattr(self, "_max"): + if not hasattr(self, '_max'): self._max = max((p, v) for (v, p) in self._probs.items())[1] return self._max @@ -675,10 +633,10 @@ class RandomProbDist(ProbDistI): return self._samples def __repr__(self): - return "" % len(self._probs) - + return '' % len(self._probs) +@compat.python_2_unicode_compatible class DictionaryProbDist(ProbDistI): """ A probability distribution whose probabilities are directly @@ -705,8 +663,8 @@ class DictionaryProbDist(ProbDistI): if normalize: if len(prob_dict) == 0: raise ValueError( - "A DictionaryProbDist must have at least one sample " - + "before it can be normalized." + 'A DictionaryProbDist must have at least one sample ' + + 'before it can be normalized.' ) if log: value_sum = sum_logs(list(self._prob_dict.values())) @@ -746,7 +704,7 @@ class DictionaryProbDist(ProbDistI): return math.log(self._prob_dict[sample], 2) def max(self): - if not hasattr(self, "_max"): + if not hasattr(self, '_max'): self._max = max((p, v) for (v, p) in self._prob_dict.items())[1] return self._max @@ -754,10 +712,10 @@ class DictionaryProbDist(ProbDistI): return self._prob_dict.keys() def __repr__(self): - return "" % len(self._prob_dict) - + return '' % len(self._prob_dict) +@compat.python_2_unicode_compatible class MLEProbDist(ProbDistI): """ The maximum likelihood estimate for the probability distribution @@ -801,10 +759,10 @@ class MLEProbDist(ProbDistI): :rtype: str :return: A string representation of this ``ProbDist``. """ - return "" % self._freqdist.N() - + return '' % self._freqdist.N() +@compat.python_2_unicode_compatible class LidstoneProbDist(ProbDistI): """ The Lidstone estimate for the probability distribution of the @@ -844,15 +802,15 @@ class LidstoneProbDist(ProbDistI): if (bins == 0) or (bins is None and freqdist.N() == 0): name = self.__class__.__name__[:-8] raise ValueError( - "A %s probability distribution " % name + "must have at least one bin." + 'A %s probability distribution ' % name + 'must have at least one bin.' ) if (bins is not None) and (bins < freqdist.B()): name = self.__class__.__name__[:-8] raise ValueError( - "\nThe number of bins in a %s distribution " % name - + "(%d) must be greater than or equal to\n" % bins - + "the number of bins in the FreqDist used " - + "to create it (%d)." % freqdist.B() + '\nThe number of bins in a %s distribution ' % name + + '(%d) must be greater than or equal to\n' % bins + + 'the number of bins in the FreqDist used ' + + 'to create it (%d).' % freqdist.B() ) self._freqdist = freqdist @@ -902,10 +860,10 @@ class LidstoneProbDist(ProbDistI): :rtype: str """ - return "" % self._freqdist.N() - + return '' % self._freqdist.N() +@compat.python_2_unicode_compatible class LaplaceProbDist(LidstoneProbDist): """ The Laplace estimate for the probability distribution of the @@ -939,10 +897,10 @@ class LaplaceProbDist(LidstoneProbDist): :rtype: str :return: A string representation of this ``ProbDist``. 
""" - return "" % self._freqdist.N() - + return '' % self._freqdist.N() +@compat.python_2_unicode_compatible class ELEProbDist(LidstoneProbDist): """ The expected likelihood estimate for the probability distribution @@ -977,10 +935,10 @@ class ELEProbDist(LidstoneProbDist): :rtype: str """ - return "" % self._freqdist.N() - + return '' % self._freqdist.N() +@compat.python_2_unicode_compatible class HeldoutProbDist(ProbDistI): """ The heldout estimate for the probability distribution of the @@ -1141,11 +1099,11 @@ class HeldoutProbDist(ProbDistI): :rtype: str :return: A string representation of this ``ProbDist``. """ - s = "" + s = '' return s % (self._base_fdist.N(), self._heldout_fdist.N()) - +@compat.python_2_unicode_compatible class CrossValidationProbDist(ProbDistI): """ The cross-validation estimate for the probability distribution of @@ -1213,10 +1171,10 @@ class CrossValidationProbDist(ProbDistI): :rtype: str """ - return "" % len(self._freqdists) - + return '' % len(self._freqdists) +@compat.python_2_unicode_compatible class WittenBellProbDist(ProbDistI): """ The Witten-Bell estimate of a probability distribution. This distribution @@ -1260,7 +1218,7 @@ class WittenBellProbDist(ProbDistI): :type bins: int """ assert bins is None or bins >= freqdist.B(), ( - "bins parameter must not be less than %d=freqdist.B()" % freqdist.B() + 'bins parameter must not be less than %d=freqdist.B()' % freqdist.B() ) if bins is None: bins = freqdist.B() @@ -1298,7 +1256,7 @@ class WittenBellProbDist(ProbDistI): :rtype: str """ - return "" % self._freqdist.N() + return '' % self._freqdist.N() ##////////////////////////////////////////////////////// @@ -1360,7 +1318,7 @@ class WittenBellProbDist(ProbDistI): ##////////////////////////////////////////////////////// - +@compat.python_2_unicode_compatible class SimpleGoodTuringProbDist(ProbDistI): """ SimpleGoodTuring ProbDist approximates from frequency to frequency of @@ -1395,7 +1353,7 @@ class SimpleGoodTuringProbDist(ProbDistI): """ assert ( bins is None or bins > freqdist.B() - ), "bins parameter must not be less than %d=freqdist.B()+1" % (freqdist.B() + 1) + ), 'bins parameter must not be less than %d=freqdist.B()+1' % (freqdist.B() + 1) if bins is None: bins = freqdist.B() + 1 self._freqdist = freqdist @@ -1454,10 +1412,10 @@ class SimpleGoodTuringProbDist(ProbDistI): self._slope = xy_cov / x_var if x_var != 0 else 0.0 if self._slope >= -1: warnings.warn( - "SimpleGoodTuring did not find a proper best fit " - "line for smoothing probabilities of occurrences. " - "The probability estimates are likely to be " - "unreliable." + 'SimpleGoodTuring did not find a proper best fit ' + 'line for smoothing probabilities of occurrences. ' + 'The probability estimates are likely to be ' + 'unreliable.' ) self._intercept = y_mean - self._slope * x_mean @@ -1581,7 +1539,7 @@ class SimpleGoodTuringProbDist(ProbDistI): :rtype: str """ - return "" % self._freqdist.N() + return '' % self._freqdist.N() class MutableProbDist(ProbDistI): @@ -1634,7 +1592,7 @@ class MutableProbDist(ProbDistI): # inherit documentation i = self._sample_dict.get(sample) if i is None: - return float("-inf") + return float('-inf') return self._data[i] if self._logs else math.log(self._data[i], 2) def update(self, sample, prob, log=True): @@ -1696,7 +1654,7 @@ class MutableProbDist(ProbDistI): # where possible. - +@compat.python_2_unicode_compatible class KneserNeyProbDist(ProbDistI): """ Kneser-Ney estimate of a probability distribution. 
This is a version of @@ -1745,7 +1703,7 @@ class KneserNeyProbDist(ProbDistI): def prob(self, trigram): # sample must be a triple if len(trigram) != 3: - raise ValueError("Expected an iterable with 3 members.") + raise ValueError('Expected an iterable with 3 members.') trigram = tuple(trigram) w0, w1, w2 = trigram @@ -1803,12 +1761,12 @@ class KneserNeyProbDist(ProbDistI): return self._trigrams.max() def __repr__(self): - """ + ''' Return a string representation of this ProbDist :rtype: str - """ - return "" % len(self) - + return '' % len(self) -class ConditionalProbDistI(dict, metaclass=ABCMeta): +@compat.python_2_unicode_compatible +@add_metaclass(ABCMeta) +class ConditionalProbDistI(dict): """ A collection of probability distributions for a single experiment run under different conditions. Conditional probability @@ -2170,7 +2116,7 @@ class ConditionalProbDistI(dict, metaclass=ABCMeta): :rtype: str """ - return "<%s with %d conditions>" % (type(self).__name__, len(self)) + return '<%s with %d conditions>' % (type(self).__name__, len(self)) class ConditionalProbDist(ConditionalProbDistI): @@ -2336,13 +2282,13 @@ class ProbabilisticMixIn(object): the object. :type logprob: float """ - if "prob" in kwargs: - if "logprob" in kwargs: - raise TypeError("Must specify either prob or logprob " "(not both)") + if 'prob' in kwargs: + if 'logprob' in kwargs: + raise TypeError('Must specify either prob or logprob ' '(not both)') else: - ProbabilisticMixIn.set_prob(self, kwargs["prob"]) - elif "logprob" in kwargs: - ProbabilisticMixIn.set_logprob(self, kwargs["logprob"]) + ProbabilisticMixIn.set_prob(self, kwargs['prob']) + elif 'logprob' in kwargs: + ProbabilisticMixIn.set_logprob(self, kwargs['logprob']) else: self.__prob = self.__logprob = None @@ -2396,10 +2342,10 @@ class ProbabilisticMixIn(object): class ImmutableProbabilisticMixIn(ProbabilisticMixIn): def set_prob(self, prob): - raise ValueError("%s is immutable" % self.__class__.__name__) + raise ValueError('%s is immutable' % self.__class__.__name__) def set_logprob(self, prob): - raise ValueError("%s is immutable" % self.__class__.__name__) + raise ValueError('%s is immutable' % self.__class__.__name__) ## Helper function for processing keyword arguments @@ -2425,6 +2371,7 @@ def _create_rand_fdist(numsamples, numoutcomes): samples are numbers from 1 to ``numsamples``, and are generated by summing two numbers, each of which has a uniform distribution. """ + import random fdist = FreqDist() for x in range(numoutcomes): @@ -2493,82 +2440,82 @@ def demo(numsamples=6, numoutcomes=500): # Print the results in a formatted table. 
print( ( - "%d samples (1-%d); %d outcomes were sampled for each FreqDist" + '%d samples (1-%d); %d outcomes were sampled for each FreqDist' % (numsamples, numsamples, numoutcomes) ) ) - print("=" * 9 * (len(pdists) + 2)) - FORMATSTR = " FreqDist " + "%8s " * (len(pdists) - 1) + "| Actual" + print('=' * 9 * (len(pdists) + 2)) + FORMATSTR = ' FreqDist ' + '%8s ' * (len(pdists) - 1) + '| Actual' print(FORMATSTR % tuple(repr(pdist)[1:9] for pdist in pdists[:-1])) - print("-" * 9 * (len(pdists) + 2)) - FORMATSTR = "%3d %8.6f " + "%8.6f " * (len(pdists) - 1) + "| %8.6f" + print('-' * 9 * (len(pdists) + 2)) + FORMATSTR = '%3d %8.6f ' + '%8.6f ' * (len(pdists) - 1) + '| %8.6f' for val in vals: print(FORMATSTR % val) # Print the totals for each column (should all be 1.0) zvals = list(zip(*vals)) sums = [sum(val) for val in zvals[1:]] - print("-" * 9 * (len(pdists) + 2)) - FORMATSTR = "Total " + "%8.6f " * (len(pdists)) + "| %8.6f" + print('-' * 9 * (len(pdists) + 2)) + FORMATSTR = 'Total ' + '%8.6f ' * (len(pdists)) + '| %8.6f' print(FORMATSTR % tuple(sums)) - print("=" * 9 * (len(pdists) + 2)) + print('=' * 9 * (len(pdists) + 2)) # Display the distributions themselves, if they're short enough. if len("%s" % fdist1) < 70: - print(" fdist1: %s" % fdist1) - print(" fdist2: %s" % fdist2) - print(" fdist3: %s" % fdist3) + print(' fdist1: %s' % fdist1) + print(' fdist2: %s' % fdist2) + print(' fdist3: %s' % fdist3) print() - print("Generating:") + print('Generating:') for pdist in pdists: fdist = FreqDist(pdist.generate() for i in range(5000)) - print("%20s %s" % (pdist.__class__.__name__[:20], ("%s" % fdist)[:55])) + print('%20s %s' % (pdist.__class__.__name__[:20], ("%s" % fdist)[:55])) print() def gt_demo(): from nltk import corpus - emma_words = corpus.gutenberg.words("austen-emma.txt") + emma_words = corpus.gutenberg.words('austen-emma.txt') fd = FreqDist(emma_words) sgt = SimpleGoodTuringProbDist(fd) - print("%18s %8s %14s" % ("word", "freqency", "SimpleGoodTuring")) + print('%18s %8s %14s' % ("word", "freqency", "SimpleGoodTuring")) fd_keys_sorted = ( key for key, value in sorted(fd.items(), key=lambda item: item[1], reverse=True) ) for key in fd_keys_sorted: - print("%18s %8d %14e" % (key, fd[key], sgt.prob(key))) + print('%18s %8d %14e' % (key, fd[key], sgt.prob(key))) -if __name__ == "__main__": +if __name__ == '__main__': demo(6, 10) demo(5, 5000) gt_demo() __all__ = [ - "ConditionalFreqDist", - "ConditionalProbDist", - "ConditionalProbDistI", - "CrossValidationProbDist", - "DictionaryConditionalProbDist", - "DictionaryProbDist", - "ELEProbDist", - "FreqDist", - "SimpleGoodTuringProbDist", - "HeldoutProbDist", - "ImmutableProbabilisticMixIn", - "LaplaceProbDist", - "LidstoneProbDist", - "MLEProbDist", - "MutableProbDist", - "KneserNeyProbDist", - "ProbDistI", - "ProbabilisticMixIn", - "UniformProbDist", - "WittenBellProbDist", - "add_logs", - "log_likelihood", - "sum_logs", - "entropy", + 'ConditionalFreqDist', + 'ConditionalProbDist', + 'ConditionalProbDistI', + 'CrossValidationProbDist', + 'DictionaryConditionalProbDist', + 'DictionaryProbDist', + 'ELEProbDist', + 'FreqDist', + 'SimpleGoodTuringProbDist', + 'HeldoutProbDist', + 'ImmutableProbabilisticMixIn', + 'LaplaceProbDist', + 'LidstoneProbDist', + 'MLEProbDist', + 'MutableProbDist', + 'KneserNeyProbDist', + 'ProbDistI', + 'ProbabilisticMixIn', + 'UniformProbDist', + 'WittenBellProbDist', + 'add_logs', + 'log_likelihood', + 'sum_logs', + 'entropy', ] diff --git a/nlp_resource_data/nltk/sem/__init__.py 
b/nlp_resource_data/nltk/sem/__init__.py
index bc2bca4..2d60761 100644
--- a/nlp_resource_data/nltk/sem/__init__.py
+++ b/nlp_resource_data/nltk/sem/__init__.py
@@ -1,6 +1,6 @@
 # Natural Language Toolkit: Semantic Interpretation
 #
-# Copyright (C) 2001-2020 NLTK Project
+# Copyright (C) 2001-2019 NLTK Project
 # Author: Ewan Klein
 # URL:
 # For license information, see LICENSE.TXT
diff --git a/nlp_resource_data/nltk/sem/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/sem/__pycache__/__init__.cpython-37.pyc
index bc69cda562a898c9f3ff953cb5c816173ad8eeff..1551347db883875ff108a1dcac1fd313bc7e184b 100644
Binary files a/nlp_resource_data/nltk/sem/__pycache__/__init__.cpython-37.pyc and b/nlp_resource_data/nltk/sem/__pycache__/__init__.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/sem/__pycache__/boxer.cpython-37.pyc b/nlp_resource_data/nltk/sem/__pycache__/boxer.cpython-37.pyc
index d3a4ad26564707f0ad80c5f6f54b29a8eb87a53e..854ba8d8b0a1d79ff634ffe03bc5cd734a6eb296 100644
Binary files a/nlp_resource_data/nltk/sem/__pycache__/boxer.cpython-37.pyc and b/nlp_resource_data/nltk/sem/__pycache__/boxer.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/sem/__pycache__/chat80.cpython-37.pyc b/nlp_resource_data/nltk/sem/__pycache__/chat80.cpython-37.pyc
index 45a022e9bdf7813a724224d08bb59882f4c41450..397760968d5be87f336d8d8346b8650053bb2612 100644
Binary files a/nlp_resource_data/nltk/sem/__pycache__/chat80.cpython-37.pyc and b/nlp_resource_data/nltk/sem/__pycache__/chat80.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/sem/__pycache__/cooper_storage.cpython-37.pyc b/nlp_resource_data/nltk/sem/__pycache__/cooper_storage.cpython-37.pyc
index fd34437b4aaab5cbcf6957abf37b5a456b6f8bdf..9206ce7d798a6fbe3f07b65d72c7c702a9bc0b58 100644
Binary files a/nlp_resource_data/nltk/sem/__pycache__/cooper_storage.cpython-37.pyc and b/nlp_resource_data/nltk/sem/__pycache__/cooper_storage.cpython-37.pyc differ
diff --git a/nlp_resource_data/nltk/sem/__pycache__/drt.cpython-37.pyc b/nlp_resource_data/nltk/sem/__pycache__/drt.cpython-37.pyc
index fb7273642cc84f8c458e1732f5a1c70f53ccc8ff..ac0a3bd127a1241d0680a2eca8da161ca5897623 100644
GIT binary patch
delta 14957
zcmbVT34D~*wV!XcOjbf5fg}X7!vshOdsre%09nJTFrr^3Ghf1xS;CzOkcb8aQ9)6; zwko($t6~dMOtq^|pVrsDKHqbB?^XL%)YhdgwN_i-Ykj}A@BGgr zvC&^!&>nJl9E`IEouLd*2DcqSS18ky$!%vaE0pcY=5|K#wyx#

zh>J3uwtBc4=YX4i%>(SBkLQ_F=szvLx94yJle4x=9>0L(X=`!}a2q&t1J^I&h@QCn*L`CNUTX80z?xIt;tnS};B>xA(7Jj%^b4 zKY%~+Jy#~4H5ywk$cS=tkkgV9eu6TE#e+6X^N#FYy6pGV7H}J@E;ovX+M-lu37RAoc?-<_faV6qz0e{73-uNnnrLmx( z+on744NH_G=W${0$Y1)Hd|&1oT!|U`oYDGcuYU_?zAQsd_74Q}EUk*>{ z$Gp_--Ckc8UmxnukT*(KFNwV%Rvb@uG|;E1K5uxZH!AXA*$9AK0=h#4I=9ga$o5Dq z76}P@-ft0E047CSNYH9K1p+R!IO$CSy+_Eh%AQ*e&SjC1=ENXAIhUA=E?^z2a=@9G}UVMEW3enp&L&Iw7@VG+WKsXqoPZ`iku-o~+q zD0uX!cOHqL$0D3@PKblio@}4c=;Z{PBy#1ex0iW~2(LRAZCbi?$@*kJm!1;pamOpf z--l{6lDb48B<9e#fj+KP>N(bpCW`9@F8SKGm32cEVNKzEZxoN#po17oAh8884QGli z6gxJs{*HrIr9_^+t0r4QJ09Vf%spB@0x{RK>AN?6H%(uA)1BLbqvPn#t?lSoC0F(n z?2-2#t?K6~=m&XGl6=U6UE_Oxo|>5ciTy7kgcsCC@DW_Wp-MVNM1eCYK8h571WAdl zlf^~pxZqf?v6jalM|0%q=O$ke{tD`SMSgVb;qCAPUq5+l^vGR6I?s1~IOna8UPoy{ z^Q$q+MYuQ-%yfgK6gTNyMU48u-|jAs-yBwZ@157r8JHo)K`Smw z55OaB0IVsm(5zOrG5OM{Mb8ejju>Ye%kX5<^8uAhfbkrH)i!BdNh5w>A85RFC zPmi~3g6(<%qKqIL*Pk5gCm;z1dWK<7h&shTN{QcqoohB*Tub6?4teMs@ApGVkJI=S zg>muuK16c9C9_Phc^ts_`-3zAG_F9;n_Sg2W<9)&MbTm{QVMKEHtWD2ADE!PbJDw0Uin}}_GE;p9$=(=_`wSEZ$Id# zV3zsvwFkd_u?BHT+y9}NG@gX{23~lmTFJ%S;NpOScjQEsd0dzll(SBpb3WqPG#vWo z1GGR6=M@$b!%HZ-gF=OJAux#r=7m6%1>7!Rn}AM6AXAelH3?2wY!=JK@{cDbD<=jj z9+~T?eh9? z()ef;i_mX*_%)cGiJyBJpDAnUkVc9NEU%vIa4^l0N+{y*32M#QZAZ5g$hW$b93{s}|0}^V ztCCNj@~wGRr!9Y2e!s<*-W^U9tHkBE(e zk1Mu5x_zQmP%A#KkLlIg7PWq#Uv1S^dfVEbs@V3{N2|T|_V#{WwR+dT&P--HriW*EXK*_&G&5Z1ty3)LTO#A_7Hw9)TfCxJ@c&Lr#Cm~cokex2 z`Bzxfd~L!(n|C%nyVU}EF3={@^Bj6EREy}jNb}HhJv|qzCG=dP4cBX>2kqVlb(~sy zg~dCUy2{jY>MGajwLI#XN0{+y1z{?*dD?iw%qPqQbs}LV5~hqWjf9z`RuZOCo3E7+ zW&vR)t5t-lBAOz?G!bTsT1}X0l8iCWAk5TZbsC|j5o!{l780sPttCt?VJZmIOql8F z48qJHOaWn92s2ZyBTSvvq7@Qm5n*Pjvk5a>TclMIW-(#rs2;+2h^Ck@O9)f1HV~#k zTSPu+rRTZoyeoz+>O2zcCX%JRZh2U8ycnTS?9;b*Z|H{;t;6sLT7UH(14xw(1}SO;xsA zJZHX-gxXy0QFN!!)WLv8Ek~fwAB-e7+xA
    GEaADrLMZc21nqM5F@XlU0p<(XiII z%fGKL8uN?(#`&_vd4{8w`kH0PS*6s15f$`Dclmt5NHF2^>2@_3NC{V-4`ZlfK zzC(9y-m-&R!cKkOypFBL(|A0&N;apT=hL&Om-o1v$D7>IPCaYopWLA)NEeL+Jc?M0 z_Gn7Iq*qJ${GpI9zAxMz4aLuuR~KwCxE6rR%3QiF5$!j#x`XlEei8I{hqRr^yuz<3 zQ+Tk|#O5vO^GDQ>=8I^({zNbu5v%0UBG0V3)Tg^seQUOD_ZFnu(U=y|9fPr$Cd734 zQBj3+J(Qg<3yNpfv)*TcH7^j2B(%K=vnozQOsCvgymt!7b+@)RCbW1QV`rHxKP;|N z24&}@Dmk;{`htxZWD`KGe72-=^%k%?R&Uy}y+cf=w#Q*O4TB5BRJRMQCr(o#8d2lf zf*ENe;9;ZH0{;+c5PQl_Y0t3em8KoDm5s7m|G!~5q z5xP@-;7W&Gx2fV3FJQ{tE-x&9eHsVC(GwK$gzk)M@L9HAa_jgh%H7Gx_y%Q004k^e zqZQX(%R~Nfx9V?uK>ncO*E>`iGI+UG_IB>CRQu=$wwy!>(S-08&fE+ z#~(;U#lGB%$@WQA%6BO-g|b%uY|^9(lhnzqw7k()p?qJKRmNvcg6P!jv`745&7_C8 zmhpv852LKER^a=CQ!WaWqQ+{ul@vtiL301p8@&*xL40iRE_1GNr- zC+0S^&ILfybIuCnm_YP}1`q~_0>lBJj@So~v5Z?>zZX&m3Wu~+%~BT1Cqk9U)m8hI zsthg2ba#)4hT{nljP$-L^(mVhVuVe5!4XaLQTZ^j*DU zt|k)ud_7SS_9v#w8$1_#j*KZxFHDigXwoL?k=*{0jgwRCiHGXj75cCvE`ln*b8vW`NrOxLR>} zg74*tm#?ISCc?xs~QPLoTE zBvK1O*79|1v}l&vPPoS8;dxH0(kzcQZctj}2aQ#gJ3!h6kWs+c^wuosUGxBTP|CER z|3S2E;vxE@yCT{is{7hr-A2i7V)e`83sxGtSw);8!Oju{#?_0277O_Uny;IZTg|1$ z040{OPgXnyY@zN5MpSLDU>hObsAgSgVeQs!aV??ShkJyki7CKx-n)#|mC4xVN72Q` zC_xscI}M}Q8jmxhrg#{nj{qSAz?J-C;7{JwFovZZ3pDt%?q|8-E=!%nS>4Gyf=d5 zbti3>H3}I^Ozm85qK>p4D1j#FO#3xCz2(~4??Qwv?2lz6icZ3%GkHM%v}NLi(aPzL z-9g$o9+IE7T(_Eo&yxY7#PSm0RLl`Oep7u`Uc2;A=qoV1Ix@X1o+85#jxjP(D4+I9o#%zHiMiOwML(2D(FD_f9w2br+8;XlJAL;80 z1tXf_q!EJ5Yxu9^@-uInH9R7dNmy_%;eJh)h(2lz&7t={`Bz`L>^d`FPG}oncQ;|v zp5qAQ5dLeqrEPaFR6)K+a4g}Zl;}qnb;8C+3S%p@ZMG03KTqDhVumv6 z9D-|74r@M?@3!x6T>|#!07z%@fNrCek~Jdhl-=g(2`~PGC}|zPbLG{om>JAXuKOw4 zFol%*8QP{X!TLF~QbWGBa(mZT<^CegO8KxA~=P%vh^GVs!I3?ztFTJYtvvSUV#ZVfbftpW4i zRHr?)HsJGFWoO3)b2vz;?bPLvGg=c6M>L;rynLhM%~4Uym4i={fGP5=wGEvwP;1Cq z7x(1n$|_@y)?&B~0Ive|fbO9v<+fWxOgf4q-g=-vP>qX4%!hY^Xnip6D#PoZ*A%VA|sUrCJ5JHrPUYp$Kr4+E+zHPDiXYN z`fz=V{TAwzZ*H7k1MV?4rm$_9?Awq?$BA8e6JgC`!ZU4B*_c>}IFUG>6{!Uf`KUX( zL;i^QOpE9ED*3?1^S?}u!^FQ%PTRD4^K5Em-jvVgZvUjRtAW(jzU1)My60U$j; zOTbAM@b>8o0O>^|0u=GK#7;W6c~J4mlbaVdc2U2;TuH54Vg!-pSkAI`BI=eax189< zBwSSBXX8!UhZfH{%%1dnE>rG6*T^o@EEeQFQaz`qkwq{E%Rz9*NK1SJA2ej$VI6W#b9D*b#H7s(^xjh`kbp8zx(%7+@B7!n3W!`zaPtDmec~G@)m5-bk ztm}eUs60k&P_6HfOV6KYUKZNHh$Js6N@< zB~+tqt~{5m&}ntqT=_1$%PxF8S&!_Wr)-z47c|vRz(9M`qm5w+6geVqx?pe-Q=ZP` zxHlbGi*=}49=V`Ges+ONcJ6#Co%|yyX`Uv?sfXlZ@0s$Ut_oS>DwVMd$1U>tYHCi& z=3J}~rw68VY?W_&mo_2j*=d}M>ya$3rNp(k=QM|VG{Z<^uzQe=)JA&hZXDb4-uskS zHjo{Y?_Jom&a`ZB*f}XclBF$YFDLwV>bM&tBjJt3NNlI?bZeB|^4pysw{lTpWjM{c zAO}l&N-gmu5T?oS5aFJbzq%;USdQKf~!(l?zXg>nG_^Q)LRg6>W{3r)CxH z;N5fpgMA&HSsQL*uOl;G&Jxo~)sDps*W}2S+{(X_;ip|_#>pJ9PWb#TfLa2&ZOxV) zyt^@WLdG`nBoNxg`Dpn30nBoK(A)nX?OF&Z;b6H)q0b!aV^!Q`zOF zeaddG8((y{I$oyUwNjZjK@MKJrIBCcIa%={pU$tbmdq3KL|~T_62vQVrCL50?7D*v zcDKa%Bw&mJgo*IFyiENKrP^88D}KlwhrFWn4p8M}m#^Ei<7#bIUCY-l0Wv;xkw~oKR zq!R+YMnb$ot+(Why*&%F>ysls*i8q1nJi%Y{z$a{oz~2v4Ash}73FfEucFgbsz$1m zUKmf%N8>>1kQajqNXe4SW3YI{|22qrQG;GAoUXjW3F2kihc?Kf;N)i0AQ|N{B0eI{ zkJB2u6!8%*Dab@{h7yuL-QT)kBF5(2>Wc_MkvQ0mq4Y;ItvrUGkj{bui2FW(NmaaUTrFSP_Z?o&_`uAqyZM~UST06HiSc0H0KksfiPmVx zoLUn6ATkY!*P0t_l)#15uqcJ}|4hXxKOO3CZwC2u1W4~ps$r0{AzWjJL1vQrSHc~U z_g}Kq{024z;&uR@e23A(95TKzvMHGtGA?nZ7cx!=ek>4k0VB;b5+Y zT68eiyMG#0P8^uv9V~{$s?i1R0RT=_L9H=n9r_=Ti!Qyck;!?!F?p&q@xgty7i&Ns^I5snt!l`9*4ro8iDd_bCK}78|-mH`FYZuh9X|@TTUJfDbYPy!j-+ zc)=mbA5w?m2eux9o{{elpS}JCa45XDrKq*glb(+c?#6BnGUD9Ka^i@D<0Gmlbigot z+1hpQW0>Cp{OJ_A;%~tJHvn5S+N>XD$!aB83uMh@6FALBYxm_+Ud%}Gxb!@-V^Gi~ zA)cB)!N}hO;6eyjWo_P4b$x)ax*b+~i+cv;iOcKxi(J}+Y&xVRW$A$q^A4VqghS5* zW~|JZ8YA9gP|`^aem``HnEcj(bsG@~pn&=UUEEnW3Jk`yc!=2(P!ph zLK}7C3Cx(Tzm(-y4m71Q3QkVffniYm4#1=V{Pec`w<|U0O!WR1uc|Xalb!|Ep!s^D 
zc$&K3k;7Lln_UOORPN@s5H;YB@^@D?nh%#@x=~^{R9q`Ex^auW3&*?$@1hj&;Y|S@ zE`KbsBfjS9+LqanGY7x}kW0j8)bV~gFKLtZnI(Dh>Z@=eq36(&VJ@6pXKsT44g{yr z#dyS)LkIu_$4Fcat=j3HMelg5SKBMT?AD=l*g~#qukDkxB2Tw$K4C*tpwh8 zqqRKw#n+pyWmBoGI}*V}NE6e5ua+IxFIbL=soP`zKtHd~x-;yL)9Fll1Bq41TM!Qp zX}*9zvfCdQ`3mtC1GwZ9*OyPF3;1LofJwD>V(pN>zy8i;%;F*vx}C4l@j)P-drs)% ze7MYnYq7pQE!f+a(Bv~WeCr%PbS(0z!jCW5;V|7;Gc>574!Z46cez7RdKGok5b3O6 z1RiCdS*iO79ZcST;~qsRlT+n{IXpFY5Z1ViJwAsa5G#_M^0?CGwMO=MY`W7Q3G~s8 zIM{^X&^6vAB!cdXAfxC$NJ6lmyafTF(d7v?iTri`&DH)=@HkhUyLN5Iwvm3WEfv(0 zE&-p6)~Ez3j3eMJq;Z2oT#S6DSXWLImnMIC^TWzkPVGrx8|8g}99Pw#gQz5s-iDZl zGer`L$>fP!4_TE8*?G9G;5ul>Bitgtez>L_(?Bnv?{R!nn!bk9vDYA+%IEIbXDrvL>(Cr|`uWF~gdaD}ekMbA9@_;!@EMfXM)%qUr1O!JUdnkJ zpqo(|{l>tTpj?ED)4|L&NJep!&Qpo;=gCXIRhc?GoPqvyMOMt0Lb)KnC;xue@?p-v zJTVzsanZR89^vA|n(`9NY84pEOb)f^DuULN;7nr~^fMa&UZ22JG;T?-+9r*wX~f^# zlaJp$q)=ULING+P87eErd`z7e{1;HEua=qcu_P{J*c3VB(W7S@+m2H4cW7_q7+M1W zT#R`-vN1DZY83z*$XQ(rx(x*Md@V9acZqR#3Cp24A7Cc|&$uz|r(q~v#Q(^Fd)Ax_ zTlF$T6+t!5Jvp{dKoSh{mSCHQxIUxF_UQ;jX5jnzpqu{`w>I-n!0$6r;sC)wOVDoIogL= z&V2_8viAyedryAy?b?DlG@kGPl*n24*P4IKQA0f~^1}NcK8uYWG4=m7QKply3zM6V z%~Xnz`aJhh&*bm}Gt2|Sm8yK|fm2?2c(xQm;R1km2x1>#p)lNpqM8*NzIcjBDvW9yXpa^SHe%2ZkQc$=~)x%F|oGMty_r;9gqgPa%rg7x7T-DTlR z++KSmN|(ADp=M@S7)uAOpyn=bLFV@aad8Ksb!F5K9)QVDXmqZO9QFSn+wdE6k9Gg%V&H${TYN>o@e0$ljW!R#) zO3F&<5(+M(@V_#9SzB3I*|f4b)N+?O#tmDl%VxS9<15G63%AqLuw|ZWj;qS8{2%4* B4*~!H diff --git a/nlp_resource_data/nltk/sem/__pycache__/drt_glue_demo.cpython-37.pyc b/nlp_resource_data/nltk/sem/__pycache__/drt_glue_demo.cpython-37.pyc index 33f6c6eaa28cdd34096168e5c46a514957ecdcd7..44ddf892afcdaa4dcda0e9db38e53f6890edfc8e 100644 GIT binary patch delta 953 zcmZvaOH5Ny5QZ7NEfsBqR9;P?JVY#yz6fGm_#lx;Lcm4cG|IJ*V`=Ow=eAHJK-Cxo zqmj&lw-zXn$U@1D#t0!UOkC(rH{QgpJGU-OOgwWDVc}hTch1b5e`d}(uYwMqTVQK1@mo@=P>aj;>X7@TigcY=qa$M zzly7Kt(w#x3NyzcK0)D00ZodqXp#@(>9ShT@LE~r`Tdw_AizU`-Z0}ml>dFi2sQSZp z1z=G~ zrLl3iG6So)S!abcwY91Rx@<})<&4AznBq*_3Plr)%Mqq1fgtORrTDtosYyzIY>*!% z5u3n4eZS#iTlZC;-{9-;bzb7@R0C%5ef3S_DCLi#yT%HO=VUrD((Al)o zNq(Ln3=lGeBw>v(OL$J$Aj}Y^2|k*McL>XZ`?wRRYfd delta 971 zcmZva%TE(g6vnwYE#)!6fK;dzs0dOXZ7FF?OpK94kc8MSYGNkpI5gL`(pPQ^EkquP zhJcT_I6Rd{3yO&>zzh&#;zk!Hy3>_q<;K6jog42No*{Pe>zQ-DbMF1l%oqQrUu+Y_ zf*k&9Pl+VQPP%^ar9bh4t8NYMn4%OeA%^beWKcs<-SS5&L>x4pDX& z^-nmYJa;S+j7H*;;0;Fl*<+rw>UQTnN6|Q}Rk+I!W@w#~aGi!?R>GdDMw2VIY!roM z2PnsA819)z$e{Ym>>*@UbygM-G6$Edezw2F@+AaCyyD27lH)W)eF-^2Qwf=-lyHpS za-Ns?Wm-@+i&#J`BGPcXS|u4cX)%!{wb9}tw@MN{!AOEKTbRbo@AGj%}t16~T?>xI`A&8ucJg9Feje_NWN5lk$oFY12~vI@S2G3PK=_w1&z zdYU7v^C`Xn-SvNL#~C==`0dCUxUc`50r^dz^{ZH0hwn`eQ9Ebv7HKGMHj{a1YF_TZ zJWC+@5OauL#1di>@dmMim_Pu+kI?MJFq#LWZIMjFmMuwAV6)#Lqwvh0=4^IUHO`=9 zn`3_sUQ8qr$IQ(@((#CFYs;JhV@plP2C6m@TZmSKHg5`}!*(swKEi2U5OQ;J;(8t! 
zh#}t~uxS`+dCT8kz0+0tfv2+uSq>|~h%Dc*TC#aC;B@2{?SD$}(YaQLQy5Cw1|(ZO F{{Whh^Q!;= diff --git a/nlp_resource_data/nltk/sem/__pycache__/evaluate.cpython-37.pyc b/nlp_resource_data/nltk/sem/__pycache__/evaluate.cpython-37.pyc index b212373fcabc3a7306436376c108d77c88bbd26e..18e5a8bf98a1a2ed0768cab69e9690ef74cf2ed9 100644 GIT binary patch delta 5073 zcmahNTWlQFb?(gU?(BHIYdeXFojA7RILYSWR}4;^cbd%u5|h}5h$(ck-h0=qGqbbY znRV0=x$ej04!`IuyM(G+cf=O)@no8Z|^SJBn*+{{wIv4%DSM>E?pr_2#!E7RuF z^tNln*alc7P1gdtT@YKCHnroLZ0w}1bRDqw(0;m}wmn9SULj}$Z3jW^3xfItt%G&~ zt&{Z&;RAG|v5OASF1iW02I(%knQj5>b~;G6(llUq(A()Yx*f0~dI#OHk?sU^H`_yd zF33+4{vz8=d%=T!-;lvc>X?)s^5@A|MGlfBUMPBIzFf?C1*aG^l#7L&Lz!t6JjSz@ zn6&A+{cAZ2$)0x5(J$A5>c^C7^iH6rrR=qVk0ZZ}sGP<+5~*Tv3WT;A)6_ z5ifdK!Bv;ZoIFRasF&6G7*(irP3Cz|o0Et%vmR*iYtm8afrcv)PowJPNV7CYi1g6t zIq5QRUY4IE;N!>$hs2j|D$-U-3Ly!AtNmgVt zC4S(n|55be?JK8)Xu&l(voa`GlyW>lRCycx_^@Azjgv_A&#`H8XB4z$9`A*991A;8 zvXll2Bo9n@Jgx9Of?8x31C?3Kz9l-SeGmio0oduB|5Ut1Jq#rOt;S~mo%l1mF_`dU2w$IsxhMSRlTQzIBWs;$LQtS2Y}f%) zEG_fXIH6o%vo+fCF_?JHXQ|VhR9s2K%9=$%%rwDmra52zD5a7?+!=h>k20fPxnwbu zAA&&*+b8-ybQ6}UF~y1xShUAschbH@&Q!yEn|nz|}0OBKzfP7a)hjujr_ z_W%}XCGM0M_h$M1IAJ2KE@x$3*JjX5D4#$|y=e+sj#(DS5MdG?BYYIuM944?F2T=D z0%*|cR5`Iqi)u=uJ`v%W>LaV3k-M>_1dV3qROWPMWZWD(bL!-{Ihx`30X_7U#`pNc z&6)0{J%sRJH55sfn$uaU4E+qM)hZO(`~Hi~TS)nZ51QrsER-O`lo7++;;2zN5*CKW ziqKb~bsDE) zkUZ?V;6TtR!@0wGGZJId*kYkJ>r5!qg#oA{f{ScM6kJ@GECvcpill%%6}}XP3y0z! zd!U247)$Io{?(0Fd$AtE0Nzpud>pAOeI^#2^v`yEX_ru0Xd5k;C|JKxKK$UP*#b4^ zlxFX){;g{h+43mNFRq>YjJ1(cnNPBDe`ohc`yashFC#-}rxBYB6H*Yv#QzK#A`d70 zLz{nlauOwg!O{qnOd1CwphBh^)=&h?&*VKY8K|Xff!7+18r#*eEl0^7G?+hv0As?1 zEnz8gp~q*GBHsrqXZ$~I-Ck2bJclBdi|271K7jzsNXQnvJdeYrKr9AT#1o?cw=T(T z07M&9LLD?x+&zW?sd2fP2za^Zy^RL}JMi;#Bz zKwoA8lXCA%A$?{3Sg`nj&Dtx~SmVM~|=k|e$aA^XDMhq@pWzJ{0hWnr( zF8Y(UbQ{w^U>Q3FUxf@RT*+)+2WSw<7r5&MQ5UXX(B@YoGlrF?5Y!`xBM^BaoVO2# zfi^v`aQ6z-V%BCHdnL#FL_iAZWY{0BB5lbV{9FEC?&#kZ$hmw?+O9BFScx|LvN|yI zraba23i%EKkq0JX!t_SOMC%Akiq~wn(%<|!Sgy#`;o&0+ne?yhf4U17EP9;eYU0Du z(f9g&2TpuZ>=wb@)&PJrpEDN5Uid?J2AvXWiK;Ij1EGScWq%kE=#y+_!7cuW2hxW{ z4Sx^Wo(EuPmUB^D@kH7LQO?}5<<+Pc=wT8Tpk~_q{Rc~*U*=DuWKrDTL`>+1&fqU0 zSoV^@g!jJ&6OS*|ccUt6x~6D~-mGWzR*thbHvq_Chr^Eozv_-dhviBJFhhmJ?iYuk zp~8irUvLC6AlNtpd9q)4FHmi?3zdR$7xfZ%Aw}VQV=i8%AX}HUL&d^Fo-*6X%g}v} z0!dPnL?b%U)1ymDIK_TN5nj=WP{dooUXtjF<;1`CBs)WN}E(EF@yywz9Qb#b7JU!w>y`pE^w5tM;GnB(2v$`lZyEI2^DpJC4GN zafs2|RcjrSO~_G*D!hl- z`~LjcnhH)D5tQWCZlNM|C$tC`&nhXAlx29=OhV1_4`7ThT=>oar}VF3xUf;0z<&fI zY5ACVAhvLw8W#7LKsVu^2CvK+uz{uzN*sF-oVVg%2#zZAjR@{SvdkIpKtyp1GvcMw zNEI#bLeJ8F4lc*nL8DGk!~Yt13@J%Lahcs?Ig^E)|M2)wMVz7GNx25rgcs#_W+ul< zczYHX7}58`APnQ0!w8Nd=th8jGKk?u1&i@1Bsqd~Z)Wt!-D4SUBdLtwJc0^4>rK>FZ7^^xdX?i%`<6(E>@vkx9WV2Z*OTj7kW zE-R|G%8=Jw&DQ9x+d91s+n{&Mj=@`Z<3-apN!D;{ik58^Ywg-%on2R~x9f`u+e{Rb zcCwhVUyz#?m@=&J7>{34c-(2asM*cXHhB%SYn*0i-^DFndr7fdcpa~Y_XeKeNqBGM zDZUEccY}NbZv^>9rxoNkImXp{`08uSZX<3DaP7pcb&Rte*HrsnzK*YlnVr0wFEsJy zOU&L3O77wkIZ9zoPjDCwQ&k60%s3o>L!+gTVdk9(q+ z3r5o{tl>`H6S=?>>15bEFy;p$mk;EC)>yfz?q@CQz&QfRC&N9HUeW0}o12^T$~iIB z(=BzqZ&f3Vb)g4UASM%yazwv>VKp?w8YF9xENh9l_1M=0WIH@d6S19#D4<^qD8|Nv z@c5^J1jZDp#(lVk9swDooin+X9`jYsvxT zOzMIW7+jk-)+!enQ=YukWM?z1z7x$z z08yC58dx2Ru>?yo(OgN!&a&n^XM9rAKjD;|*|OO4fRyoXY`t?hj1~N>aNIqplQN^G zj_K+acto%4H%~IP@`5?ThKx}2oj`Pg9B_dMwH)W6RwxB&O=PIIkP5GGPwFBwPQ03 zg{!hdBSS}rMo(r>JT!9nWcK)wz$}iiGDSvywsxp?c@42%j;w29AIR}_oowodm)EI} zj-fP7nL-ZZNZ{&eBZ_*{Ddjy5P}r6qmo)8W4<9k|q$zwQf#{jp&L@Cr}a)La) z1kjy&N zkR4x;Q{pQi>B$$eCkC9>p5igfhCr||eGAm$ET zO{5^u4J%>Rnza-&aEn>`$!u+`GPs3nr>Im={iD5*8*65A?sN_#Dzpl|KMlEvCA|@5 
zFP4ocF`t5t6>Fl9#Z6q6pKg0%3ju^8UsCL#91Ew6)lNR!J=yVp#0iO4D-At=W32hb z%+MId=O_dN7W=3m#C@>YADx|E+5Vou*iTLOMYph-a~H83XYx`Q5(E7F*4aSHGFV<-e%>yYGE<^m$Zr z8Oc*fP9dQU*CR)mN4ZqJ+;Y%HVe)Md|Cm)iyMIt)XXM^JSK3fu31^nC@`^xsN8~^E z9Qrz~hG(}}1tiqRh2umXClm+|gd(z%K&L#X7$*ahD|pxj6h&ut5f=H<-t+(^?{`pv z1Y(=6cMdO?4h2gX6OKRa236*TRup;#NR&qT-@WB8&+lhYGiCL2$dP>rhIj?Z3QEKg z`U@~H4|OX_?3AvWmZ_PVwbmN4ngx#D+6ZL%5~@L?^7nlM>cVc|Y#qWmL&q3=ig?8g z%?MRsuwsPjM264{^&;*Ije;DNTsLcD=3-SuYhJi#H72L8v$zX=3*R6v8kW7i@dve{*B4hA3r#cR?%c&OoZ5DJZR zId{qzKfwt%>7UN{LhVi&f=>+-L|!#m0A%L^`T2v-%~L~e{GqW0jAUD#>80xh`L~zqG5NQb z{40Kr8qmQJDy_)<012&%X|Pf_sVJqb(0FpX0A(J19~xLOeY-9ylv!#~O51hvz~P3j zHq`I}5IYVf%!e4OvNtrxJ-6s_$9-NtbNFHQP9;2C!(tVj_gdxF12gLJcaU#ILY9&H z=+GnMpM!zLNJL!%!ZQbfI9_yf)l0=;DU8Dp<;|l5?7hmrj&5SjufhED$rE%WU|zN! z0Ev^}qe`Xw!K9k`5|4{tqCr*Iqt9Dp>DZxh44Kh%&247WF393LeHoqKnALDdA*SM8 zB%~foamdP5%fO8~7vMhrBU_FiVeeNakN=)6oB`QekYLI!Ozc~Dl)xrXA)bEM;VsC7 z_mF!ZNN65$=IAPz)Op!Tw|KGIK{pvVU(0am4+NSm?n1I3m8rskPr^ChPL|wYs(bkx z-WRwvJ3+m^Q|xxVi9$X)H4_F-Curlq$;VIiErc~gvw5eCCn#M?=-i1g?nUWcNcxd% zK!Rm3jN|7~!F9wF*yT{y>7nC;51tqjdF+}(avBNM#h5pC*2xD^Hc@;oLw!LkV@)85 z)i`S0vRfSBjN- zM2v8{Ppi_a#cI7qtkG-5T78+gOs^B`NFFB=w9E8*v7R&fwFbRWY~<|#w3my^poD79+Kao2GowsP7sr4DFyYLwI3fa@}) z9w_z5CALGqL21kbp;2uG!U`ZPSDK*Tq_#o71NzNM3-nvmcId~H6-rcTJ3+)wwM%I~ zaPIN}}W(6Mos70%Ig#*~s%nS`0nW-LKTA4wb7icDqFiL9bZTG~{p ztQouG^~CZdCgpS{POP9|f@DfEXL72+c9Ex=mgi>7$!ta%kQO8pSv@D4=?P6`ACa9z zEUb2ZM10f>Q_~>pa1W3gw%5J3DT;&`LMOm84%!W$3+wx)4BE$@bq|xC!l!Pp(~G0D zhJEfGjv%)s7->DHrIRx>%69qI)`gJhlXE$3#>~=o=*4}si#_1mQ;Qn8S{ldJ6#%2` z-@bHn7`dFZ3qF=RBkQWo#rfmxHlc|m+2g{#{z~L?*?htgnWkk}mgNT5=!+$Qi}nMg zS(pE0z6HBY09JU!FjR^z9;Gx(X%t5y2xul3q3f_!17L-A>ABJA990e63htT(8$DTb zQl({6T9ultPbjh^Pr$aw2~z_tMu4pk{B*SswQ}e7LQAZ`O-XC2l*=0S+)-p`2C!Ty z)r`BrW6_GP`b#}#`HFHTlPc6T+-NR{N=theC)`72=22wIvwu}22TFoyQgKmq2$hsg z=#?b(uq2&e^TDxh9QzXHj4Da@FlT5yZn?k-U&5`Kl5`)N4!wE#2v==6d;2XeSg9uRs9+U2aS;&uKKzZauVyZkoET1HSfs%AT z`)Bz27+>0wZnFeQqG}E}A7)zRczz{Lf{{ztB4C)&D-kv!@Qv7vtt|*!5q2SrA(WiU zTRaIcFtK6O0|?GQqtGU};7Z8}O&2`P>7IsVBif9`W zwxgIoNwc$R#!yYmt4^D0Mv2!$kUmlOK?9HL(%Sgk*MrPcg~u*CMON;G=B#rldVxgg zP1yEChoUjVA^`|g6$n@#;6ryZxuLQ7Ff=V^Dp|goT|jRrJk;PK#KRtKY+H#rWqGx1 z_Rv(09)wQ3qF5OeHJ74tZW3w<)K0pQeb_kIuo^hZbQAV!P*k{SRO5V6w1jAi?_7yoP@yzkCZGyO`?E0_>%79U8-tO*`YR-8E_|z-c*=x z87E2G%@a_Y2@`Fd==qt1fFnw z-voQ#zJb(|sX6T4%Dx|MtwYTVv+xP-0)kvP6J0}6Xc;{L08YuN35xL#DcgYLMue{e z>;}q7_*$Vo*NtwI_Kc3-xbM2rJy;)+g-?1>QMnuL0G2%aYWrJcFB@O6W*lv{XRJzR z(q>xL(z9@~$`h()*wu3wIVJ&$p=56_8o6kF7!>q4JH4WQMHHJcgrf-DktJ{51GJL` ze@6!)2J4MAbfL|n4+|MhC%7f$A`b({lPnW^-pdPI4O`vWRm%g7d%>4BrgAw5l`zY6 z*6-VZlb%B8L(PFi7V`7(l$tSN>u@s5jU}^WyA_060We5*jpd5Vj7D`oD7@c!FWLVH zYR2rfJTNWHdM~y~(jrG@T`}Vb-4A_iQOgB~@vJ*$K)Vizq6bVlC|hL0ZcRWyV-=x~ zA@EqdpS|AQv5fB`H=`6kPcU~pR(@AN!RHDaW~ym-+>uB+rPOjRC7FfY$%KF-taei#zij@}!i{Fqtt zfTIrq({lE+-l6=}sOcJnaRfX+==BJ@5U^O<32`U3FqP>Y2pH!SBiD{azQ^buiqmuT zi^BY(59n?fMP36#9Zo(^Q(#J{4pa$^LWR&GRKuTN2niiRk5EH*7v}r=2|31I=x_HP z00H_EJJ;W_VFxse!A@UA^2-RXARI$@385^+aa~b%)fKnqe}SXF1elGaV8bQ2wxu(@ z5WAWd_j4_xhttGL{*mAi)TJcc+~@%ml>sbw60(WDhW*zOxJmZi10FlYqsS;T!a{|i zbVdS(oJ`?bI>4?SxTEeMY7D{^5H1}u9_j|a3!Wc{wq3dkE5gMjGb`z;oB__^!m8D8 zIAshM%dMmnrsc|Ib9Q;+_Y(9ozuHn>I94H}FagHnG-Ab8yR{u8?Rw&YVgc9sP86F& zxB}sFcJsQ%?z>UAR)RAOEhyRwuN1?KhwViA4KlyQ9$UAO{GR<`T}#7KCGyw6w6##Z zeh=x#jk7#ZzS2kRo7}IE;2)h%s5$;B^6*!ZU7n%5v2PBpZQx0KsblCnAa^@EJNT0! 
z9Dr9+WMLD~RNUucf{IQhb;(_s=gbQr^bk9}A+;c0nM!IXS-g8IMQOOGC$*);A7lQ~Sny3|sF4EM(yyp>46DO-(Jh;i4HYY=hcF<+4ZA(qxIbfm{ z0AkRTQz=!E3^)S~Dj?mD5C8!Gaz~tFi7j6#KVSX=3_oXYZ@E@lstEE8cpDXA!14&A zxO)^`P0@=K9YoI{psGb%!X3%?`$ZUNWw(vAk{1i#8Of4-Gjj5*^kSH1RBmXB@@kApo_Ri?QhO!xPWRWjQ^Lz>e4j0S=bHP0HQ#Rk% zz;3?kXmZgRp-_|8kKW=F01o8WP~qzcI}mV-`MJ4pR=0o{{7uN;dr8HW9ypF+gvsKyC>4AIe25u%q6oFey#zIkJBm$mSalaTeCbLH+by|jBm{cw&3dNp5 zaVmQr9_EL!i{+NShfs0^I@t#&qN-%k*PHfa@7Y|S9b>xNwv9z^~J z04#5I;-Cr#Io|u|ZxMKf$N0=sOdZODjRykygYt+fIqhQfSi-QLRi-p`lK^bS7Jyo( uK!VO7BB>`$z94*qE+<*|6I=mTkO&6+&pYtX9mzYkMA{>sNH{PMAmo2ILJbE1 delta 5534 zcmahNYj9NMb-#UQlWZ139-H@TAqyD7D@Y6wgb+l))Ht?WgzL@To6W+#dzbHS0*UBS zz_&8ur?pzO`wdmzS4^T=3U4G9*9AkB4h>VdiqFE|uvn3Eys=8E(nV9GXY+ z0L`O&4WHyAnpgK50V!YvrJzwFl^7u@M0B5CYLrQ3M!8gOgr%@iAypU=DMCEIKF63V z%_Telc%@Vcco2RiYA&Rf0#f#{Uz(?ut6{Yw>5-}iU1~&~bJV5I(W;Mlq#78{RV!g! zsnxFm8Zxomvm$daVw|jcSY9s5TvC zQd3N8R-5;_?_oToH63xmy4bKQ-ufXrX?vX6zQmZ4N{gr1_r*=_0cPi)cYnkJw#UkH z9u&XuE@NTwfp=wRBgpeM1nmHxaPb(t&Td#TW*XX(J<8;mnNj%OC0$~*uaC{oPx$<9 zKT7hjI2`DU;7mJcrHqW88lB{g;#6Q|bqNv!N+zREW=-A#c-h{xVrY&O$)l)UV<>wy`C5gilo4GUIFR9*Wu?rtBHBxKbeH0E zk25_uF)d_!vl=&w6!A+yR24rdIliI}FkXww!|N?e<5^HQz`4med8G{5xO$qxmm?My z`$Cn=ao018^_kf*jTafQD|E#eQ59Jk0f#GzY*PF(^we?>svu`C0!%7Rm_{b0YjVc4 z97AL`3Ow7B)Ut6ec9dRCV2fElkX zc7$zrSGVm;rB&@f(PGY)^d+^l2Jv5eT$~ACx`m=hDT$8bD-oQ_YTLWVOr_7oN=YQf zixa{Q$T9>;mhTofRD3N)+q8b}B)}c5n&QHD`$RCJ()!RByAK!{nrCUvN?|6nPzz$hZ(OEr} z)-1k5yi?uoZ$iO-QCbsa9U@k9I!cLL*a+F*211YK+iD+T$}pf)?%tNOtc72VxUZ$R zC1%+y1evJ-0h2WmhHU>p$fb4|CjEH{wlPL39Da{%a-`NldAV{Xw}-?|Vh z%l7NE0X_E?du6XW8^rsmBm zFlV-3uy7ctQgBRDR9fntGz3p5*qDm#9R==CzN&c}3*)@)(EY~Y&F>bOmZlaAJI7su zuW9wPS_%<{ewTQ%W&L_wVjNtJvTg+X5!`}+N{NCLbb@pnQVUDfaX2ZuTbrv<^NbZT za5E5g=C#%pEQu@QhXB}KE2Aa&JV2ostB_oc;F|!p1Lbab+vUSU0~_Swfo<397#bMH z?uQ~|^qit%Hyiw%!DE z%HOx&u@uWE7s9(%cPZ+R7^FY~f(!Jvi-K2o|GAC;=kczHu#`g`lHrZ7;Ub!nWUj}eXtZ*$sLHZQJYOnd1BTCmtc$-w>@wqPkCb& z;B$Z|`Cu)36q}7W*a*}@KhXJu2q=w@iJ^E$6(sEjEMhleO76|1OR8+w6eNCT%uFtKyI0%rvo7K86F_-G}6U z1{fr&?MbDx&VlADq^IqhP#{g90clyda@d}9@hMa3;@<-eZ-5%B9}rg{=n<`pyYp8q z{yJNS*$qYD)USgV3fS!wFjat)Wgu}j)TgJcEM5fG2E==ptY%r!+|}hj1Ux=0hP#$* zaU)6`v7GhtYHdq;!`ajgP~iz(sfM>o789OL8Ep78(hjUMg^wN&i8vvb7eEtHGpGEK-f`bT7A|T}j3%G$g z@$%9e{m&rxw_^LU+8j=id~mHxrKJjb$)nsfl0>^nDH zsbuCSi7cMInZmV{Q@)3R?R1cZ;)$O625KQb z=nrtqZ$}=sQ0Mj&7jO5}v0KFFJq@K)u&0pqs%TrahP^JXU)2;XZj;D=89-nDfmOq- z8v|kcpf#l?oZk985=sURB(w}3eYP*dvFy+!Mo`qPUKyqOJb$nJPatqejII6|i;244 zx@IgQ$*m^O%UjCp6TrDw4EH8M@~PhF^(YB=dYk4 zFHCphIirM3{T_J7^D8df&7zoR@gU`#9ZKujyq_V7%OlqoZ!2xD*FVcMo`~bkI zwENTIgZ}G^&xUtE;5XtM>#vdrQNE}>k=Or#49rMMA&w#A=wFVmJcjkAeC?AvybTpWHcgZLePwWs;>nx%7-?@ zf(WqmW-dlFhge9RwJciE0-(Jq=4~|k$Y>Q9{qo22Ox9fBsHcyb&%}w1yIHwt7^rV2 z2QNoUD=uK}5}^H8NCQ2C7jkmp1F4X31?PPPbN}py3d2xBAFm5~_($T60ksu57fDq- z6+U;1>ozT0U1ag1=f%e`y+@jn9t=h5j=;0ilVTgb8dHu6N6i0Xzi2kL$BQ3 zIg&~qgpZ`jgQIZK)i_A})8SR@?i@=Q2jQ_aeKg)7d4{eYf`GgY>`jn71vhj8Woq8e zojV5(DDcl94Ziet?b_7>FOHYscnSXpyqrdceS`;c2+73>#Ha*E5L*NwULuuQW9EKY zbJWp9DL6f{XaSD(Hk@K0z$VOX1O>lgOyHE$471z=KEph8u`-XFxtWM0yAD@j;fF$ymATEpm6!z?TcfFgZPF$U#c7+g4bXy;{wbv`D&?nwR2Ja6_B89%UYloc z)JTOKQX#|%G&jVF8(cuFkdP3ZkU$*xkq|<2;ee1ha^M69cyFAf+t5V$^5)IVoA$R}Q>Iv&sJ?yo5!#=Ao z+-2pP1jk)o6~|RI9_8hg-KgvKYN)G` zZi88Syg{$m>$}UWy?o5ubu0Ha1MT)O-|O{nutn|Vsx^L`@8jbejA^g_u%z*Q8yd?N z3~Vjfp8s=6^Lnmo=Flf>EO$O9YccrE)V|qjx~U&EWxdt(-N@s1;HMm%ATgON0{2=j zN2x-}RVQe1rFWw+cGC92{DVAoEY3eYV4}Izn0R!`a8g)#5Dc6SET|?e~gvg^~L`kcMshO+U5mYvPZtuMQt3 ztA`NK>qp2qyWL@$tjdy~*i9r5+P_r4Wedu!S(+%^^dU)QG2w31^ptN_32AQutSKg> z-gH6^s7v4of-HG{;yU1&MR-Ka)b|c7k~WXlJMD_=^?vqt 
zyH!8JrY_N5^2}t4m(F!Gbrj)U@pbPPPpTsepf*rYG9U0JoKaF7#hbiZWz(^!@g?ePuQ`hawz9*vbYLsvCi~V~Z@uL2G&0Z!RYvwu}82^6YVOWoTi6I6{9< zDg!b%b5t@1>$dDPU7n;7BpsnpYX7dx2Y!-L(VzNWd^2=uP6bf0$-Ht_QJ{iRM8aj< zCCle;yF8}mG7aQ&aroe|r%A7?DpAzx4en%KV|4Cyjqi~Jp6=mvJhla!{TqM}xI+?)h zTT6zK(>KN#n<0)is|RlUe}}Bzj38_Uj-3MMj9eC{c=X@JLe5 ztEldQI@ZKl;vk`G$|&hMLkvy~GZSyrYwWambD}-FolG;I#kZ~4nIH1-q*$3;>)5k- z(H5e#!C}V+S+s5bs%_7Uz5|yY-sDA5=%b|vEN*h!J|{K~EREeulM0Apq07GEBn{g> zFYZi@h~BBQ)7!2-_+q0iz+vCrH;r zIvu~;As95KjCC}f>Njcwb(5+Bl?D9Fhlwf)nGc;<)qpJXW=rViorPCu^#uYd8p{B@ zz36t8%$MwcIqIg{23gA1TozxLFR<6euIZD{>{kTIijB{Yv;~+O$dT1Nf-I{iNfZSM zP`}80;@?)^C`)e!=`ASI(G?PBG6G5>njY?_5UrY0Cp?@9qGjI|A5D*}XU_s6BYQ9O zlF?#xV}-kE){I1q+#Ro_ji_lKx3?Zb*(1ltM~%u3yiO)mqF*B^CU`(9r7<5yUMt|I m25?0(O%18Ddag>pjQ$P})<^2=np$T(9CJ*8~3n3vm33eb*Xh}jSk3b(ZD$vj^bhCIT@seHdGHWB! zM#_PnT5cm%>IK22QrklhzErA^TB%Z{_JXRaQs>kPaqqRXhyMRKB*Bm(*ovXp#E!Vcix?4ta@37_s-a3* zbz`1pXi|>3aZfjNFJUCSq>=Pmj216tq`X$6)k_;`FJol9tdS+k+`3|GcKl<-j?*2t zqek1DV(WGS)(N?8hjr3!fpv>qci8QA+Rl7Tj83}K&aOn*!&^DpY45mAPAOMYMi`&#ZFyNBKeN`1R01Vh|n0dYb9Fe(XR-J@tIhzs~?{I zjB6|STD+b2&mV90hC?>IDx`0-l>hX zXHd){>_BJ(;K#M0sT8g>gjNJh3rirx0Yvnz6Z6{)OBd358cq-Lx>h7wY!@p_`^tajK?yk)KWdIKB_9jsQT^6KI^=+GZN9intS+72l?n`q9>($+Qe>3=d*U z4hyS73u(!(*fKUFUaOR9jIBy0gj%sY$}lX}g)qvGXF5A`D56i+jUYqFb0?E8Vg&2} zfKZpfkWekQ9I_eQefAj)1E}#IGE`qvu9JI_`wA~)U)_5cw)3h`OMbA*@@PoAX}L8T zin#5BC5zd4jZN@Rv;Cv9cp3!6PqmR?Cy*lCO2a-sfzEz?@)$uEN9hkhI8#(Yg?yFcLY*vb8YBd6c zpwGijgD+X4XycpuKe?NPO!1(nH;vtZ4d?kcDVyV;_wWmXxby}WJL?~`Z~ zYGJXV`6eo%FR&sFV0;a~GEr~Ca)kHpp0OA4Kt3meQW{|h0f!xX1ps>rWoZW=Ce#2% zsC2??nP&MoW(Bo=E=iy+E48!2OOpfu#gZ1KOGV{^K z`!+YWS(L^w!b$$qp4DwS8}l^gSId+&DG1Fp=?&97!u_Fh`!;xy6fS|K7hJAT)12mi z4lNAGTU!r8_c+tbR#-O8S$=8Q7~Kr>#ghuJGa4|{JjwqU-gD(y+L?_JNXnN{DkH2Q zxCmRGx2QY@2m7%)Qy}OnP^s#g9@V4lSpD+ePa^A4*cY)daGk19ap;H`e0+w`?`U!c zz|cd-gY%_T2H!js)`A*pl`@avHxGu{+SYKg*EUCEW6;N(D)mBH4kGGVfy}>1&`OP4 z`L*fy(ZVSN%(|g7m{B+`1#tvB@*M@dIWAS4m+Qnn{~ LLMLgGA{yZTV51Q4 diff --git a/nlp_resource_data/nltk/sem/__pycache__/lfg.cpython-37.pyc b/nlp_resource_data/nltk/sem/__pycache__/lfg.cpython-37.pyc index a5b88341460fe142ee3e6a0aab39f5d8cb1de0bd..fad5ba59ae5786b2752b3c3010906d4e0eeff9fc 100644 GIT binary patch delta 1831 zcma)6&2Jl35Z|}!wRi2c^Z_CKL(^5~2rsK`WtGXZI!9Wn0UKMoq;Lsd_gM7cN{t6uIJWK~Fs(!3FpaA`Z+vC#hm_V67d`&YO8N^P7)H3*RmF z-mO;48a&<76B;!%?RWe*eoSC)!JBOX3ABJjT1-3=>z*DPo#GS92?Wf-}uE zWUa%)B&S>5#LvStkya2s2s7mB-6ZtWfYB(-8E;3KOQh-Vw8I3L((SaHfEd`R&cl2s zP3RImHt^H9)6T=qi23M6dooe~`P&^*sRDnvy3oz|f;gwYMaG1u?==w_UI8M_2kofK zII0au4Y^VAnb8m<#*MW)Dpe@mW;uiuQ_9Cc;_Ce5iecMvIG+|htZN=w?xj4CM&91$=kdBJ@qBbe{9w%$E&QAn z&#mc?6u8RUKnG5w872YSm!&P%&byp(2ZWxTusx~{IP~~Y2~{yw9cMw-5*v2!ZMw4B-A2num* z`4uE4>s`3!-77PjNa7v$wpGH1u{Fh3JQpk(azPM07Pg?z3^@xy`_;GZe z+=OPj*}ILQ*>~D8d!O@^^9A_ki%4EaqELMex%)_#kh}pz7Vf8E!k2+{iz+x-#MVl3 zX|{bIGF(Mf)pr$`pJvp5$1-;`HyGXa$8d<%(>}1SS>#`rY?Yz_ delta 1661 zcma)6OK;p%6!!Joi6B1B5W5+!9GV??WonrfcLlp9K@Td6sJpdF>gq!G4)I77m=Udy{#!Qs0<@3CnIPmeYmtvuz8 zo#1XOZwIj3sEg0km)WTJUfo+ZQ35OkX?zqUT@Iwu2TB7f2s7e+?VU?52@U)HHZS6W zr@jv($f>DtY+_KiD&E!R#9!L_i>0pX`(d6H7-#2yC_jP5=S8O9t&@Gv*~Z}fC=Ls0 zN4&iaLAy2RZyW^|dy|IBm?vh;H_lToHC3whi;`sn(nrq5HqZEe8=4LArMbbT#E<4x zHYxr#FW)v$QR3b5r71+c#-MU_rd<%)eO{w)E6L#}jG9S;Y6Hb%zH<7dY6+ z^qh?E`Q-u3korIm7m&9uzI6_+k#RZV0}mqqxQa{2tg(Jxr!{5el;j7!ouMY1rBYy3 z^-i0I(r9x3gRO@6gF+CSxo2JC2PBZ)gjn5XUL z$Ksaz()3eg-UW>;(u6@-krnZcyL>QA|0=;V1gi+xpk<<|EnX!Fws?YLR;~bCBc64q zQ$Y7PMr27lY{f|gJfpoK>T$GXYs;}G%K?6!6$C}qnWN|OyK?BK}Sq3Lf8mp|3Mi7RW-3ugI&fGBWPEihBRtzR>klCKF$3Mi# z;_;P!-{>lVDgo^_(6*MV0Sib;w^QKqgefmtc?6f}keUEgK!^w^>wdr|6)UN-W0Vt( 
zONX}7X0~H%<*N@(*rQ9Xt}=JgT_G0eVsYYAHrabHaYLP-C#$sBILZ)XE@g4Zf6anP zVNIWV_QV}|l4B81^V2)@i+Fo_@e`m^Y+s63obTVP+0wzA5{f)eI_1=QBUw`H*3k=x sEI?*C^IoZxX9RTCVD9)dS^q`-08>0r*>8j{pDw diff --git a/nlp_resource_data/nltk/sem/__pycache__/linearlogic.cpython-37.pyc b/nlp_resource_data/nltk/sem/__pycache__/linearlogic.cpython-37.pyc index 2cf3401f24b096c6b0d554ad9ae461edb66fe7da..fb58cb4a763ce03647e2d210ae45bc9a175b1fcc 100644 GIT binary patch delta 5026 zcmb6dTWlOxb?@wJ_GPbMaqNv9$E|B`{7&37F^*%$4Q`Xv%{%qvF^qSv?M-H8H+ObZ zJERzt2n2noa6v)>Aqom#NE8;t2StdFRtO1NP(XC5h(vrLf%rfmwGx7J?sy-bI@Gmw zkMBMAyzaT@oO{O37s+cclC{H$MAQdA#qgKaw+{P!f5AohCkT@%81^{v`E;L744+99 zVoHica4i{rGoS>-+;0TUkP;H}fDtxjMHcg*5iz4m)Ql-H;-jHkJ|#Zu(?fdrwoeb! zwZtvIQU?fGj{qVhyZR)YAswYlL-D zZ-8}!ST_ltM!g9zP3t_(0@tj!0Ir25*My)Jf!wCI0o!>L&^ut=A=d4Br@r&HPf5`ode>FyRl@es)GZ(QVApA1y8Ex_)3oH&7I&P%Wv2)jY*@cqUi0U>ByT{Ayu2XQ_MC zGG8WV=Cwk0!JvFU`DwBV^e_Y;Ob!=qHq2wvcv}S4h_msoWhp=c{(vMC7WwXFiL{3S z>By>@wXy|OWil)oaDBb^-Hv%YKEcO ztL8$^u>1J2zzg#gE~S21vvh;1*EN>az`ZLN*2MdaY`HC6pphls+H2LsIB6C7iWRY1R^Tf?zGRTo=~!Ps&4~BpTGh z@9%9HTGyaNX~j0yiQBTqmWn2|Hk^lU}+|;wE~f?rgoL8=hnSg zJ@YL8edOR;97tFKK^+o@RaMVr;F|FsSqi~U1iKLQAV?z^LQuU=F%#F{0vpI+zs~dmPoG9c({>aRk*)3&6fpH*MRLVU0M#SdOtl{-Cb;5s_wtDEttDs)kBxW=h-ZFB3PI zU6(lW0(#L}`j8i8J8R~R?BZ&!bSL==kx?dI1OG&0?~aYK;j~a^vXHI$oPCoYYVIxF zX5)+9zaDX=l|V&NLA@IRs9`qSX5QN4h(dp;^?xC zK`oOhnjW`fN`dLB`VIc2ZAyA9Gx%ePFG>fa>FtCjqQi1NJbp80vn5zuqynWuy9m<7Y6+M)mWK&d-o8{>{$2u^L6i#YK{s6{E=SkYCX7h*Ug>Pt0qGf`u?uO$O$Zsn zpF-is^TYkpd)+@Kr-Y0nC{oCH5@)C(9d_ToN`5*-bAp;`(dQkKae^6~dC!R4|13Rt z^nJ1>hTo!^c& za;blo^p=kHpYrb&sy(qR>WfHceAn3&0^uWJ;z|DYP|sr*5Q&e6BbR$&)(6Y9>_*Tf z-0$P|_ng^M zgjoFLJ;s<2Tn!UE9{Y&kxW)(fUYqg+hX^te!kb4G21kLw?FC@sBNE}C8{N+Ty|>|_ zr(TPePq+w=vy}%1FR&j)qa=7>2BLK$ApEQs8y0?Z>=JplPab&ljK>C4@5smKs%XH< z50Va8B@W+kl$lh|YVa3ph6^x#P$?8gSLt{7`SJN-lvHjZ0=VI2i9>@5A2(XovQ_m5 zr9Y21ONT`Q2^Z9yUc85j480hfo$73%qFNXs^g}zM>AI@^h(CVt!j{7IP5zUE&y9=Y zqgE$!20lF9wzI}`_buLdSW!8D#b+m9A8;uhX=p@PwV{xqT^~dymZ*haJ(IuXj~trp z77R5JUFJYyQ7M1JZyh?0G z6T)9r=h7WcRE2+#6p9QQafTjZekAeR*%k3^q|{kP;p&jcK@0w4!h*1^#01j)ZwLz^ z#wD|FR}4eC)^*W=<~}bW#Q~dk=sl6dWiwBq@db$@rtm;W&{SVaz1AC7a>z1sHQM=97VB KFd2{&QSv_&_r|pV delta 4790 zcmb6dZERE5)z_ad{z#m}2?pF;`L z7PVBGrd93KtNo~|s_mq8+rM?*hqO&%KRRt)JGIi7)IC*|s{Pm|A@wgqowjq%^H+@N zQb+IjoqNyMJzw`YZ+(|p-(jl5eQ* zFoy3=-I;YITv>O*%^aNFawI&59hysXzu?f^+h@R(1L&mxgYQV;32IU@Mgk;fVXI2z{5lz0=!jg1H6s!X29FE2;dP? 
z&;ode)(Lng4}-i`z&B`JfOiqzcGxkcb!!`8VIwWHYf-J|1xF&nJG9=bPH~S%ZaLtz zeWx6;P5%NPIAYB5%^AzN0vGZ<_N$&2SQ7~DKN(*#vOInzxw2&Blj3T8RDRDn)$M^j z+&9t7iqp;9F1J%*!Xrnzy5$$ne{6KYhNY-#CYLFwsz9F$(M# ze9C=dC($$m8e^gpx7JcxEooj*le(^&E7=7@H@C`<+}}K3;YzeDC3Bk2)$2)-NiOJI z1VBh4P+B$eJQs73V9Ql7u3Em70jIuEs8{2)8HVKBp37sl-5|#bJ(0~Hx{(*$G&4pn z=Cn=hss`3c`eO5E~F|M9_m^GlH!MVhE~-pf?I%D{MsJF%eT3q=Hv*!Gka) zTIFYW9^Gcsxd7iHk&tz95aXOp2Q|+DU>h zna}Gh1>>Z=5Ii!x3)xFC*$MCU)R2xTOda)%{BtlhfKGTwmI0;Ud7-N3fU;ZWLP5UP zWde^Zb^x#fDLrYLSzcH&G%<$DXouL1U;;t))K!mdhm9xUDHV{{8Fc$yic7xP7cJfo zoniBq3mjuLA23ZW3Xp4ugfN8I1}s)klhF$HC!68XEfp=x8GvW24?*?b17EpM3D<-3s~ z`QkvZIN5%Y?JBdO2{Q;DMNoAGg|EV&(ev$F>^Zh(@^s3^}fm@hS608#aF7+F3SD5 zHj4nRGlzH#!65{XBRGNp^Qp#!YO*~78yDd*+W;ucAM}Tm0Q^D1VVMftPr+0A$bDUd zY)IaXb{D_Vb;=pu1M314R!kunl%H;#8=&4yj+(~By1;stbC5xA%EzO7_ST(del2;` z$|2Zz10K@}0Pd0@xJ$;_VDWC$>zu2z;V23@hTu4Wat%Dp9DzE2DJS~I*KN!oaQp(E zvN40y!+z7pSyX;))15$#J+Ex;+d|%W3h2cN1Z%xP0sNCZu$hljZW0~E6N$@N+Q5wh ziS87w&3m24Ng?)FI|w9$TuBPQnk)WRyu9V->?FycY$6%6C>%A!eD<|gDO-yrEl(2i z|5=NrEKdsG>d%PWcZ-Vy|7NQs7jfGesKIiF zg3|yJfkn7)m+%VUHxeGutzNI!jymqaV+I`#`RR}rCmX0pYJEJ1GFvJjjBiNflh?Lg zi&ukF%R@<3$Ag6zkgc&}1GrI+U?WPb&Nlgc?8;vIeg{;LZ6SOkzk|sSxrV5$JLJHy zIsQb2k>pgk8{c2l3i{*+!^02nC6qj~DE6y26wH$M;4vcr6sGw79;ICgDlLjz@j**z zqCCQuBL5j`li%KcnDrMw+A$Z>O;H z#8w5;zAXBfC#rmP*Xf<>$&*D^>NgH{vq%0{>5#9*BJ%L=M`lS`HG%NR;{T*grbbvV z2|F@;qCSlRD2S51;k`s@q|3A86Bp~6IL(RVHay~b?jQ{9R%0H2zo$zmYKuFbLHtumW&-^FpS;v#R>M{L$XePS@!{6_#?8uh3{#zJo5q zY9g4ypHf+_Ws>lXv9|#>925-A&=&lv{AB8U93_<}2{&%kvjkyqh2JU3OwLr*w~N}o zi1Q#73>~xPgfzM&6d23kd|VTkDyqo{(hpNfQqxrRC-T?(FRZJ9{Z>xRoSUNKqgJcw z^h%!F18I%vjm9P#QEtl*W`4TErnH=+V`SBu#)Wp>Z<HFmZhEeDkjY7xe&kkIP zW8#f;SgFVZWhhKXI8f;=L;n6YPHKw55mRsXl<70X0vFNHm9H!$eq`s=o$2t?u zIlXXol zQ&0zq;u~E<7~-@r5XW%PpegWt0*$hxSamqPk@d=S*3g!9K7#|iiK2om?DV?A!Eij> Z7M=@th9lt%;ra0SusiGtyCHAb{{XIMd+Y!J diff --git a/nlp_resource_data/nltk/sem/__pycache__/logic.cpython-37.pyc b/nlp_resource_data/nltk/sem/__pycache__/logic.cpython-37.pyc index 508963291e560ffc7b595d29a9ea1620f6a40e29..436e402c3a73bedacaf686483c67b36141123c75 100644 GIT binary patch delta 20187 zcmcIs34D~*wa+&*nVAep2wT`8AsZnJYuIH8gdqD80f%Ln%=d)^W(ju^gs6=Q)&=*g zrP}(ePX(=|F5@mLT6sQet$VAVio4b#Dq3yzS$*e!zM1)wVKNE7_j`eyxy!ldoO|xM z=iF`b{zbO;F0r*&XJxt4=%>|jwP#AElJ)^Iu^&4n=Fo5WRcl(Bn&wVZ71iQb0v5L= zV0Bv+i?sWRJ`RxIR+rf3|erF)Voe^-kU7WZ3dj&Gx#D(x2{;WW@yFHsT zPJiz}jys3b8U8+jzV5!1cDeg$IlI%`{cF?IUTWr^G&NHju-od+RkPIWJ!$TNYHu}% zJ_o6N)V}mNSnEre{nY-1*&mp@755Nzr#e8*-J`gNYA&uHs1Bm~K}LN$p$t=Zse{!a zgfd)hQ-`XHb5yO0S6qSrZi94Th3e-X>Db(^A zXB3wesl`-Q94#y0vJ$m@G!>4H78Y`0sXB(r#%TFk-fkO-SELn?4vMu=1WU9+f}^z} zf~9JiT28$lLn2hDmDHwEDI!~QX1@pBkLYP7bHR=M&FJOeJYOT7E z`Z$fsPgfUF`63W^PiG+(t4pY4i8h^un89UB)n!z+ELt{`+AUXCP{|5yCbc__@+;L< zlwZa9S?%0twOU6-bW+GgUX`-P&BrS5tny zx`FZ=I6sf_8`Vve-^BU(l=rBcDZiQXHI(|qK$~SWUbhSy{N}5@uZd3jAxmXRTLHb;xhSX;IT&iwY1${14!)gnCE>~OC z9rU?ETdAJ4&2p7hyr?bRo#v!zW?h|DdXD_Hbz@Q7dowZ_iX1ImWws=~0HEZEgMYq+7CXGHF^Fpmb!lUHS=3Om~p`d4i zCq}^+3N(9Ln(F;pWU}o`rB?xYmhY#IZ4HaD@-+Kwr8aV|y-*p9Vqt579>Sa^P zVmZ1`Xd;`%7_^xL&G3oLn_37)iu)c_ z%9jyNTSigX#s4>bujG|x>UM$I)hyOirS51JA)giwiw$yI|E>K%RL{V)^EGO|ZKV!T zFAw&w%uV2v-s}}&?efTp{<#(>n3URuQ}!M>C$F9|A_ywhP-}~5rj$FQCA3WoVhW3` zGBj{e|IXAz10mfWd4AwN#omhM%jDLlY=gNLVMkmtS zNi|31@*(#Yvu@{6wp1~iCve_DRTf2-483|1v|+HrbRfe3JKgCI`N*e)f(e=yjZ|xo zd^WUR$>RRo1e%HUa{chJ1uW@qw6{{%6PrSCmpnN9jPWd9HD#mh#Lq#avy8f{DZIlg zn!M!K2FZyd-dC2%tkNNJ+{mm=tbz;ChR4$0=nblVO*B!7p5tw4(E`ma9~2^ChzhWu-k z_42#?4HJ2UT-r|ZkKr)J2(8=>U#svmsXG(~;rU362dBcKnFS&t8q-l73^*fUwmIYoKP zS(IMLPXJDDZ#b-pmSr?Nb(k|^Je5WV&M*#E%w=ehAj0KTwk&e2C{NjJnuQ;T;4Xd% 
za07tBol?4UP1LSprqN5)!ai~hZ16k|0xddo(g(_eB?A?IWOvCVMOiK%8a=P?#f0W| zdF#osczrFJN>ezibXXvg zsm-p)fzl<04C7>BS-+?pY+y|dO;I_9NMBju#8Z=ni0gyo$jU*p#Lbi@_m&`-qk%da z61&8ILiS_=UPTqx%AE3l*}1pnGP@#Isgy+(c|~yQDW<%*h3fB>TPqr?p!(SO-$rTO z8JmK-jn+XYP25`v=a6(%Zk@%J^=rx+mfmwix|-TN5xJ{!t1=+LmW`++E60@?QwUZc z6OLyE8)p(9CaxjOBl7ZbCq_e!dU`_>#UR}t*2ovB3HBv^M<{=Z>>uw_?1lic_rwDG zooJXV@2<+NV)s;s&huRS3T12$a5938qN6>SDokd_lG^+rpPPE& zqz5S#^_IFbzJhmp#g^89MlL-;daxZS_e>MTH=pd#h=icg~wAX7tET_4s=lhFq(kw@Oadak5GIl zY5`~f*a@%$AkNfq%4{$QxdM?>APRDv6Nog1FNY5n@N?|31e}k6`@nqnrtw4;n0x~B zfUQ};nhb-)n2X;4uwAoNLIdJS08=%b;`qOmDm3~H&nMut$aS;lDN7^2p6#(Ht@4Mt zWy(@Hy1KF#M-%SgHtDUNTBbYvO+hW#8oLTUO@~)+W zrFGtFb8YMo$m5G-@KD zm2UTVwCx_xS+aM{K>6tW^3mvejLi!tvq6KP=JB*qgELNJ5y45%sH63JjSwNxk*Vl{ zf}gev=gOOFRt;g_?2K*k7H+NmtV_z(#Ml(c_WrwS(j%3(8X= zlReTT^LQ?nuG%$uT~lRq*elPg%}=ms!;oKuT3!Np1>i3LuLHaR(4{FsPXP;=QVu1Y zR;Po0t_x)|uobYfogSZYCLvyxa~6%tGYuD( zp!cTJiS4ageC2$lykJp*@}bZe}MB9&ACNk1UxnMM0p$sF~IPMLrE{;3;Zx+A&G( z`0VYp3)K4PH~u4^SehZvUw!@sVj8%`DXTcap=`_A z0y%9>MUem-H0olu!)5lCd)L&Y@*5bhzVgd8BjzMX%?`KA(fo>}i4g zbmP!uK23Mhi7D+dT6USfv6~s2dS*biqcPO|DS?@hY0I=lXK-{q@@-`ArZS~aB?Qq5 zpa48h;M1SzM?8V-GJpY6_mnK)A&ak8bq8$={9d0XjsS%N)F+hEd(|wfJH5_`O1i6V z)fsgQIYJ4J7TaX)=DfmoAliJ5-o(|a0ErclyHf7mJl-Nck%u>5SpE}Ozzk!Cf}INa zo5LtC0N_v$jG)}^En1Y|MNpwz%L6 zNF&_{3jj#PqU0Tv7QD~xTAr}<_-f}Skw+*Xms|?w)Ikb5O@4Co3^_`b^H>tP#5N?M z^BP)}#}+IQMR)NofG@dXJ!2+u4P7ArrA|CmiNNvv4iZ@ni4JJj7PCb5m_){ypuaIK zv4Ya2x1oRk!%$dfX)cwQG>oe`Mg4%=B^{(e-e-}`Z}pG#-BPJ6{T}QU5x|Gl-HW!^ z(Zk%Z2tfoEiRCbb0rJ(L$?IOHOUbtMrIoJ;u=~F3)8fdV8qZdY)7sZSB%wVv2?wOM z_2y!_p6ntR>KUv@MmZ+XptQ)V0|Q3?4U~BGwh<}rwMmX`0W0Rm<(~sBQ|f`jOFoxZRhw{7>-F>T zqpo;6NP%Y9kNLcqK}Y1S;LnT|Fg=urE~5D~$2gQpx~|P4%;#iTXkpPlB1U7xygN&V zQw2Ks_jq2g%AfWQl6QyBF91dt*mk}o&|a2x%{!*DlVWjRMyKvYv#!MfC%WD6c>XFa z+s~eo&^XdfW7;T(Xs`K}yl;D3V%z(=Z@Y!I$nQx_v~|!S3w?j;w!A0(SoRAqE@DgT zx_cZ!j>+TU!Safa%Dat<*zkTHIUfE}X*V^2J*hi8=x^Cp*%WNhgcc;1pNsK^n7T9E z6lnH0HSBVyQv}R3E2}XC@>*-4Vq;pw4oi3!CcAS7kXHuKA47&SMLATPh z1n?&qJGZpkA_I56p(r7FV%Ne78z>le*Sd9^wu>GOSi?Vb=F(2BQTXPomV^n}1M?Jo;pp&_<*22zPc&IuiE6?BIKpjRvPbYb6jJqh(%?On9 zl!gx*MEt_Jlt^tFyEvAEhG7pkR z6;mVkpS#hr2-+o41U_tfme=3h=%q7^Anhw#*xl+`(e;9-o=!GY;~18OfaJP!rg5n^ zYlYl!-gv|Qcqn0iJd}FQj;1i(Leu`S)z{KWuNOGf#582a;zEd9z<{MO($|3wS1~55 z3qn^zU2J*ITqRR+S#5OK5y?D1q$ug~oD1?tbXJroBEp-fjVPu>+bpT<86n@epg>tC z|9!zIWsJ<*U0TdqQ1t9AG?aJ{u>%J+(W5VBJeJz7m7(2@MGz*cw?x|mr+Vfjf7o4F zjL~niY2%8*yeMn4wiSizN;7!*MVB5SfivWFa>btE8xEtdti5t-B-rwqk#XVVb_BJv zSfcGf#A1vG&P|^5tToX!83_d`*{<}AU`Pb$*+`pK7kPG1#4?`EDQX@MQG4A*?_}tV z&r7#giT$V~GXBWHi{7@BK$k?gj7sQZBIw6WfHixm(cgP6ttv;e*un-ZO!Qb&R%HpH zoh$q9-CT4SNC>D@pCqdA!O>-M-`?6hEXBJ(!m9ep$d7vuC>0|?FAv~R00w!LU2PGZe{3njE-1mVAZ9%wue`20aew#}252b&y+Ng`I)J|;`zZiDKTQ)@Rf#?T z^a3?a^aJP*FaY2c;9(nW9RCbL+3U!1Ttv(eqfmAnsXT@Jf#EVHpb+{J+{-6X&AU#H zwlE}tK@`v%M^+>5n*B28He&IER`RsfM8s*aS?r3-Vo@^fbnu34X?pyh)hS>2MWMx| z$o$s^$*g?`lm{dC@7rQGPHQ=)b4Xy9@Gw#9^d>F3*5b7Q zcZ_T+x?Er(5o3*!VGc~3+3+6lCm`eKC8mXxvHE+W9Uej0X_jnSWjQU`ibaV$e{-WU z9i1alVFxi!XQeAxDl%=AyIn+x!^t!82HBq0HbK63bFRGbmf@TDsDRq1Ei2754w>Tt zTz52uJro-Px`SQ~wF-LSz`fxfvf1H8#(1%D7E*j+;WXSh3-Hi2w+q+k`De(t`pYi6ua!Y%Z0sMEa^1b|^z$|<+qQfyMZ z0ToPK%d-yD^=OrUy8QCMkIDe~#qBd<`1v3@mDndzMtEB#m)}uSKMl3h3Fx-1 zp{AfIm@(SsOk%O2f4xt&@rbo1z5_QNj*J7h@dRRTU|1tFqO%JS%c?e$DCi6WwH+Wv^4{R~) zfT{CAMt3Z*FnLk7KIq9~%;YO5nDk%eKOU^=X4-K0PM9(5qRn~23!#L`Vh)>WpXk9A z(BL5A?X=Ropi4rRy#1jZONFs6dHJDQJN8y>7t$pN;T*uo!^eIDIW1Pd`{VsgD={r08& z-J{KUDQpQY=O1$Bp{f*cV=4);jtpQQ45Fuxn2t#*0U;}chRDRmFTyq>!A9jw4f39MPk4s7Cjnft^f z`*`X?+u6~vpR+Ux8{Jj2hHgTPs}JUuOckKxv=a~V!puXAHwKHtDb|Oq 
zel{wm&mPOPtgd~Rn1|8o%4rRNI2{&=a?w-aR3?J4u8?m(@DFA6N%SSm zj+kbZ+K#Xu>kVmfVs8vk_8=M}l*@Zo=0^HFovo}&h#qfIC2!~jpN+hYEJj)!1;BW4 zdzxgdN^{WIt3N_WljZg!gOx}4>nyh(>1Uajz=0hMH^#OiHo4;kiIdw$NQ)xhKl3xC z{as3)ym&SaGPymw*29F=VaLKc;^VFlsUjAr7~MSciXVq1#KbzNFOS-#S#p)gKcDp| zixYZtnCd!kW4h2nY%Ps5BRWe;>=iSVH$S&$2Jfv{$ewtlb0Cm~=u_YtF7J76u-x^h zOA|HBLa>!EaYV)LqUL#W!Sm;g={O({p<`XG!7rAdJ%8R<=F_7#*dSjP`PHAtDK)IW z!yq1SE}$^pU&gEfVbqONTsMEC_r|!sl(vzquLobuR*S)} z1Yk4(J8RgPvB%}H;Squ6I_&VxNXa(*I$D?$fKxmR7ErSVvh<~qmId%uOVW84Epx8T zmG`_fY7*~J<0E6J0Uj!O;NpmJn{>Qa*Z+Ofg91>;S>y4e2WKPvJi%LaWLO= z6~7|GuUD=5zo@vDhV-cf71tVzl$%ZrRrbruH%g0Gs4|kQ3zaVOYt7yt$-1Q~XLUerx~#lPP{H7ruSTbT)HybH&p3Btj?-`RG^WjDjvanD%MlBN zN*i5)zuO;IxIGF9IVZLJ@uMuc{N1uKY`7ES5+s)x?q`6!_1)2l`z?%dhc&+Gu*@jJ zr_afvNS8hh`uGfX*q;*gPKjPAW%r_k2mCXhD~Ww*pGbyIsOQc;P%m`NcR13&)B$R zF;O1^dRJyOULN^S7*p^t@XXfT?ANR0wvU$gXum#FzVp$!>r|p%>d;;MXg=IpPpgqQ zFY};IPbbg3yxBOfi%ts@EuqgC>ACX0k5?I&eTKk#xV2~CwE^<8Px6&HGW&1Wb-fNrh?J~^ z__8E6v?;^oV)@hGCeJR$sFncm`ple4rh?2K%pnOE#pW~xhdD+(y6n>u`@KY>Z4@8F zz3}M-BQRA_Sr>t6F;QACi~sJMu?)Pw2d2e1O9iD^2bK_8QhSDyH_4~}UYYdW5~zzV z#{mr1p%*mSDa6rU+A8jiKCLRTN2oB`k?W2{J2C|#CHGNa)hItZR)2CwK&eL^*(NXf zEcpYh)Exl>EukY3?nv(Ok@ow{j;uT$?Z~v$9ch)fA1_FJ$jeK179^QTfQx_(Sj0h@ z;=Z^9_d|k0wh7PYlMI`fky?UtI z&Dq390UaHC$|<#*rqn=r=a=)0zMhu4uUE=`CrXwiOhDGQISaegJ9>6r$Fa}O6bIvL zwQcgz6Vs;q2}jSU+3C}aZ#?@TPt@NJ0i{?RMxWtoY{w`R{K&uV(9`n&_@4 z{-}dI%MduMgl^+_=zb8w%u1stLE^1Q#XmR!qed&r)F@vdn ziN?mEf^IqK5Jk`6KL~`w@V=miIg+M`x_tpf8P>jv4@79yLw_&`56S2;I1q#tfDHtX zZs_Y}A-|ud3cZF6^KMzs;Co}b(F}`p;3Aaa_1{kTV)~0D{7RCpg!}^H$6rm-SL*3K zZCGTWuvf4hZz+vGD+nS{tUn?(+`pQ2;gIY zV*n=rz5@6f;2VH%0salZUbRL^YmdGs_ymPZVL$wez=9OcQ;bu90BRk`@?heskS{ll zW7F%AjUSd8=UX@xGM-al7cKS! zo#meJ)}{927u&n$_UV(ELjM&tp5-}_p`@Gu68$fY3bW~Ns4X=mMNM(1sETUy zD{h5<+T1q!nd(n%wY%*K+U)+cR)^csn(j_#Jk9TH&2SS}YIFEATeI9*R8DtiYgv0z z+&J8h)DE@3 zI$*Ej9;l^L+dwss+Var0M{(z=+thq@5Vhv3+tmVfFqH-o)ev!}g|h zsl&7a5UhG(Gd@P$p-xaI(&);lWs+J$Ej2v4a>6I8XApjdR!*EM2%n-( zC48z@L3kxwZJIhAaL-R#p)8O zSfbSs!DPahs>=vp#`qb8pQSD*d^t0mLih^xY{JiGd@A89)pH0xN1IChrxCtNT}}9E zZ5rXz316eGC44R8XAUyeJ&jd3G-=KO3_b|g*gl|;6gnQXh zvk7lhn+R{xW>fz;gsZAXxW@Qgb(7jmmZ()*R3H7Er*2la(9b&6ueQ?9`D#E7($9Le zO&ztB3JX-BhUn))twC+yV!JX`__g_aQfR8!&Q2-YxzD~{?zUIu+E)snR;K9bwZV=+ zyC%XP**{eV4X63}?Yyxq!B%bTX7A3HV4GKL8Cxy?>6oR|hR38AD}zBIOyC0QBrsHJ z&RWGOZ*{)75x5u&FdkqcKn=hd1oX5v;S02v+4Qu4w^a-2_BO9iSXk;=jlrN_^9DS@ zw$Oa)6><wY{FTxn2OSLfH?%pY+^3TwE(9ps^@51JG_2h`%X`r z(A3Z}B8f0|Dg}zDlTT#cxN0^!@(@O&R0dE2&=dEpMqfbn1)8Dr8tM>Xov9QmVnO&w z*8kW_pbsfH4D5;kPFGNjpt@c1t(?uhqC8Kw^{wj_<@s_}?(}k`V3ydE3SC4#yH|`a z0Au-PZd$LXce5I7+um5>=S-f_TdnuV7yAV#GWSyS83&LKFb&{LfEfT2044!U2ACo{ z`p+y{i{k0-(48KSFW_tUc+~Lo{ohcO26@-O!oI7CQcrIa!6q#f5^LnE12+#qZ#|=Z zXPeg4qBU(POBWmDn!M_K3&S*;P$BJ-@bB{SZO#VjQ|1t9@}WVqT^k^NGq|J%JKBYx zO74vI;1(@_nJPBPAq5QsPv<5YiRgy#j)H3x$5wQ2kage_pj}jgEGtqKY|{c|sfH_ftQS)&E0t@g<^p~pfibe*$bHIcdE>}+6L~n9 z+73!~p%8q5Uhed!4&m{sJFH{6h#IHKiKEV+fzcTwKoU$wM?40m&}Kn8ix@QOP6+Vwa*~bg63@tCJ4Ant^G)RX%1J9nWQL-_+eoyQvgR!S$VX3EYq5%fQPVs-BaU!)Y zr-on3LzUkQ<1vmQRL`y3;S05g>da^@m(NwXN|3RWOmlH9wcjE0s#|K{f@mCE2bRw0 zbkyy%#re@B{sYn6FAr94p2-pRYeJ2v?|nf1g8DoZc8}fsqfj=M9C>tH#U$pAppPoY zlLDSgl()nc)Nn*j8GmvN&)>97J`rly9U+bKk{TapVDLu*&xe;!Xi^-;T#^q>9HrbM z-T?^r~_JXTg$Qr#=z>Pcxz7kdS(q~2#vK#g+=7EL{SnTV^x5~}xE*xD9X zH^r#iw8^YX=E6S_`8x_Fkp5IiNzIf8YZ}K|y(w;^HgP*Z+*|{C2Z1kSbMdh7#>pYu z@LN!q4v;vzd^V+O^?o#;KGVfDsAFFt%?J*M_((8KcnR7qO8k=QzLwjjUN|XCrAW@y zotruWO~@3w(<_=gS~W`SmW~iRM2?*%N`Hfnk@eG#y|I-P`DvK^uW4(`51KN>O;i>) z16Vl!j_O>pVR~21I<&4Q5Yfw>K?{m7mLrZsOHY_^l>A}(8*XGR+RP-#mb%?f^HpeI zjg>T-AD!9`+5{oNB}|SWhiNP1ZVEi%mcGx{?#)) 
zo+dx73m%UcNzKt<3;wt}_H;sP6CTf8d2;52E|7~+0L29KblU3$n~21Nsy2W|fOY@@ z5EE)-WeylDyy7ANtc7A90M;5KnIj1cq&&`5;twbxabTuy0^$(>(==Rip>81oFZ~V8Bj8kI>g-x&VYq&_$EIwRx6G|j7RndrR%h{Y z!UJrU*|k$D-06N_KnrxV!m$Bs?CF_8^QsKQ9( z_g*JNP{a!t-Ei~MsbITYJb#6;%W_7}ej%&i0ih0B@_p?c?V2Y_bDn&D{+bfDO2X|m zVzk8@YVmlw@{jNo5dmc=v041FWwpN8%J z3h*qz3ji+xybRE7%)x*yHj24iDmZP&`h#*Lqg2 zJ*zI6)PIuH=j6=g*EPHcO|1;+EEM!_)0km&Lk5*}Ws^wYCGkGBy&}t2>@8<@=D-t9 zU7jaz%fGGIHI~VrgbF?N0nrC?_t~Q?tC4j|vhxrS0%K&&jCsjY%t$vw0)~79A;I%f zaXK(F9X9&e*Xa;w#rhTdPhR;*hOAtoA=M`{G~D~SoPAD#{A6WiuUz~UQ7PYd;G82bUDJ?^2$K8B&)1CnX?lH3-TIuS*T+%4mX*9ojhs@F7%X2NUpv1i z#Rkg-?qRcklEFSD*?Ffa_9-X#!)}>Onuod-$*t$kOh$AY5si?~o;y7m(d|T3BFon; z9m`YdB%z!ZlLa+Gw@l`&&zFy^>)pJnB9lD^@zDwC^+%DBI51 zpByJX#?~a73~M916v&EO!+5T{PzUP|gAxO&fEk``&$eTk#60Ka@??0jr$Q;-2 zbSg~|4-t6p6#XX-0c`;2CokMMdK5dA9@US{ASX1Bt4rX-WJZJVt~mXGH`(OY83o}-o68mFm(WHL zK6$cbz{D+t7_N(XK)6unBXBXeWKto;nW^O1`qQ47>aoiVFY{fe7}vhfgWA#=XNlj- zoGsUtrjqc)c|y;KUfG3imM?Fqs<#aDF!=JJgdPdROJG?pwA!fQ8tBNoV7uMs%n-Kl zLjR?TvR1y>x@l$`QBl0}QP#uXV+8r(ALz^bzqKf_2`mG^#yc#x2inhw?PK*!ud4cR z!Q=JwO{cDSE#t;vu8F+r@jN6)2G2M4&}q<~k3W1{WL)|rI%*0hQ67;m1s4=c65oy0 z=oCnw7d)QFXZW&K^3``8!C*)aPYRXqfC2>#hCo>bZY2ow#42{+)3qWX11J-j`3Zud?8yq3RM2 zzV3&~E6g!j*&h3_8gsVAvAhhOAj-kvJKO)JWSJIOC9m2xa791(7`o}skgv7P@7uJ~ zP2awQ+qCGVERG#+2UiIJa<`+=m*AFHPowYFzHRTwueYrjjpL?n$0}P8Tl@-uo&KLF@m%@}rT!#8su-rPZl{Ypp{LS>1n^r&kDlM9hL7wx zswi9JzjrREIz=@UK*P{Eb&Ix>o)@Hse9cBj7u9~7ab~j?0Na@(W#3(syKGb``1;R0 z#j~o%(ExtD^6{q+y5kFJ?VK0IxzrNXm1k-JY(0p8o<{oC8Q-?ZK7w1KVKD2EP z7>JcX87@4LzMFPfZ#>z^l49bxb#^c%SCb6`LoIhedLl7IXhvl7)lLQ;SJ}n zw>7}Jq)Grk?0O%szpcef3x0skpzWMx^*)hpgQt-$K~&>Hmz8i)AKiJT@sV(5olM(3 zev9eKRn*F^)N{A_LiCxM_7ojW?H%;AffxU%h0K+BIMgiy0DH=MT)ql?xQQuUK%w** z;p=wqQIu3!vS&oxyAN)65tFmYoMMFZ?3wR?QhK5EK3teCN~lR6B9((f#*sImSakG? zp3_VCwyxXX!LmNCO}>a|!F>}@HB5|JL(n1j^SER_nKrSi%P2g<{H^Gf0VPP;a) zB*dGu&eYD5(8{t5-iFbalc2zL^h)`!y~Eb=+RHOxG8=QzgGS$W5kFS`cl#IgwjX7iG22=dTFvZF%SGj4Wr(-|W ziDCc*A9aKkyYFbwun`lF6yo26LzPB?$dRH7l?WwcwqYU}cXGV$B9H`} zC}2QdV~rKqoLc7m!m4}+cCK@Y34ORZ_nJAjG(|p^RUq%X<`yL!-tg;YhjER|3pnS0 zp8oezhtu)_Dy;G9p}Lo4>b3h;a32_5Fx^TJ9|X7=fQQA#XP3ogGK1Vy-34;+c~ic7 ztuQ|3G4E4o;N$P@D8XjkxE_@6hWGtJQ@U6UUif)nHuU7Z0V|0G)i6FwVCF352hi~u z_%?{?&BFXK#b~IAs8eh?Ho6C}JAAeB#$#pMHkJ4kGD<;CEtf8qU*1P%gn_KG|C!w{5fNtL$^aV`Cj5smp z9IFi<>HX#!4`0of6ULjAN=U0B5PfgMgPbg{y={W^8Q;r9H8=eBZ3C1!F-sT%Ax3xj z0(d_aohb$%C~6r7^j;U9`lo@)2ow`fV0yhv+s(}~aL0M0F%gKWdtxz}_=IG?1BZXJ|9MbN+k@Jzy+WS+F+H zkmbf%V|dCSs&s)627Cb7V+UI(f4aLY+084`?l0t*cfYy7B9DiLDcRHZ-x#5bC^_X{DGS5j zv@)|qNth=&_IZY(-$&_tfDQstQ7Nz<4rF*PK%dz<_>o*8YAwNqbpBhp`<};*_3U(W znZ8W67l4QVB=_9A(}gV5O$Z^)+#?I`d)T;t=QPO1=^-;bAk*((JswI&Rd@=6wTuFp z;Rt-k2}$W3!JfPQ{#iycVrhIu(UVB*KK?9=9`LxBGV!hyq57o!{R1^UOb1@TEi-}B zvl;)q+p$anb8K94ku;v1KgR$I2y@!#GL60;X7F0Dh?ev#E@;huq#=DeV57s+y zs_MK%ZoMmC9($xrPC1ez^B(&1x^Cl&7r?I}i|?wo8gx6Nr6qvZvU%yaCg>dx&EZk4`+h+mZA_b*T0sRCh*Hr&zIz{qoUACpjufMCY!!n3%mJfhaw*ZdKjVmHd6* zFjgWHf!BACgLDZKiAp2*UOzI`fjvoQdqhy*@SMjcCfQc9k2vypc%0G9@asOeiJ7E= z=q{-_>DoDdZz(_jk5IBWLO{6WMSu49=uJ4PSPpu;!jr&w!W@Vhd&z?h#NXVv&L@l))y&KyXC|+wc7l#R^y*}ek=3$Zs8J5eIWJ6~#3x=D)Y#gx zHBJ$lc+ukodX2p6gC6=VXb4yrs{2VUo;$Wt!_7YDi+`du7`zx31|9 zxd^}dAp~2R4c}{XKQ0T8*8D%2doH>69*eo>(&lEcJalxZa=m;^FDqf4Mw7|9v3f1> zcuAJ%)zd7FM?*piM)Yh z>hf2&*d?{Zo=aD}R%RR)IA?I|AVi`$8RfVUxqx~Iydf`nZE;VLd0c+++Rgu;BJ&-J z%u9_cKlAX$+V*BiIZ5ifaRk#R-nZjg_g@bA!kcTy@oa>h zd2x>#Gf~z@(*E8dpFTERzW-eBUR$Ner;e>D=O7&$6OmXYWSum*`B)qK6xXO#fy-8*nGdw*`u77uh@paHpSWh#UTWIUZJ~i@{cPn}bH8a4v 
zjV-W>Gr#GBdDM`RP65`R6r%uh(DS)$3NGVOahgDTlYBfYPwxNc2>W!xkSB~#{g)`lQ-zVol1{*_`N@m;!YmL%nu`@m0V`d{p&+F$xG+X&J?x#wZ^BXt;704PuGmhMMF9Q04NEM_J!3n`TTD0(f=gg>1e| zAPb7SOv!i4k3L>zNIoOEGTmaecZ-4rzRig`SRhDqZ3`-#O<>=WE0sMqYB3RIVW4(8rz21 zo2gkYe<)x6>@(N27DDulx*biL@oUx|=oOhtXcKz@Oerp)Ai7BY?Xw!|77Yz?#N3U0 zK<0%|DUt!2*##RuAEs#Xyw6{%Lp!sLesh6rcV-8kFd4t%_#h})nIK9DG`d{w|DwLv z8fnDkvN(;jm`1zh;xF6Ani}=S8o!ete0gTCW$z~0zm1cE-6>on!3@;XW!?mCUvQRcPmU#W`m z`$E0f3%6AdpT-L{O)2yec3}99Z{JT{^9teQ?=%eRZ7Pd70QlTuocytsDNA*y`T2RA zO&hPvmOZ4gNReOd;|V#XP{cx0fQa_ZZDnbg#oq4Dp% zyLMEIjQzFBgc``x`+$Kt0Zi-0q8I-VL2J-M=eVE|2d^&xv6KLYp|;8TE;0DQdXtUH_3H?rXg zG%Nthj}!Pl`3KbTcMcy)Hng(4!a(^(l5bb{1C8CO8uwjvYt$8arhzjw9bpOJkSX|_ zgRPgqmWJQ)3aqNMm=VBALy49EhXXQ<@vlu_iVDmv;|g~X`Yr)L9I(R#A|@`h`4>Ed ziz?|+Vrx+C@M|-(2@Cz20Bt|$PB&Z0wPh>WbT98n&8=`|;7@KBY2KCM&n(JToUY8m z%O*i5^;(o$L@oW(b4NRKbJr6B P(zM*l+-nZol~Ml-31T~= diff --git a/nlp_resource_data/nltk/sem/__pycache__/relextract.cpython-37.pyc b/nlp_resource_data/nltk/sem/__pycache__/relextract.cpython-37.pyc index 4f0b2a1a76144802f8ba8fda8302bfda96464ae2..9f313f7f1f48056d17266081754515ac4009c6f0 100644 GIT binary patch delta 3203 zcma(TOKcR`ab9=NOwZSV|KA-O3?3WCHsIB+Z472@c3Bdm03y&V>21$@w#W2*wz|jI zMljN5^U*43ck_}{B5{cnZK5cL(H?TiA)DvB4q{9kF+;4U zh4p+ewfKr;*Axe0RKwLgGev>+a=!{TK_sRz zlVurnHUA!a$i_)2uLW9RD$xCU?Z75467%b#BDQO=z~v9k%@d(G^q>~>k}89K!qwSy zEc78*m%WMnJH*KAf$n;KpnIYgMC%pbSy!?z^^~Pid$U@q+(vOPiy#u$SQ1S%SO-gM zFf2b#{Bcn^ss*fU85mUWfSo~>3$N(ZL>SziP1;Mjo6%TSLw85zC&}mCIi&BJ$H(5g zKXE=k7qCJQ$;a`8C#k`eQRD@25H;5+3QWWht#xmwZ0XA@Hx3?Xx+Tv)joe2l=A{ut z;#rJrlBMw(GiEu{#uKkp{ zKzsR-Pun}$uVlPqCiX7MLAH6ZV>OzhphJd0TK=~8{;f9&ohNV(0fu4`KS9*1xg|e{ zmL0#iftQ(W=M5zg#A}Z43#fonyhMtv`w*D(u2Wv+&O(&QM|}&84%#bu({bx!8K4R< zLj=PY+kRkf2#33+iU2Y%Fet`BY^~(t?)gNxJ((2fyjl!m#VXXWF0LX4)TzseStbUL z3eK|7!X=zR5Ja)oT#=8#Wg?M|k`1CIUz9!c zxe6F(D^UYEFlcrkxG6DE>TPmvaFpGZ&fv2fbF{^I1iQ&m4-Y(vM&(1db#+J(BhPlq zaEBxhZiv02?uipPMmz)g5%UNGyZQOhQ;nsYKN~*8PO2WIx)ojayau;uABugTUB>1H z-=VdN(}Mi-$TOCfUml%dfz*x_Qscy%Ah0Cojtw_%6KW%%B$mr~tr9CvIlK`047`mT zOZPr-fzEqIEa2OO>gXig4YBI|gKox46`PB4mCee( z9kZDy=SKtdulsmHYa~%AY{UosLtk47%BR2~L1kCVs~~=J`P#*+*Oy_5m?Z+61nv<~ z&XP@dg~>CeY(=5!3eSpOyZVjg>rkUxp`lCau3HiOOSxJ&%68@V3%_6)IWhUq%lp+q`@UKo7-qTv=#2_LMP4NqJE*p_ z9Hy5-1zQMoO+PgW~Y;^(d?ae+4UqTDeq`X{40*&Yl+!SJeZoDpLzIT`e8mD zUXWHsf`q8l%K{u94Wcm!x8z?=9}hdgd$hT#jHqQ$A;3eV1gsPol(qj}02N{Hr8bR^ zus!RQ-S}dNX*eu#2zTHQzDJvxwq16x?rjWs5+f4!`4JyOOT~z(>c-C{{JyTfI*_9) za78{j)7wx3d^m{PHclMd4w5#$@BBIl+Xg&65~PY%*A3f}2Q-?2T(=TlX}eIa(nO|y z3g04GgT@lH(YliWjlysy3==!49)>&Wc|(s8oaGIKG#}MG+`q#r)!-4MPCymZE@7w; zY585jmWp;dPpV&Lzd>m~S)v%(B*G(3gfnhVQ|pED;d%#F-O_95c~JdgVvALK*G(Z+}QOSrc4H~3jTf|L{d z3LoV*VjcV{zre>3>*V8n0=c^QH9pBN;>k&Volo&;#7^-UehF!(`67Rrzw(@|I^FzL z{u&^riKmvhnf=ReVASw&7tCB|_oo>hN zi<^_{wW^rhaJScMbr&8@7Nm+^>0CfsK(djT!4fRTpabQ8JeR{0n?V~cK=Lhc0x^{- zdu|Yv+|^YOn_t6||CH})#^TS|1ro9iWfU$dGi=n4DS45cUlYD~QitNI{3Nk)zQqow z$kb^PWza*owoPWC8-b9;r2QrlY*U$D5Gpeebro&+p}VGe_vQ3Wqos{n#lMT}0r?Y& z8!Uw`TC9s@3>cKZNPc!jJ8CFa#S#?{gJbtpIWB^-TNfqahm~-9opkANp{|^P;W{IK znfk|W9?y3Y=SJ^sjV;)7iWOAEj>90Hs;F1n5}*taDqgNt>lIJntsARwhNy86bY{C7J1cmJd;R z(1jq9%@`I-$hNi|n`!3T`Ykpj=Q<}?URFBa8Z+n;W`OGUV&bj}PsKdpZi)aVh#IX! 
z#SdXh{;_jn_*luc@7tTk8&dWEM2>gO#%`h*9j_32nZO(Z z4DW)x-`g`m8x^m+VJN@{&9q2N2X&?r^PXE>xlw$*VJx7iNDh8S{E*}hxuzT{G z17F;EjhJ6Yu#*}IQ1v*Px3vgbAayE6CAM7!hd7UAIfjF!5r{#2O#&KWkwgcB_nN=Y ze~x|I{LOGX8`q;w4?TK1Ef$h`d5OJA^*{QS%;bL9E6h4d)V zYy#KhgR{fC?-2SP0tYiwtyV&nTym?eEZbSQK@>B1>;c&Vm$s>L&G$t)tkisEmKy+q zNF@Zy!n6xmA=E@C;a&@?Z5?+rUhzv@RBLQnvXK(2Nq0n{fAi;(#x9MP);vG%ANtx# z1#;MFctDK$UI$*O_{!q#t8Xta!ZjkU5m+ZsA)uY5J7HatXIj{aLd_F_6TSWR*B6(; zqtezc9NK~^ogK?mN{eYPcI4kj&$GvJ(EdR}-x+fNI@DYk8)husbjP2tBhlA`;}0HTL|{r%1-=m zXQ4p?)K9jhBccp0ltm4%SnC}@O{5TXR$7jTp#C?RpBiLOo5d-QEgyCOP4vG|uYV;6 zJISXPFVD|Ddph;Z&a`UbAowaN{6vsza{An&uK#z*xNd@|V^Ph+Hl8R} zQ5GfWzt_4iN_$zfD2_f^b+XGF>Vk6hgswf`b{e? zL$nO4c&UUhLaC(E<(lWU21)?>m9*Bwst4MiTga>|dhZ~MD0C8__XxUn3F@~rp}$Nj zB2cN;H;Vp@p^ppwuIlGXKTn6%r;`LVVtGtJx56f2PYJw_KqX2gABSP71mD1|lh7X& t_zc4b#L_;l;Q?qRbiP{S4Nnv)D8WMnmX*Ozl!W}#rCFmrCwpf{{|yP2y@LP% diff --git a/nlp_resource_data/nltk/sem/__pycache__/skolemize.cpython-37.pyc b/nlp_resource_data/nltk/sem/__pycache__/skolemize.cpython-37.pyc index 95e789aefaed4ac1536a07e3d5dd2423692937f1..2e6f7303cf66d85a377af95b9b8bcfa8c061b298 100644 GIT binary patch delta 34 ocmbOx)Fs60#LLUY00gD%6XVM_@_Mkb+31%R7wKA-LMS{Wkm#Wj65Mg_d{p9(*b@gNZm5TzdV!f|CnS;B!@Sv<-+a7zGjm*h zR!zQ|Oll$c+OZ?^v!i7G@N@W~AtOWyRcz8!45b-1!UX@rwrZ$ws&=FqHKHIJrV$!_ zNQ@ZG(>Uz|E=~(nqY2>pXpvr`N#Hc9)6~O|k)R{ApALZJ5*?%&kR<64&BCd(6wU1^ z{8QGie-a`gmPAU!w?n1TqaXg7Dk*{9;+Er^J8h@oTdotN+m6+6DKl-$XS{BEoiB*3 zP6o-0ltc1ENNKS+DQ@kaP^`(2W=sI#HM+ujPr#2A!f+tyH zoqbTZ+jXBoogLqM7d-VYpsReYQ%xiT5}kLPEX~w+7#wmQH&O`G3f7#V>=a z3+G(~8fSIdeY3A1f~Nrqku(_&>qIAfqcfM8BP1efLstC-Q1yo6AnvyPR@+~m*o20W z6NzISXgl0(;>Da@J`MujYbmKDyFm=Yp>i6P=Mk>k_qN*Ufj z3k=L}NwM4Vy+B+Z%}u-k|3-Y5*PA<*&5S-dUC4Gv`7QCz=xAjCEwNqs1j3`;RTo5X zBrk45z6FqD@#E-|L*#)&rf z@lCFPj)#$~12N)q<6|bWI=KGiqU}}(i}gB}1(vnkMD^49e$7%D?P!J>`6;QQ`!bh8wpe~2i#_uID*$*?j zX`2XPBzoZ(J&7JFA*8}zU<5~mxNt$-R~(V}k`M<@Ju&m_d?ZsVr?=0UF5+{X`|uo9ec&1DwDGwEtx(n_)fAL2SsJz$p3&+-g6kW29* z@8TwMX+F%eyc@X;&++_&gk|s%eunp=q>C4LA4*K#&j;|#if%r*qrqK~{V2f_!X(PI zwM3Pb4!-)O^(}L?p(JKvnZ6tCOg9_|1?VV%l)R>2?N1X+fR2!$@}WLHR6y>@t25hy zCuZJt_O=5w-I8{g2)bq2cmIBew-O$qpdG8?`u~GS7k6thZbwovvD%q^HFjRCC3h)n2T;bjB0_` zSX!5FnF|$VsKHYN>m}>cCh!bO*X7sd+$t4}`hGfe$l(EbA}WUJMkfs=Pzm?c{YXFq z1ZC|<;WFYw>R}>hG8k(uWPjGKcl>H7ttQ*4JdHe-AiQcaHk~l6Z}}eD$0-lH;=)ld zpeQQEx)k~Omrj&p9mjT}df4d}j=`)zpdV6kHh|+5Fp+DmE|f zI_|C$iFU2;pr=l)c^xKNn?3WaIZ44UBC(Pi!5&nRi?xk8E6IZ`xJ1e+QtBHxGO>Qo zad)g#ZQBV&Ra0Of2guJ5w8JZ(WQ%fg-FAW9BO?VQpU6)pny96i_(8HmURh0}AlB+W z>Iee#edwUWD;tH=@71Gi)T)#_9+j2K!=Ss1fNs?4T31cM8;H|fe%d=*qDwlS$vDe} z7uf1zXB+bO-pg!WmI|LuwTCHALl{B5fw5>vW9FV(3U&KZ{#JPJoSNdLBkwBC(SUp+ z@As`-I58mbf5?aO*S^J>Q(2lI9Nxaz1B?)wND}M}JDbe2JcAppwf>8YC1rD<-n@XS z&LZs_!EV&pjpj$+z^PITPftc{ZbINuVg44BPyn5|Q%bfKXDA#gX%m1Po+>-;Q*Cp_ zmQ&lax4`i{2Oc3VEDRw{7~Pt#r?qZoutC<%(hOEw9}Iq@F-8UC532U-h^jTj1S*);%92{O`x8>UKi#=90i~>=iLgCdS#=5vKYH)+TpCj@- z5gHtrAhJq?dIPtR#9hMQ^#tIKwYz4JQ0Mhp1kWmVN9={wez3`c0>kS{95kXZmdoWK zc3nE91%QkM diff --git a/nlp_resource_data/nltk/sem/boxer.py b/nlp_resource_data/nltk/sem/boxer.py index bc87dab..8113165 100644 --- a/nlp_resource_data/nltk/sem/boxer.py +++ b/nlp_resource_data/nltk/sem/boxer.py @@ -3,7 +3,7 @@ # # Author: Dan Garrette # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # URL: # For license information, see LICENSE.TXT @@ -24,6 +24,7 @@ Usage: models/ boxer/ """ +from __future__ import print_function, unicode_literals import os import re @@ -54,6 +55,8 @@ from nltk.sem.drt import ( DrtVariableExpression, ) +from nltk.compat import python_2_unicode_compatible + class Boxer(object): 
""" @@ -90,11 +93,11 @@ class Boxer(object): self.set_bin_dir(bin_dir, verbose) def set_bin_dir(self, bin_dir, verbose=False): - self._candc_bin = self._find_binary("candc", bin_dir, verbose) + self._candc_bin = self._find_binary('candc', bin_dir, verbose) self._candc_models_path = os.path.normpath( - os.path.join(self._candc_bin[:-5], "../models") + os.path.join(self._candc_bin[:-5], '../models') ) - self._boxer_bin = self._find_binary("boxer", bin_dir, verbose) + self._boxer_bin = self._find_binary('boxer', bin_dir, verbose) def interpret(self, input, discourse_id=None, question=False, verbose=False): """ @@ -179,13 +182,13 @@ class Boxer(object): :return: stdout """ args = [ - "--models", - os.path.join(self._candc_models_path, ["boxer", "questions"][question]), - "--candc-printer", - "boxer", + '--models', + os.path.join(self._candc_models_path, ['boxer', 'questions'][question]), + '--candc-printer', + 'boxer', ] return self._call( - "\n".join( + '\n'.join( sum( ( ["'{0}'".format(id)] + d @@ -209,29 +212,29 @@ class Boxer(object): f = None try: fd, temp_filename = tempfile.mkstemp( - prefix="boxer-", suffix=".in", text=True + prefix='boxer-', suffix='.in', text=True ) - f = os.fdopen(fd, "w") + f = os.fdopen(fd, 'w') f.write(candc_out) finally: if f: f.close() args = [ - "--box", - "false", - "--semantics", - "drs", + '--box', + 'false', + '--semantics', + 'drs', #'--flat', 'false', # removed from boxer - "--resolve", - ["false", "true"][self._resolve], - "--elimeq", - ["false", "true"][self._elimeq], - "--format", - "prolog", - "--instantiate", - "true", - "--input", + '--resolve', + ['false', 'true'][self._resolve], + '--elimeq', + ['false', 'true'][self._elimeq], + '--format', + 'prolog', + '--instantiate', + 'true', + '--input', temp_filename, ] stdout = self._call(None, self._boxer_bin, args, verbose) @@ -242,9 +245,9 @@ class Boxer(object): return find_binary( name, path_to_bin=bin_dir, - env_vars=["CANDC"], - url="http://svn.ask.it.usyd.edu.au/trac/candc/", - binary_names=[name, name + ".exe"], + env_vars=['CANDC'], + url='http://svn.ask.it.usyd.edu.au/trac/candc/', + binary_names=[name, name + '.exe'], verbose=verbose, ) @@ -258,63 +261,63 @@ class Boxer(object): :return: stdout """ if verbose: - print("Calling:", binary) - print("Args:", args) - print("Input:", input_str) - print("Command:", binary + " " + " ".join(args)) + print('Calling:', binary) + print('Args:', args) + print('Input:', input_str) + print('Command:', binary + ' ' + ' '.join(args)) # Call via a subprocess if input_str is None: cmd = [binary] + args p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) else: - cmd = 'echo "{0}" | {1} {2}'.format(input_str, binary, " ".join(args)) + cmd = 'echo "{0}" | {1} {2}'.format(input_str, binary, ' '.join(args)) p = subprocess.Popen( cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True ) stdout, stderr = p.communicate() if verbose: - print("Return code:", p.returncode) + print('Return code:', p.returncode) if stdout: - print("stdout:\n", stdout, "\n") + print('stdout:\n', stdout, '\n') if stderr: - print("stderr:\n", stderr, "\n") + print('stderr:\n', stderr, '\n') if p.returncode != 0: raise Exception( - "ERROR CALLING: {0} {1}\nReturncode: {2}\n{3}".format( - binary, " ".join(args), p.returncode, stderr + 'ERROR CALLING: {0} {1}\nReturncode: {2}\n{3}'.format( + binary, ' '.join(args), p.returncode, stderr ) ) return stdout def _parse_to_drs_dict(self, boxer_out, use_disc_id): - lines = boxer_out.split("\n") + lines = 
boxer_out.split('\n') drs_dict = {} i = 0 while i < len(lines): line = lines[i] - if line.startswith("id("): - comma_idx = line.index(",") + if line.startswith('id('): + comma_idx = line.index(',') discourse_id = line[3:comma_idx] if discourse_id[0] == "'" and discourse_id[-1] == "'": discourse_id = discourse_id[1:-1] - drs_id = line[comma_idx + 1 : line.index(")")] + drs_id = line[comma_idx + 1 : line.index(')')] i += 1 line = lines[i] - assert line.startswith("sem({0},".format(drs_id)) + assert line.startswith('sem({0},'.format(drs_id)) if line[-4:] == "').'": line = line[:-4] + ")." - assert line.endswith(")."), "can't parse line: {0}".format(line) + assert line.endswith(').'), "can't parse line: {0}".format(line) - search_start = len("sem({0},[".format(drs_id)) + search_start = len('sem({0},['.format(drs_id)) brace_count = 1 drs_start = -1 for j, c in enumerate(line[search_start:]): - if c == "[": + if c == '[': brace_count += 1 - if c == "]": + if c == ']': brace_count -= 1 if brace_count == 0: drs_start = search_start + j + 1 @@ -350,7 +353,7 @@ class BoxerOutputDrsParser(DrtParser): return DrtParser.parse(self, data, signature) def get_all_symbols(self): - return ["(", ")", ",", "[", "]", ":"] + return ['(', ')', ',', '[', ']', ':'] def handle(self, tok, context): return self.handle_drs(tok) @@ -371,11 +374,11 @@ class BoxerOutputDrsParser(DrtParser): return accum def handle_drs(self, tok): - if tok == "drs": + if tok == 'drs': return self.parse_drs() - elif tok in ["merge", "smerge"]: + elif tok in ['merge', 'smerge']: return self._handle_binary_expression(self._make_merge_expression)(None, []) - elif tok in ["alfa"]: + elif tok in ['alfa']: return self._handle_alfa(self._make_merge_expression)(None, []) def handle_condition(self, tok, indices): @@ -385,32 +388,32 @@ class BoxerOutputDrsParser(DrtParser): :param indices: list of int :return: list of ``DrtExpression`` """ - if tok == "not": + if tok == 'not': return [self._handle_not()] - if tok == "or": + if tok == 'or': conds = [self._handle_binary_expression(self._make_or_expression)] - elif tok == "imp": + elif tok == 'imp': conds = [self._handle_binary_expression(self._make_imp_expression)] - elif tok == "eq": + elif tok == 'eq': conds = [self._handle_eq()] - elif tok == "prop": + elif tok == 'prop': conds = [self._handle_prop()] - elif tok == "pred": + elif tok == 'pred': conds = [self._handle_pred()] - elif tok == "named": + elif tok == 'named': conds = [self._handle_named()] - elif tok == "rel": + elif tok == 'rel': conds = [self._handle_rel()] - elif tok == "timex": + elif tok == 'timex': conds = self._handle_timex() - elif tok == "card": + elif tok == 'card': conds = [self._handle_card()] - elif tok == "whq": + elif tok == 'whq': conds = [self._handle_whq()] - elif tok == "duplex": + elif tok == 'duplex': conds = [self._handle_duplex()] else: @@ -425,22 +428,22 @@ class BoxerOutputDrsParser(DrtParser): ) def _handle_not(self): - self.assertToken(self.token(), "(") + self.assertToken(self.token(), '(') drs = self.process_next_expression(None) - self.assertToken(self.token(), ")") + self.assertToken(self.token(), ')') return BoxerNot(drs) def _handle_pred(self): # pred(_G3943, dog, n, 0) - self.assertToken(self.token(), "(") + self.assertToken(self.token(), '(') variable = self.parse_variable() - self.assertToken(self.token(), ",") + self.assertToken(self.token(), ',') name = self.token() - self.assertToken(self.token(), ",") + self.assertToken(self.token(), ',') pos = self.token() - self.assertToken(self.token(), ",") + 
self.assertToken(self.token(), ',') sense = int(self.token()) - self.assertToken(self.token(), ")") + self.assertToken(self.token(), ')') def _handle_pred_f(sent_index, word_indices): return BoxerPred( @@ -451,7 +454,7 @@ class BoxerOutputDrsParser(DrtParser): def _handle_duplex(self): # duplex(whq, drs(...), var, drs(...)) - self.assertToken(self.token(), "(") + self.assertToken(self.token(), '(') # self.assertToken(self.token(), '[') ans_types = [] # while self.token(0) != ']': @@ -470,71 +473,71 @@ class BoxerOutputDrsParser(DrtParser): # ans_types.append(self.token()) # self.token() #swallow the ']' - self.assertToken(self.token(), "whq") - self.assertToken(self.token(), ",") + self.assertToken(self.token(), 'whq') + self.assertToken(self.token(), ',') d1 = self.process_next_expression(None) - self.assertToken(self.token(), ",") + self.assertToken(self.token(), ',') ref = self.parse_variable() - self.assertToken(self.token(), ",") + self.assertToken(self.token(), ',') d2 = self.process_next_expression(None) - self.assertToken(self.token(), ")") + self.assertToken(self.token(), ')') return lambda sent_index, word_indices: BoxerWhq( self.discourse_id, sent_index, word_indices, ans_types, d1, ref, d2 ) def _handle_named(self): # named(x0, john, per, 0) - self.assertToken(self.token(), "(") + self.assertToken(self.token(), '(') variable = self.parse_variable() - self.assertToken(self.token(), ",") + self.assertToken(self.token(), ',') name = self.token() - self.assertToken(self.token(), ",") + self.assertToken(self.token(), ',') type = self.token() - self.assertToken(self.token(), ",") + self.assertToken(self.token(), ',') sense = self.token() # as per boxer rev 2554 - self.assertToken(self.token(), ")") + self.assertToken(self.token(), ')') return lambda sent_index, word_indices: BoxerNamed( self.discourse_id, sent_index, word_indices, variable, name, type, sense ) def _handle_rel(self): # rel(_G3993, _G3943, agent, 0) - self.assertToken(self.token(), "(") + self.assertToken(self.token(), '(') var1 = self.parse_variable() - self.assertToken(self.token(), ",") + self.assertToken(self.token(), ',') var2 = self.parse_variable() - self.assertToken(self.token(), ",") + self.assertToken(self.token(), ',') rel = self.token() - self.assertToken(self.token(), ",") + self.assertToken(self.token(), ',') sense = int(self.token()) - self.assertToken(self.token(), ")") + self.assertToken(self.token(), ')') return lambda sent_index, word_indices: BoxerRel( self.discourse_id, sent_index, word_indices, var1, var2, rel, sense ) def _handle_timex(self): # timex(_G18322, date([]: (+), []:'XXXX', [1004]:'04', []:'XX')) - self.assertToken(self.token(), "(") + self.assertToken(self.token(), '(') arg = self.parse_variable() - self.assertToken(self.token(), ",") + self.assertToken(self.token(), ',') new_conds = self._handle_time_expression(arg) - self.assertToken(self.token(), ")") + self.assertToken(self.token(), ')') return new_conds def _handle_time_expression(self, arg): # date([]: (+), []:'XXXX', [1004]:'04', []:'XX') tok = self.token() - self.assertToken(self.token(), "(") - if tok == "date": + self.assertToken(self.token(), '(') + if tok == 'date': conds = self._handle_date(arg) - elif tok == "time": + elif tok == 'time': conds = self._handle_time(arg) else: return None - self.assertToken(self.token(), ")") + self.assertToken(self.token(), ')') return [ lambda sent_index, word_indices: BoxerPred( - self.discourse_id, sent_index, word_indices, arg, tok, "n", 0 + self.discourse_id, sent_index, word_indices, 
arg, tok, 'n', 0 ) ] + [lambda sent_index, word_indices: cond for cond in conds] @@ -544,72 +547,72 @@ class BoxerOutputDrsParser(DrtParser): (sent_index, word_indices), = self._sent_and_word_indices( self._parse_index_list() ) - self.assertToken(self.token(), "(") + self.assertToken(self.token(), '(') pol = self.token() - self.assertToken(self.token(), ")") + self.assertToken(self.token(), ')') conds.append( BoxerPred( self.discourse_id, sent_index, word_indices, arg, - "date_pol_{0}".format(pol), - "a", + 'date_pol_{0}'.format(pol), + 'a', 0, ) ) - self.assertToken(self.token(), ",") + self.assertToken(self.token(), ',') (sent_index, word_indices), = self._sent_and_word_indices( self._parse_index_list() ) year = self.token() - if year != "XXXX": - year = year.replace(":", "_") + if year != 'XXXX': + year = year.replace(':', '_') conds.append( BoxerPred( self.discourse_id, sent_index, word_indices, arg, - "date_year_{0}".format(year), - "a", + 'date_year_{0}'.format(year), + 'a', 0, ) ) - self.assertToken(self.token(), ",") + self.assertToken(self.token(), ',') (sent_index, word_indices), = self._sent_and_word_indices( self._parse_index_list() ) month = self.token() - if month != "XX": + if month != 'XX': conds.append( BoxerPred( self.discourse_id, sent_index, word_indices, arg, - "date_month_{0}".format(month), - "a", + 'date_month_{0}'.format(month), + 'a', 0, ) ) - self.assertToken(self.token(), ",") + self.assertToken(self.token(), ',') (sent_index, word_indices), = self._sent_and_word_indices( self._parse_index_list() ) day = self.token() - if day != "XX": + if day != 'XX': conds.append( BoxerPred( self.discourse_id, sent_index, word_indices, arg, - "date_day_{0}".format(day), - "a", + 'date_day_{0}'.format(day), + 'a', 0, ) ) @@ -621,43 +624,43 @@ class BoxerOutputDrsParser(DrtParser): conds = [] self._parse_index_list() hour = self.token() - if hour != "XX": - conds.append(self._make_atom("r_hour_2", arg, hour)) - self.assertToken(self.token(), ",") + if hour != 'XX': + conds.append(self._make_atom('r_hour_2', arg, hour)) + self.assertToken(self.token(), ',') self._parse_index_list() min = self.token() - if min != "XX": - conds.append(self._make_atom("r_min_2", arg, min)) - self.assertToken(self.token(), ",") + if min != 'XX': + conds.append(self._make_atom('r_min_2', arg, min)) + self.assertToken(self.token(), ',') self._parse_index_list() sec = self.token() - if sec != "XX": - conds.append(self._make_atom("r_sec_2", arg, sec)) + if sec != 'XX': + conds.append(self._make_atom('r_sec_2', arg, sec)) return conds def _handle_card(self): # card(_G18535, 28, ge) - self.assertToken(self.token(), "(") + self.assertToken(self.token(), '(') variable = self.parse_variable() - self.assertToken(self.token(), ",") + self.assertToken(self.token(), ',') value = self.token() - self.assertToken(self.token(), ",") + self.assertToken(self.token(), ',') type = self.token() - self.assertToken(self.token(), ")") + self.assertToken(self.token(), ')') return lambda sent_index, word_indices: BoxerCard( self.discourse_id, sent_index, word_indices, variable, value, type ) def _handle_prop(self): # prop(_G15949, drs(...)) - self.assertToken(self.token(), "(") + self.assertToken(self.token(), '(') variable = self.parse_variable() - self.assertToken(self.token(), ",") + self.assertToken(self.token(), ',') drs = self.process_next_expression(None) - self.assertToken(self.token(), ")") + self.assertToken(self.token(), ')') return lambda sent_index, word_indices: BoxerProp( self.discourse_id, sent_index, 
word_indices, variable, drs ) @@ -665,99 +668,99 @@ class BoxerOutputDrsParser(DrtParser): def _parse_index_list(self): # [1001,1002]: indices = [] - self.assertToken(self.token(), "[") - while self.token(0) != "]": + self.assertToken(self.token(), '[') + while self.token(0) != ']': indices.append(self.parse_index()) - if self.token(0) == ",": + if self.token(0) == ',': self.token() # swallow ',' self.token() # swallow ']' - self.assertToken(self.token(), ":") + self.assertToken(self.token(), ':') return indices def parse_drs(self): # drs([[1001]:_G3943], # [[1002]:pred(_G3943, dog, n, 0)] # ) - self.assertToken(self.token(), "(") - self.assertToken(self.token(), "[") + self.assertToken(self.token(), '(') + self.assertToken(self.token(), '[') refs = set() - while self.token(0) != "]": + while self.token(0) != ']': indices = self._parse_index_list() refs.add(self.parse_variable()) - if self.token(0) == ",": + if self.token(0) == ',': self.token() # swallow ',' self.token() # swallow ']' - self.assertToken(self.token(), ",") - self.assertToken(self.token(), "[") + self.assertToken(self.token(), ',') + self.assertToken(self.token(), '[') conds = [] - while self.token(0) != "]": + while self.token(0) != ']': indices = self._parse_index_list() conds.extend(self.parse_condition(indices)) - if self.token(0) == ",": + if self.token(0) == ',': self.token() # swallow ',' self.token() # swallow ']' - self.assertToken(self.token(), ")") + self.assertToken(self.token(), ')') return BoxerDrs(list(refs), conds) def _handle_binary_expression(self, make_callback): - self.assertToken(self.token(), "(") + self.assertToken(self.token(), '(') drs1 = self.process_next_expression(None) - self.assertToken(self.token(), ",") + self.assertToken(self.token(), ',') drs2 = self.process_next_expression(None) - self.assertToken(self.token(), ")") + self.assertToken(self.token(), ')') return lambda sent_index, word_indices: make_callback( sent_index, word_indices, drs1, drs2 ) def _handle_alfa(self, make_callback): - self.assertToken(self.token(), "(") + self.assertToken(self.token(), '(') type = self.token() - self.assertToken(self.token(), ",") + self.assertToken(self.token(), ',') drs1 = self.process_next_expression(None) - self.assertToken(self.token(), ",") + self.assertToken(self.token(), ',') drs2 = self.process_next_expression(None) - self.assertToken(self.token(), ")") + self.assertToken(self.token(), ')') return lambda sent_index, word_indices: make_callback( sent_index, word_indices, drs1, drs2 ) def _handle_eq(self): - self.assertToken(self.token(), "(") + self.assertToken(self.token(), '(') var1 = self.parse_variable() - self.assertToken(self.token(), ",") + self.assertToken(self.token(), ',') var2 = self.parse_variable() - self.assertToken(self.token(), ")") + self.assertToken(self.token(), ')') return lambda sent_index, word_indices: BoxerEq( self.discourse_id, sent_index, word_indices, var1, var2 ) def _handle_whq(self): - self.assertToken(self.token(), "(") - self.assertToken(self.token(), "[") + self.assertToken(self.token(), '(') + self.assertToken(self.token(), '[') ans_types = [] - while self.token(0) != "]": + while self.token(0) != ']': cat = self.token() - self.assertToken(self.token(), ":") - if cat == "des": + self.assertToken(self.token(), ':') + if cat == 'des': ans_types.append(self.token()) - elif cat == "num": - ans_types.append("number") + elif cat == 'num': + ans_types.append('number') typ = self.token() - if typ == "cou": - ans_types.append("count") + if typ == 'cou': + 
ans_types.append('count') else: ans_types.append(typ) else: ans_types.append(self.token()) self.token() # swallow the ']' - self.assertToken(self.token(), ",") + self.assertToken(self.token(), ',') d1 = self.process_next_expression(None) - self.assertToken(self.token(), ",") + self.assertToken(self.token(), ',') ref = self.parse_variable() - self.assertToken(self.token(), ",") + self.assertToken(self.token(), ',') d2 = self.process_next_expression(None) - self.assertToken(self.token(), ")") + self.assertToken(self.token(), ')') return lambda sent_index, word_indices: BoxerWhq( self.discourse_id, sent_index, word_indices, ans_types, d1, ref, d2 ) @@ -773,7 +776,7 @@ class BoxerOutputDrsParser(DrtParser): def parse_variable(self): var = self.token() - assert re.match("^[exps]\d+$", var), var + assert re.match('^[exps]\d+$', var), var return var def parse_index(self): @@ -829,7 +832,7 @@ class BoxerDrsParser(DrtParser): # conds = self.handle_conds(None) # self.assertNextToken(DrtTokens.CLOSE) # return BoxerDrs(label, refs, conds) - if tok == "pred": + if tok == 'pred': self.assertNextToken(DrtTokens.OPEN) disc_id = ( self.discourse_id if self.discourse_id is not None else self.token() @@ -848,7 +851,7 @@ class BoxerDrsParser(DrtParser): sense = int(self.token()) self.assertNextToken(DrtTokens.CLOSE) return BoxerPred(disc_id, sent_id, word_ids, variable, name, pos, sense) - elif tok == "named": + elif tok == 'named': self.assertNextToken(DrtTokens.OPEN) disc_id = ( self.discourse_id if self.discourse_id is not None else self.token() @@ -869,7 +872,7 @@ class BoxerDrsParser(DrtParser): return BoxerNamed( disc_id, sent_id, word_ids, variable, name, type, sense ) - elif tok == "rel": + elif tok == 'rel': self.assertNextToken(DrtTokens.OPEN) disc_id = ( self.discourse_id if self.discourse_id is not None else self.token() @@ -888,7 +891,7 @@ class BoxerDrsParser(DrtParser): sense = int(self.token()) self.assertNextToken(DrtTokens.CLOSE) return BoxerRel(disc_id, sent_id, word_ids, var1, var2, rel, sense) - elif tok == "prop": + elif tok == 'prop': self.assertNextToken(DrtTokens.OPEN) disc_id = ( self.discourse_id if self.discourse_id is not None else self.token() @@ -903,19 +906,19 @@ class BoxerDrsParser(DrtParser): drs = self.process_next_expression(None) self.assertNextToken(DrtTokens.CLOSE) return BoxerProp(disc_id, sent_id, word_ids, variable, drs) - elif tok == "not": + elif tok == 'not': self.assertNextToken(DrtTokens.OPEN) drs = self.process_next_expression(None) self.assertNextToken(DrtTokens.CLOSE) return BoxerNot(drs) - elif tok == "imp": + elif tok == 'imp': self.assertNextToken(DrtTokens.OPEN) drs1 = self.process_next_expression(None) self.assertNextToken(DrtTokens.COMMA) drs2 = self.process_next_expression(None) self.assertNextToken(DrtTokens.CLOSE) return BoxerDrs(drs1.refs, drs1.conds, drs2) - elif tok == "or": + elif tok == 'or': self.assertNextToken(DrtTokens.OPEN) disc_id = ( self.discourse_id if self.discourse_id is not None else self.token() @@ -930,7 +933,7 @@ class BoxerDrsParser(DrtParser): drs2 = self.process_next_expression(None) self.assertNextToken(DrtTokens.CLOSE) return BoxerOr(disc_id, sent_id, word_ids, drs1, drs2) - elif tok == "eq": + elif tok == 'eq': self.assertNextToken(DrtTokens.OPEN) disc_id = ( self.discourse_id if self.discourse_id is not None else self.token() @@ -945,7 +948,7 @@ class BoxerDrsParser(DrtParser): var2 = int(self.token()) self.assertNextToken(DrtTokens.CLOSE) return BoxerEq(disc_id, sent_id, word_ids, var1, var2) - elif tok == "card": + elif 
tok == 'card': self.assertNextToken(DrtTokens.OPEN) disc_id = ( self.discourse_id if self.discourse_id is not None else self.token() @@ -962,7 +965,7 @@ class BoxerDrsParser(DrtParser): type = self.token() self.assertNextToken(DrtTokens.CLOSE) return BoxerCard(disc_id, sent_id, word_ids, var, value, type) - elif tok == "whq": + elif tok == 'whq': self.assertNextToken(DrtTokens.OPEN) disc_id = ( self.discourse_id if self.discourse_id is not None else self.token() @@ -987,13 +990,13 @@ class BoxerDrsParser(DrtParser): def nullableIntToken(self): t = self.token() - return int(t) if t != "None" else None + return int(t) if t != 'None' else None def get_next_token_variable(self, description): try: return self.token() except ExpectedMoreTokensException as e: - raise ExpectedMoreTokensException(e.index, "Variable expected.") + raise ExpectedMoreTokensException(e.index, 'Variable expected.') class AbstractBoxerDrs(object): @@ -1006,7 +1009,7 @@ class AbstractBoxerDrs(object): def variable_types(self): vartypes = {} - for t, vars in zip(("z", "e", "p"), self.variables()): + for t, vars in zip(('z', 'e', 'p'), self.variables()): for v in vars: vartypes[v] = t return vartypes @@ -1024,7 +1027,7 @@ class AbstractBoxerDrs(object): return self def _clean_name(self, name): - return name.replace("-", "_").replace("'", "_") + return name.replace('-', '_').replace("'", "_") def renumber_sentences(self, f): return self @@ -1033,6 +1036,7 @@ class AbstractBoxerDrs(object): return hash("{0}".format(self)) +@python_2_unicode_compatible class BoxerDrs(AbstractBoxerDrs): def __init__(self, refs, conds, consequent=None): AbstractBoxerDrs.__init__(self) @@ -1067,12 +1071,12 @@ class BoxerDrs(AbstractBoxerDrs): ) def __repr__(self): - s = "drs([%s], [%s])" % ( - ", ".join("%s" % r for r in self.refs), - ", ".join("%s" % c for c in self.conds), + s = 'drs([%s], [%s])' % ( + ', '.join("%s" % r for r in self.refs), + ', '.join("%s" % c for c in self.conds), ) if self.consequent is not None: - s = "imp(%s, %s)" % (s, self.consequent) + s = 'imp(%s, %s)' % (s, self.consequent) return s def __eq__(self, other): @@ -1092,6 +1096,7 @@ class BoxerDrs(AbstractBoxerDrs): __hash__ = AbstractBoxerDrs.__hash__ +@python_2_unicode_compatible class BoxerNot(AbstractBoxerDrs): def __init__(self, drs): AbstractBoxerDrs.__init__(self) @@ -1110,7 +1115,7 @@ class BoxerNot(AbstractBoxerDrs): return BoxerNot(self.drs.renumber_sentences(f)) def __repr__(self): - return "not(%s)" % (self.drs) + return 'not(%s)' % (self.drs) def __eq__(self, other): return self.__class__ == other.__class__ and self.drs == other.drs @@ -1121,6 +1126,7 @@ class BoxerNot(AbstractBoxerDrs): __hash__ = AbstractBoxerDrs.__hash__ +@python_2_unicode_compatible class BoxerIndexed(AbstractBoxerDrs): def __init__(self, discourse_id, sent_index, word_indices): AbstractBoxerDrs.__init__(self) @@ -1146,15 +1152,15 @@ class BoxerIndexed(AbstractBoxerDrs): __hash__ = AbstractBoxerDrs.__hash__ def __repr__(self): - s = "%s(%s, %s, [%s]" % ( + s = '%s(%s, %s, [%s]' % ( self._pred(), self.discourse_id, self.sent_index, - ", ".join("%s" % wi for wi in self.word_indices), + ', '.join("%s" % wi for wi in self.word_indices), ) for v in self: - s += ", %s" % v - return s + ")" + s += ', %s' % v + return s + ')' class BoxerPred(BoxerIndexed): @@ -1206,7 +1212,7 @@ class BoxerPred(BoxerIndexed): return iter((self.var, self.name, self.pos, self.sense)) def _pred(self): - return "pred" + return 'pred' class BoxerNamed(BoxerIndexed): @@ -1257,7 +1263,7 @@ class BoxerNamed(BoxerIndexed): 
return iter((self.var, self.name, self.type, self.sense)) def _pred(self): - return "named" + return 'named' class BoxerRel(BoxerIndexed): @@ -1297,7 +1303,7 @@ class BoxerRel(BoxerIndexed): return iter((self.var1, self.var2, self.rel, self.sense)) def _pred(self): - return "rel" + return 'rel' class BoxerProp(BoxerIndexed): @@ -1339,7 +1345,7 @@ class BoxerProp(BoxerIndexed): return iter((self.var, self.drs)) def _pred(self): - return "prop" + return 'prop' class BoxerEq(BoxerIndexed): @@ -1367,7 +1373,7 @@ class BoxerEq(BoxerIndexed): return iter((self.var1, self.var2)) def _pred(self): - return "eq" + return 'eq' class BoxerCard(BoxerIndexed): @@ -1394,7 +1400,7 @@ class BoxerCard(BoxerIndexed): return iter((self.var, self.value, self.type)) def _pred(self): - return "card" + return 'card' class BoxerOr(BoxerIndexed): @@ -1431,7 +1437,7 @@ class BoxerOr(BoxerIndexed): return iter((self.drs1, self.drs2)) def _pred(self): - return "or" + return 'or' class BoxerWhq(BoxerIndexed): @@ -1481,11 +1487,11 @@ class BoxerWhq(BoxerIndexed): def __iter__(self): return iter( - ("[" + ",".join(self.ans_types) + "]", self.drs1, self.variable, self.drs2) + ('[' + ','.join(self.ans_types) + ']', self.drs1, self.variable, self.drs2) ) def _pred(self): - return "whq" + return 'whq' class PassthroughBoxerDrsInterpreter(object): @@ -1512,13 +1518,13 @@ class NltkDrtBoxerDrsInterpreter(object): elif isinstance(ex, BoxerNot): return DrtNegatedExpression(self.interpret(ex.drs)) elif isinstance(ex, BoxerPred): - pred = self._add_occur_indexing("%s_%s" % (ex.pos, ex.name), ex) + pred = self._add_occur_indexing('%s_%s' % (ex.pos, ex.name), ex) return self._make_atom(pred, ex.var) elif isinstance(ex, BoxerNamed): - pred = self._add_occur_indexing("ne_%s_%s" % (ex.type, ex.name), ex) + pred = self._add_occur_indexing('ne_%s_%s' % (ex.type, ex.name), ex) return self._make_atom(pred, ex.var) elif isinstance(ex, BoxerRel): - pred = self._add_occur_indexing("%s" % (ex.rel), ex) + pred = self._add_occur_indexing('%s' % (ex.rel), ex) return self._make_atom(pred, ex.var1, ex.var2) elif isinstance(ex, BoxerProp): return DrtProposition(Variable(ex.var), self.interpret(ex.drs)) @@ -1528,7 +1534,7 @@ class NltkDrtBoxerDrsInterpreter(object): DrtVariableExpression(Variable(ex.var2)), ) elif isinstance(ex, BoxerCard): - pred = self._add_occur_indexing("card_%s_%s" % (ex.type, ex.value), ex) + pred = self._add_occur_indexing('card_%s_%s' % (ex.type, ex.value), ex) return self._make_atom(pred, ex.var) elif isinstance(ex, BoxerOr): return DrtOrExpression(self.interpret(ex.drs1), self.interpret(ex.drs2)) @@ -1536,7 +1542,7 @@ class NltkDrtBoxerDrsInterpreter(object): drs1 = self.interpret(ex.drs1) drs2 = self.interpret(ex.drs2) return DRS(drs1.refs + drs2.refs, drs1.conds + drs2.conds) - assert False, "%s: %s" % (ex.__class__.__name__, ex) + assert False, '%s: %s' % (ex.__class__.__name__, ex) def _make_atom(self, pred, *args): accum = DrtVariableExpression(Variable(pred)) @@ -1549,9 +1555,9 @@ class NltkDrtBoxerDrsInterpreter(object): def _add_occur_indexing(self, base, ex): if self._occur_index and ex.sent_index is not None: if ex.discourse_id: - base += "_%s" % ex.discourse_id - base += "_s%s" % ex.sent_index - base += "_w%s" % sorted(ex.word_indices)[0] + base += '_%s' % ex.discourse_id + base += '_s%s' % ex.sent_index + base += '_w%s' % sorted(ex.word_indices)[0] return base @@ -1559,7 +1565,7 @@ class UnparseableInputException(Exception): pass -if __name__ == "__main__": +if __name__ == '__main__': opts = OptionParser("usage: 
%prog TEXT [options]") opts.add_option( "--verbose", @@ -1595,7 +1601,7 @@ if __name__ == "__main__": interpreter = NltkDrtBoxerDrsInterpreter(occur_index=options.occur_index) drs = Boxer(interpreter).interpret_multi( - args[0].split(r"\n"), question=options.question, verbose=options.verbose + args[0].split(r'\n'), question=options.question, verbose=options.verbose ) if drs is None: print(None) diff --git a/nlp_resource_data/nltk/sem/chat80.py b/nlp_resource_data/nltk/sem/chat80.py index 2597177..9500b35 100644 --- a/nlp_resource_data/nltk/sem/chat80.py +++ b/nlp_resource_data/nltk/sem/chat80.py @@ -1,7 +1,7 @@ # Natural Language Toolkit: Chat-80 KB Reader # See http://www.w3.org/TR/swbp-skos-core-guide/ # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Ewan Klein , # URL: # For license information, see LICENSE.TXT @@ -122,127 +122,132 @@ The set of rules is written to the file ``chat_pnames.cfg`` in the current directory. """ +from __future__ import print_function, unicode_literals import re import shelve import os import sys +from six import string_types + import nltk.data +from nltk.compat import python_2_unicode_compatible ########################################################################### # Chat-80 relation metadata bundles needed to build the valuation ########################################################################### borders = { - "rel_name": "borders", - "closures": ["symmetric"], - "schema": ["region", "border"], - "filename": "borders.pl", + 'rel_name': 'borders', + 'closures': ['symmetric'], + 'schema': ['region', 'border'], + 'filename': 'borders.pl', } contains = { - "rel_name": "contains0", - "closures": ["transitive"], - "schema": ["region", "contain"], - "filename": "contain.pl", + 'rel_name': 'contains0', + 'closures': ['transitive'], + 'schema': ['region', 'contain'], + 'filename': 'contain.pl', } city = { - "rel_name": "city", - "closures": [], - "schema": ["city", "country", "population"], - "filename": "cities.pl", + 'rel_name': 'city', + 'closures': [], + 'schema': ['city', 'country', 'population'], + 'filename': 'cities.pl', } country = { - "rel_name": "country", - "closures": [], - "schema": [ - "country", - "region", - "latitude", - "longitude", - "area", - "population", - "capital", - "currency", + 'rel_name': 'country', + 'closures': [], + 'schema': [ + 'country', + 'region', + 'latitude', + 'longitude', + 'area', + 'population', + 'capital', + 'currency', ], - "filename": "countries.pl", + 'filename': 'countries.pl', } circle_of_lat = { - "rel_name": "circle_of_latitude", - "closures": [], - "schema": ["circle_of_latitude", "degrees"], - "filename": "world1.pl", + 'rel_name': 'circle_of_latitude', + 'closures': [], + 'schema': ['circle_of_latitude', 'degrees'], + 'filename': 'world1.pl', } circle_of_long = { - "rel_name": "circle_of_longitude", - "closures": [], - "schema": ["circle_of_longitude", "degrees"], - "filename": "world1.pl", + 'rel_name': 'circle_of_longitude', + 'closures': [], + 'schema': ['circle_of_longitude', 'degrees'], + 'filename': 'world1.pl', } continent = { - "rel_name": "continent", - "closures": [], - "schema": ["continent"], - "filename": "world1.pl", + 'rel_name': 'continent', + 'closures': [], + 'schema': ['continent'], + 'filename': 'world1.pl', } region = { - "rel_name": "in_continent", - "closures": [], - "schema": ["region", "continent"], - "filename": "world1.pl", + 'rel_name': 'in_continent', + 'closures': [], + 'schema': ['region', 'continent'], + 'filename': 
'world1.pl', } ocean = { - "rel_name": "ocean", - "closures": [], - "schema": ["ocean"], - "filename": "world1.pl", + 'rel_name': 'ocean', + 'closures': [], + 'schema': ['ocean'], + 'filename': 'world1.pl', } -sea = {"rel_name": "sea", "closures": [], "schema": ["sea"], "filename": "world1.pl"} +sea = {'rel_name': 'sea', 'closures': [], 'schema': ['sea'], 'filename': 'world1.pl'} items = [ - "borders", - "contains", - "city", - "country", - "circle_of_lat", - "circle_of_long", - "continent", - "region", - "ocean", - "sea", + 'borders', + 'contains', + 'city', + 'country', + 'circle_of_lat', + 'circle_of_long', + 'continent', + 'region', + 'ocean', + 'sea', ] items = tuple(sorted(items)) item_metadata = { - "borders": borders, - "contains": contains, - "city": city, - "country": country, - "circle_of_lat": circle_of_lat, - "circle_of_long": circle_of_long, - "continent": continent, - "region": region, - "ocean": ocean, - "sea": sea, + 'borders': borders, + 'contains': contains, + 'city': city, + 'country': country, + 'circle_of_lat': circle_of_lat, + 'circle_of_long': circle_of_long, + 'continent': continent, + 'region': region, + 'ocean': ocean, + 'sea': sea, } rels = item_metadata.values() -not_unary = ["borders.pl", "contain.pl"] +not_unary = ['borders.pl', 'contain.pl'] ########################################################################### +@python_2_unicode_compatible class Concept(object): """ A Concept class, loosely based on SKOS @@ -347,16 +352,17 @@ class Concept(object): from nltk.sem import is_rel assert is_rel(self._extension) - if "symmetric" in self.closures: + if 'symmetric' in self.closures: pairs = [] for (x, y) in self._extension: pairs.append((y, x)) sym = set(pairs) self._extension = self._extension.union(sym) - if "transitive" in self.closures: + if 'transitive' in self.closures: all = self._make_graph(self._extension) closed = self._transclose(all) trans = self._make_pairs(closed) + # print sorted(trans) self._extension = self._extension.union(trans) self.extension = sorted(list(self._extension)) @@ -426,13 +432,13 @@ def cities2table(filename, rel_name, dbname, verbose=False, setup=False): cur = connection.cursor() if setup: cur.execute( - """CREATE TABLE city_table - (City text, Country text, Population int)""" + '''CREATE TABLE city_table + (City text, Country text, Population int)''' ) table_name = "city_table" for t in records: - cur.execute("insert into %s values (?,?,?)" % table_name, t) + cur.execute('insert into %s values (?,?,?)' % table_name, t) if verbose: print("inserting values into %s: " % table_name, t) connection.commit() @@ -473,9 +479,9 @@ def _str2records(filename, rel): contents = nltk.data.load("corpora/chat80/%s" % filename, format="text") for line in contents.splitlines(): if line.startswith(rel): - line = re.sub(rel + r"\(", "", line) - line = re.sub(r"\)\.$", "", line) - record = line.split(",") + line = re.sub(rel + r'\(', '', line) + line = re.sub(r'\)\.$', '', line) + record = line.split(',') recs.append(record) return recs @@ -531,8 +537,8 @@ def binary_concept(label, closures, subj, obj, records): :return: ``Concept`` of arity 2 :rtype: Concept """ - if not label == "border" and not label == "contain": - label = label + "_of" + if not label == 'border' and not label == 'contain': + label = label + '_of' c = Concept(label, arity=2, closures=closures, extension=set()) for record in records: c.augment((record[subj], record[obj])) @@ -553,10 +559,10 @@ def process_bundle(rels): """ concepts = {} for rel in rels: - rel_name = 
rel["rel_name"] - closures = rel["closures"] - schema = rel["schema"] - filename = rel["filename"] + rel_name = rel['rel_name'] + closures = rel['closures'] + schema = rel['schema'] + filename = rel['filename'] concept_list = clause2concepts(filename, rel_name, schema, closures) for c in concept_list: @@ -612,7 +618,7 @@ def val_dump(rels, db): """ concepts = process_bundle(rels).values() valuation = make_valuation(concepts, read=True) - db_out = shelve.open(db, "n") + db_out = shelve.open(db, 'n') db_out.update(valuation) @@ -674,7 +680,7 @@ def label_indivs(valuation, lexicon=False): pairs = [(e, e) for e in domain] if lexicon: lex = make_lex(domain) - with open("chat_pnames.cfg", "w") as outfile: + with open("chat_pnames.cfg", 'w') as outfile: outfile.writelines(lex) # read the pairs into the valuation valuation.update(pairs) @@ -703,9 +709,9 @@ def make_lex(symbols): template = "PropN[num=sg, sem=<\P.(P %s)>] -> '%s'\n" for s in symbols: - parts = s.split("_") + parts = s.split('_') caps = [p.capitalize() for p in parts] - pname = "_".join(caps) + pname = '_'.join(caps) rule = template % (s, pname) lex.append(rule) return lex @@ -725,7 +731,7 @@ def concepts(items=items): :return: the ``Concept`` objects which are extracted from the relations :rtype: list(Concept) """ - if isinstance(items, str): + if isinstance(items, string_types): items = (items,) rels = [item_metadata[r] for r in items] @@ -848,10 +854,10 @@ def sql_demo(): """ print() print("Using SQL to extract rows from 'city.db' RDB.") - for row in sql_query("corpora/city_database/city.db", "SELECT * FROM city_table"): + for row in sql_query('corpora/city_database/city.db', "SELECT * FROM city_table"): print(row) -if __name__ == "__main__": +if __name__ == '__main__': main() sql_demo() diff --git a/nlp_resource_data/nltk/sem/cooper_storage.py b/nlp_resource_data/nltk/sem/cooper_storage.py index 830c3e4..4aca110 100644 --- a/nlp_resource_data/nltk/sem/cooper_storage.py +++ b/nlp_resource_data/nltk/sem/cooper_storage.py @@ -1,9 +1,10 @@ # Natural Language Toolkit: Cooper storage for Quantifier Ambiguity # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Ewan Klein # URL: # For license information, see LICENSE.TXT +from __future__ import print_function from nltk.sem.logic import LambdaExpression, ApplicationExpression, Variable from nltk.parse import load_parser @@ -25,8 +26,8 @@ class CooperStore(object): self.featstruct = featstruct self.readings = [] try: - self.core = featstruct["CORE"] - self.store = featstruct["STORE"] + self.core = featstruct['CORE'] + self.store = featstruct['STORE'] except KeyError: print("%s is not a Cooper storage structure" % featstruct) @@ -82,7 +83,7 @@ def parse_with_bindops(sentence, grammar=None, trace=0): Use a grammar with Binding Operators to parse a sentence. """ if not grammar: - grammar = "grammars/book_grammars/storage.fcfg" + grammar = 'grammars/book_grammars/storage.fcfg' parser = load_parser(grammar, trace=trace, chart_class=InstantiateVarsChart) # Parse the sentence. 
tokens = sentence.split() @@ -99,7 +100,7 @@ def demo(): print("=" * 50) trees = cs.parse_with_bindops(sentence, trace=0) for tree in trees: - semrep = cs.CooperStore(tree.label()["SEM"]) + semrep = cs.CooperStore(tree.label()['SEM']) print() print("Binding operators:") print("-" * 15) @@ -120,5 +121,5 @@ def demo(): print("%s: %s" % (i + 1, reading)) -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/sem/drt.py b/nlp_resource_data/nltk/sem/drt.py index 57e26fb..8bc67f6 100644 --- a/nlp_resource_data/nltk/sem/drt.py +++ b/nlp_resource_data/nltk/sem/drt.py @@ -2,14 +2,18 @@ # # Author: Dan Garrette # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # URL: # For license information, see LICENSE.TXT +from __future__ import print_function, unicode_literals import operator from functools import reduce from itertools import chain +from six import string_types + +from nltk.compat import python_2_unicode_compatible from nltk.sem.logic import ( APP, AbstractVariableExpression, @@ -40,8 +44,8 @@ from nltk.sem.logic import ( # Import Tkinter-based modules if they are available try: - from tkinter import Canvas, Tk - from tkinter.font import Font + from six.moves.tkinter import Canvas, Tk + from six.moves.tkinter_font import Font from nltk.util import in_idle except ImportError: @@ -50,12 +54,12 @@ except ImportError: class DrtTokens(Tokens): - DRS = "DRS" - DRS_CONC = "+" - PRONOUN = "PRO" - OPEN_BRACKET = "[" - CLOSE_BRACKET = "]" - COLON = ":" + DRS = 'DRS' + DRS_CONC = '+' + PRONOUN = 'PRO' + OPEN_BRACKET = '[' + CLOSE_BRACKET = ']' + COLON = ':' PUNCT = [DRS_CONC, OPEN_BRACKET, CLOSE_BRACKET, COLON] @@ -135,7 +139,7 @@ class DrtParser(LogicParser): # Support expressions like: DRS([x y],C) == DRS([x,y],C) if refs and self.token(0) == DrtTokens.COMMA: self.token() # swallow the comma - refs.append(self.get_next_token_variable("quantified")) + refs.append(self.get_next_token_variable('quantified')) self.assertNextToken(DrtTokens.CLOSE_BRACKET) return refs @@ -152,7 +156,7 @@ class DrtParser(LogicParser): def handle_prop(self, tok, context): variable = self.make_VariableExpression(tok) - self.assertNextToken(":") + self.assertNextToken(':') drs = self.process_next_expression(DrtTokens.COLON) return DrtProposition(variable, drs) @@ -175,7 +179,7 @@ class DrtParser(LogicParser): return DRS(first.refs, first.conds, second) if isinstance(first, DrtConcatenation): return DrtConcatenation(first.first, first.second, second) - raise Exception("Antecedent of implication must be a DRS") + raise Exception('Antecedent of implication must be a DRS') return make_imp_expression else: @@ -225,7 +229,7 @@ class DrtExpression(object): return DRS(self.refs, self.conds, other) if isinstance(self, DrtConcatenation): return DrtConcatenation(self.first, self.second, other) - raise Exception("Antecedent of implication must be a DRS") + raise Exception('Antecedent of implication must be a DRS') def equiv(self, other, prover=None): """ @@ -288,7 +292,7 @@ class DrtExpression(object): Draw the DRS :return: the pretty print string """ - return "\n".join(self._pretty()) + return '\n'.join(self._pretty()) def pretty_print(self): print(self.pretty_format()) @@ -297,6 +301,7 @@ class DrtExpression(object): DrsDrawer(self).draw() +@python_2_unicode_compatible class DRS(DrtExpression, Expression): """A Discourse Representation Structure.""" @@ -464,7 +469,7 @@ class DRS(DrtExpression, Expression): return accum def _pretty(self): - refs_line = " 
".join(self._order_ref_strings(self.refs)) + refs_line = ' '.join(self._order_ref_strings(self.refs)) cond_lines = [ cond @@ -476,12 +481,12 @@ class DRS(DrtExpression, Expression): length = max([len(refs_line)] + list(map(len, cond_lines))) drs = ( [ - " _" + "_" * length + "_ ", - "| " + refs_line.ljust(length) + " |", - "|-" + "-" * length + "-|", + ' _' + '_' * length + '_ ', + '| ' + refs_line.ljust(length) + ' |', + '|-' + '-' * length + '-|', ] - + ["| " + line.ljust(length) + " |" for line in cond_lines] - + ["|_" + "_" * length + "_|"] + + ['| ' + line.ljust(length) + ' |' for line in cond_lines] + + ['|_' + '_' * length + '_|'] ) if self.consequent: return DrtBinaryExpression._assemble_pretty( @@ -535,17 +540,17 @@ class DRS(DrtExpression, Expression): __hash__ = Expression.__hash__ def __str__(self): - drs = "([%s],[%s])" % ( - ",".join(self._order_ref_strings(self.refs)), - ", ".join("%s" % cond for cond in self.conds), + drs = '([%s],[%s])' % ( + ','.join(self._order_ref_strings(self.refs)), + ', '.join("%s" % cond for cond in self.conds), ) # map(str, self.conds))) if self.consequent: return ( DrtTokens.OPEN + drs - + " " + + ' ' + DrtTokens.IMP - + " " + + ' ' + "%s" % self.consequent + DrtTokens.CLOSE ) @@ -577,7 +582,7 @@ class DrtAbstractVariableExpression(DrtExpression, AbstractVariableExpression): def _pretty(self): s = "%s" % self - blank = " " * len(s) + blank = ' ' * len(s) return [blank, blank, s, blank] def eliminate_equality(self): @@ -606,6 +611,7 @@ class DrtConstantExpression(DrtAbstractVariableExpression, ConstantExpression): pass +@python_2_unicode_compatible class DrtProposition(DrtExpression, Expression): def __init__(self, variable, drs): self.variable = variable @@ -649,11 +655,11 @@ class DrtProposition(DrtExpression, Expression): def _pretty(self): drs_s = self.drs._pretty() - blank = " " * len("%s" % self.variable) + blank = ' ' * len("%s" % self.variable) return ( - [blank + " " + line for line in drs_s[:1]] - + ["%s" % self.variable + ":" + line for line in drs_s[1:2]] - + [blank + " " + line for line in drs_s[2:]] + [blank + ' ' + line for line in drs_s[:1]] + + ["%s" % self.variable + ':' + line for line in drs_s[1:2]] + + [blank + ' ' + line for line in drs_s[2:]] ) def visit(self, function, combinator): @@ -665,7 +671,7 @@ class DrtProposition(DrtExpression, Expression): return combinator(self.variable, function(self.drs)) def __str__(self): - return "prop(%s, %s)" % (self.variable, self.drs) + return 'prop(%s, %s)' % (self.variable, self.drs) class DrtNegatedExpression(DrtExpression, NegatedExpression): @@ -679,10 +685,10 @@ class DrtNegatedExpression(DrtExpression, NegatedExpression): def _pretty(self): term_lines = self.term._pretty() return ( - [" " + line for line in term_lines[:2]] - + ["__ " + line for line in term_lines[2:3]] - + [" | " + line for line in term_lines[3:4]] - + [" " + line for line in term_lines[4:]] + [' ' + line for line in term_lines[:2]] + + ['__ ' + line for line in term_lines[2:3]] + + [' | ' + line for line in term_lines[3:4]] + + [' ' + line for line in term_lines[4:]] ) @@ -706,14 +712,14 @@ class DrtLambdaExpression(DrtExpression, LambdaExpression): while term.__class__ == self.__class__: variables.append(term.variable) term = term.term - var_string = " ".join("%s" % v for v in variables) + DrtTokens.DOT + var_string = ' '.join("%s" % v for v in variables) + DrtTokens.DOT term_lines = term._pretty() - blank = " " * len(var_string) + blank = ' ' * len(var_string) return ( - [" " + blank + line for line in 
term_lines[:1]] - + [" \ " + blank + line for line in term_lines[1:2]] - + [" /\ " + var_string + line for line in term_lines[2:3]] - + [" " + blank + line for line in term_lines[3:]] + [' ' + blank + line for line in term_lines[:1]] + + [' \ ' + blank + line for line in term_lines[1:2]] + + [' /\ ' + var_string + line for line in term_lines[2:3]] + + [' ' + blank + line for line in term_lines[3:]] ) @@ -736,19 +742,19 @@ class DrtBinaryExpression(DrtExpression, BinaryExpression): max_lines = max(len(first_lines), len(second_lines)) first_lines = _pad_vertically(first_lines, max_lines) second_lines = _pad_vertically(second_lines, max_lines) - blank = " " * len(op) + blank = ' ' * len(op) first_second_lines = list(zip(first_lines, second_lines)) return ( [ - " " + first_line + " " + blank + " " + second_line + " " + ' ' + first_line + ' ' + blank + ' ' + second_line + ' ' for first_line, second_line in first_second_lines[:2] ] + [ - "(" + first_line + " " + op + " " + second_line + ")" + '(' + first_line + ' ' + op + ' ' + second_line + ')' for first_line, second_line in first_second_lines[2:3] ] + [ - " " + first_line + " " + blank + " " + second_line + " " + ' ' + first_line + ' ' + blank + ' ' + second_line + ' ' for first_line, second_line in first_second_lines[3:] ] ) @@ -776,6 +782,7 @@ class DrtEqualityExpression(DrtBinaryExpression, EqualityExpression): return EqualityExpression(self.first.fol(), self.second.fol()) +@python_2_unicode_compatible class DrtConcatenation(DrtBooleanExpression): """DRS of the form '(DRS + DRS)'""" @@ -912,14 +919,14 @@ class DrtConcatenation(DrtBooleanExpression): def __str__(self): first = self._str_subex(self.first) second = self._str_subex(self.second) - drs = Tokens.OPEN + first + " " + self.getOp() + " " + second + Tokens.CLOSE + drs = Tokens.OPEN + first + ' ' + self.getOp() + ' ' + second + Tokens.CLOSE if self.consequent: return ( DrtTokens.OPEN + drs - + " " + + ' ' + DrtTokens.IMP - + " " + + ' ' + "%s" % self.consequent + DrtTokens.CLOSE ) @@ -954,25 +961,26 @@ class DrtApplicationExpression(DrtExpression, ApplicationExpression): func_args_lines = list(zip(function_lines, list(zip(*args_lines)))) return ( [ - func_line + " " + " ".join(args_line) + " " + func_line + ' ' + ' '.join(args_line) + ' ' for func_line, args_line in func_args_lines[:2] ] + [ - func_line + "(" + ",".join(args_line) + ")" + func_line + '(' + ','.join(args_line) + ')' for func_line, args_line in func_args_lines[2:3] ] + [ - func_line + " " + " ".join(args_line) + " " + func_line + ' ' + ' '.join(args_line) + ' ' for func_line, args_line in func_args_lines[3:] ] ) def _pad_vertically(lines, max_lines): - pad_line = [" " * len(lines[0])] + pad_line = [' ' * len(lines[0])] return lines + pad_line * (max_lines - len(lines)) +@python_2_unicode_compatible class PossibleAntecedents(list, DrtExpression, Expression): def free(self): """Set of free variables.""" @@ -991,11 +999,11 @@ class PossibleAntecedents(list, DrtExpression, Expression): def _pretty(self): s = "%s" % self - blank = " " * len(s) + blank = ' ' * len(s) return [blank, blank, s] def __str__(self): - return "[" + ",".join("%s" % it for it in self) + "]" + return '[' + ','.join("%s" % it for it in self) + ']' class AnaphoraResolutionException(Exception): @@ -1101,7 +1109,7 @@ class DrsDrawer(object): master = Tk() master.title("DRT") - font = Font(family="helvetica", size=12) + font = Font(family='helvetica', size=12) if size_canvas: canvas = Canvas(master, width=0, height=0) @@ -1155,8 +1163,8 @@ class 
DrsDrawer(object): :param y: the left side of the current drawing area :return: the bottom-rightmost point """ - if isinstance(item, str): - self.canvas.create_text(x, y, anchor="nw", font=self.canvas.font, text=item) + if isinstance(item, string_types): + self.canvas.create_text(x, y, anchor='nw', font=self.canvas.font, text=item) elif isinstance(item, tuple): # item is the lower-right of a box (right, bottom) = item @@ -1177,7 +1185,7 @@ class DrsDrawer(object): :param y: the left side of the current drawing area :return: the bottom-rightmost point """ - if isinstance(item, str): + if isinstance(item, string_types): return (x + self.canvas.font.measure(item), y + self._get_text_height()) elif isinstance(item, tuple): return item @@ -1253,9 +1261,9 @@ class DrsDrawer(object): # Handle Discourse Referents if expression.refs: - refs = " ".join("%s" % r for r in expression.refs) + refs = ' '.join("%s" % r for r in expression.refs) else: - refs = " " + refs = ' ' (max_right, bottom) = command(refs, left, bottom) bottom += self.BUFFER * 2 @@ -1308,7 +1316,7 @@ class DrsDrawer(object): if i + 1 < len(args): # since it's not the last arg, add a comma - right = command(DrtTokens.COMMA + " ", right, centred_string_top)[0] + right = command(DrtTokens.COMMA + ' ', right, centred_string_top)[0] # Handle close paren right = command(DrtTokens.CLOSE, right, centred_string_top)[0] @@ -1352,7 +1360,7 @@ class DrsDrawer(object): ) # Handle the operator - right = command(" %s " % expression.getOp(), right, centred_string_top)[0] + right = command(' %s ' % expression.getOp(), right, centred_string_top)[0] # Handle the second operand second_height = expression.second._drawing_height @@ -1385,36 +1393,36 @@ class DrsDrawer(object): def demo(): - print("=" * 20 + "TEST PARSE" + "=" * 20) + print('=' * 20 + 'TEST PARSE' + '=' * 20) dexpr = DrtExpression.fromstring - print(dexpr(r"([x,y],[sees(x,y)])")) - print(dexpr(r"([x],[man(x), walks(x)])")) - print(dexpr(r"\x.\y.([],[sees(x,y)])")) - print(dexpr(r"\x.([],[walks(x)])(john)")) - print(dexpr(r"(([x],[walks(x)]) + ([y],[runs(y)]))")) - print(dexpr(r"(([],[walks(x)]) -> ([],[runs(x)]))")) - print(dexpr(r"([x],[PRO(x), sees(John,x)])")) - print(dexpr(r"([x],[man(x), -([],[walks(x)])])")) - print(dexpr(r"([],[(([x],[man(x)]) -> ([],[walks(x)]))])")) - - print("=" * 20 + "Test fol()" + "=" * 20) - print(dexpr(r"([x,y],[sees(x,y)])").fol()) - - print("=" * 20 + "Test alpha conversion and lambda expression equality" + "=" * 20) - e1 = dexpr(r"\x.([],[P(x)])") + print(dexpr(r'([x,y],[sees(x,y)])')) + print(dexpr(r'([x],[man(x), walks(x)])')) + print(dexpr(r'\x.\y.([],[sees(x,y)])')) + print(dexpr(r'\x.([],[walks(x)])(john)')) + print(dexpr(r'(([x],[walks(x)]) + ([y],[runs(y)]))')) + print(dexpr(r'(([],[walks(x)]) -> ([],[runs(x)]))')) + print(dexpr(r'([x],[PRO(x), sees(John,x)])')) + print(dexpr(r'([x],[man(x), -([],[walks(x)])])')) + print(dexpr(r'([],[(([x],[man(x)]) -> ([],[walks(x)]))])')) + + print('=' * 20 + 'Test fol()' + '=' * 20) + print(dexpr(r'([x,y],[sees(x,y)])').fol()) + + print('=' * 20 + 'Test alpha conversion and lambda expression equality' + '=' * 20) + e1 = dexpr(r'\x.([],[P(x)])') print(e1) - e2 = e1.alpha_convert(Variable("z")) + e2 = e1.alpha_convert(Variable('z')) print(e2) print(e1 == e2) - print("=" * 20 + "Test resolve_anaphora()" + "=" * 20) - print(resolve_anaphora(dexpr(r"([x,y,z],[dog(x), cat(y), walks(z), PRO(z)])"))) + print('=' * 20 + 'Test resolve_anaphora()' + '=' * 20) + print(resolve_anaphora(dexpr(r'([x,y,z],[dog(x), cat(y), 
walks(z), PRO(z)])'))) print( - resolve_anaphora(dexpr(r"([],[(([x],[dog(x)]) -> ([y],[walks(y), PRO(y)]))])")) + resolve_anaphora(dexpr(r'([],[(([x],[dog(x)]) -> ([y],[walks(y), PRO(y)]))])')) ) - print(resolve_anaphora(dexpr(r"(([x,y],[]) + ([],[PRO(x)]))"))) + print(resolve_anaphora(dexpr(r'(([x,y],[]) + ([],[PRO(x)]))'))) - print("=" * 20 + "Test pretty_print()" + "=" * 20) + print('=' * 20 + 'Test pretty_print()' + '=' * 20) dexpr(r"([],[])").pretty_print() dexpr( r"([],[([x],[big(x), dog(x)]) -> ([],[bark(x)]) -([x],[walk(x)])])" @@ -1426,24 +1434,24 @@ def demo(): def test_draw(): try: - from tkinter import Tk + from six.moves.tkinter import Tk except ImportError: from nose import SkipTest raise SkipTest("tkinter is required, but it's not available.") expressions = [ - r"x", - r"([],[])", - r"([x],[])", - r"([x],[man(x)])", - r"([x,y],[sees(x,y)])", - r"([x],[man(x), walks(x)])", - r"\x.([],[man(x), walks(x)])", - r"\x y.([],[sees(x,y)])", - r"([],[(([],[walks(x)]) + ([],[runs(x)]))])", - r"([x],[man(x), -([],[walks(x)])])", - r"([],[(([x],[man(x)]) -> ([],[walks(x)]))])", + r'x', + r'([],[])', + r'([x],[])', + r'([x],[man(x)])', + r'([x,y],[sees(x,y)])', + r'([x],[man(x), walks(x)])', + r'\x.([],[man(x), walks(x)])', + r'\x y.([],[sees(x,y)])', + r'([],[(([],[walks(x)]) + ([],[runs(x)]))])', + r'([x],[man(x), -([],[walks(x)])])', + r'([],[(([x],[man(x)]) -> ([],[walks(x)]))])', ] for e in expressions: @@ -1451,5 +1459,5 @@ def test_draw(): d.draw() -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/sem/drt_glue_demo.py b/nlp_resource_data/nltk/sem/drt_glue_demo.py index 61a4f5b..4a45325 100644 --- a/nlp_resource_data/nltk/sem/drt_glue_demo.py +++ b/nlp_resource_data/nltk/sem/drt_glue_demo.py @@ -3,12 +3,12 @@ # # Author: Dan Garrette # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # URL: # For license information, see LICENSE.TXT try: - from tkinter import ( + from six.moves.tkinter import ( Button, Frame, IntVar, @@ -18,7 +18,7 @@ try: Scrollbar, Tk, ) - from tkinter.font import Font + from six.moves.tkinter_font import Font from nltk.draw.util import CanvasFrame, ShowText except ImportError: @@ -36,7 +36,7 @@ class DrtGlueDemo(object): def __init__(self, examples): # Set up the main window. self._top = Tk() - self._top.title("DRT Glue Demo") + self._top.title('DRT Glue Demo') # Set up key bindings. 
self._init_bindings() @@ -68,7 +68,7 @@ class DrtGlueDemo(object): self._init_canvas(self._top) # Resize callback - self._canvas.bind("", self._configure) + self._canvas.bind('', self._configure) ######################################### ## Initialization Helpers @@ -77,17 +77,17 @@ class DrtGlueDemo(object): def _init_glue(self): tagger = RegexpTagger( [ - ("^(David|Mary|John)$", "NNP"), + ('^(David|Mary|John)$', 'NNP'), ( - "^(walks|sees|eats|chases|believes|gives|sleeps|chases|persuades|tries|seems|leaves)$", - "VB", + '^(walks|sees|eats|chases|believes|gives|sleeps|chases|persuades|tries|seems|leaves)$', + 'VB', ), - ("^(go|order|vanish|find|approach)$", "VB"), - ("^(a)$", "ex_quant"), - ("^(every)$", "univ_quant"), - ("^(sandwich|man|dog|pizza|unicorn|cat|senator)$", "NN"), - ("^(big|gray|former)$", "JJ"), - ("^(him|himself)$", "PRP"), + ('^(go|order|vanish|find|approach)$', 'VB'), + ('^(a)$', 'ex_quant'), + ('^(every)$', 'univ_quant'), + ('^(sandwich|man|dog|pizza|unicorn|cat|senator)$', 'NN'), + ('^(big|gray|former)$', 'JJ'), + ('^(him|himself)$', 'PRP'), ] ) @@ -101,134 +101,134 @@ class DrtGlueDemo(object): # TWhat's our font size (default=same as sysfont) self._size = IntVar(root) - self._size.set(self._sysfont.cget("size")) + self._size.set(self._sysfont.cget('size')) - self._boldfont = Font(family="helvetica", weight="bold", size=self._size.get()) - self._font = Font(family="helvetica", size=self._size.get()) + self._boldfont = Font(family='helvetica', weight='bold', size=self._size.get()) + self._font = Font(family='helvetica', size=self._size.get()) if self._size.get() < 0: big = self._size.get() - 2 else: big = self._size.get() + 2 - self._bigfont = Font(family="helvetica", weight="bold", size=big) + self._bigfont = Font(family='helvetica', weight='bold', size=big) def _init_exampleListbox(self, parent): self._exampleFrame = listframe = Frame(parent) - self._exampleFrame.pack(fill="both", side="left", padx=2) + self._exampleFrame.pack(fill='both', side='left', padx=2) self._exampleList_label = Label( - self._exampleFrame, font=self._boldfont, text="Examples" + self._exampleFrame, font=self._boldfont, text='Examples' ) self._exampleList_label.pack() self._exampleList = Listbox( self._exampleFrame, - selectmode="single", - relief="groove", - background="white", - foreground="#909090", + selectmode='single', + relief='groove', + background='white', + foreground='#909090', font=self._font, - selectforeground="#004040", - selectbackground="#c0f0c0", + selectforeground='#004040', + selectbackground='#c0f0c0', ) - self._exampleList.pack(side="right", fill="both", expand=1) + self._exampleList.pack(side='right', fill='both', expand=1) for example in self._examples: - self._exampleList.insert("end", (" %s" % example)) + self._exampleList.insert('end', (' %s' % example)) self._exampleList.config(height=min(len(self._examples), 25), width=40) # Add a scrollbar if there are more than 25 examples. if len(self._examples) > 25: - listscroll = Scrollbar(self._exampleFrame, orient="vertical") + listscroll = Scrollbar(self._exampleFrame, orient='vertical') self._exampleList.config(yscrollcommand=listscroll.set) listscroll.config(command=self._exampleList.yview) - listscroll.pack(side="left", fill="y") + listscroll.pack(side='left', fill='y') # If they select a example, apply it. 
- self._exampleList.bind("<>", self._exampleList_select) + self._exampleList.bind('<>', self._exampleList_select) def _init_readingListbox(self, parent): self._readingFrame = listframe = Frame(parent) - self._readingFrame.pack(fill="both", side="left", padx=2) + self._readingFrame.pack(fill='both', side='left', padx=2) self._readingList_label = Label( - self._readingFrame, font=self._boldfont, text="Readings" + self._readingFrame, font=self._boldfont, text='Readings' ) self._readingList_label.pack() self._readingList = Listbox( self._readingFrame, - selectmode="single", - relief="groove", - background="white", - foreground="#909090", + selectmode='single', + relief='groove', + background='white', + foreground='#909090', font=self._font, - selectforeground="#004040", - selectbackground="#c0f0c0", + selectforeground='#004040', + selectbackground='#c0f0c0', ) - self._readingList.pack(side="right", fill="both", expand=1) + self._readingList.pack(side='right', fill='both', expand=1) # Add a scrollbar if there are more than 25 examples. - listscroll = Scrollbar(self._readingFrame, orient="vertical") + listscroll = Scrollbar(self._readingFrame, orient='vertical') self._readingList.config(yscrollcommand=listscroll.set) listscroll.config(command=self._readingList.yview) - listscroll.pack(side="right", fill="y") + listscroll.pack(side='right', fill='y') self._populate_readingListbox() def _populate_readingListbox(self): # Populate the listbox with integers - self._readingList.delete(0, "end") + self._readingList.delete(0, 'end') for i in range(len(self._readings)): - self._readingList.insert("end", (" %s" % (i + 1))) + self._readingList.insert('end', (' %s' % (i + 1))) self._readingList.config(height=min(len(self._readings), 25), width=5) # If they select a example, apply it. - self._readingList.bind("<>", self._readingList_select) + self._readingList.bind('<>', self._readingList_select) def _init_bindings(self): # Key bindings are a good thing. - self._top.bind("", self.destroy) - self._top.bind("", self.destroy) - self._top.bind("", self.destroy) - self._top.bind("n", self.next) - self._top.bind("", self.next) - self._top.bind("p", self.prev) - self._top.bind("", self.prev) + self._top.bind('', self.destroy) + self._top.bind('', self.destroy) + self._top.bind('', self.destroy) + self._top.bind('n', self.next) + self._top.bind('', self.next) + self._top.bind('p', self.prev) + self._top.bind('', self.prev) def _init_buttons(self, parent): # Set up the frames. 
self._buttonframe = buttonframe = Frame(parent) - buttonframe.pack(fill="none", side="bottom", padx=3, pady=2) + buttonframe.pack(fill='none', side='bottom', padx=3, pady=2) Button( buttonframe, - text="Prev", - background="#90c0d0", - foreground="black", + text='Prev', + background='#90c0d0', + foreground='black', command=self.prev, - ).pack(side="left") + ).pack(side='left') Button( buttonframe, - text="Next", - background="#90c0d0", - foreground="black", + text='Next', + background='#90c0d0', + foreground='black', command=self.next, - ).pack(side="left") + ).pack(side='left') def _configure(self, event): self._autostep = 0 (x1, y1, x2, y2) = self._cframe.scrollregion() y2 = event.height - 6 - self._canvas["scrollregion"] = "%d %d %d %d" % (x1, y1, x2, y2) + self._canvas['scrollregion'] = '%d %d %d %d' % (x1, y1, x2, y2) self._redraw() def _init_canvas(self, parent): self._cframe = CanvasFrame( parent, - background="white", + background='white', # width=525, height=250, closeenough=10, border=2, - relief="sunken", + relief='sunken', ) - self._cframe.pack(expand=1, fill="both", side="top", pady=2) + self._cframe.pack(expand=1, fill='both', side='top', pady=2) canvas = self._canvas = self._cframe.canvas() # Initially, there's no tree or text @@ -241,70 +241,70 @@ class DrtGlueDemo(object): filemenu = Menu(menubar, tearoff=0) filemenu.add_command( - label="Exit", underline=1, command=self.destroy, accelerator="q" + label='Exit', underline=1, command=self.destroy, accelerator='q' ) - menubar.add_cascade(label="File", underline=0, menu=filemenu) + menubar.add_cascade(label='File', underline=0, menu=filemenu) actionmenu = Menu(menubar, tearoff=0) actionmenu.add_command( - label="Next", underline=0, command=self.next, accelerator="n, Space" + label='Next', underline=0, command=self.next, accelerator='n, Space' ) actionmenu.add_command( - label="Previous", underline=0, command=self.prev, accelerator="p, Backspace" + label='Previous', underline=0, command=self.prev, accelerator='p, Backspace' ) - menubar.add_cascade(label="Action", underline=0, menu=actionmenu) + menubar.add_cascade(label='Action', underline=0, menu=actionmenu) optionmenu = Menu(menubar, tearoff=0) optionmenu.add_checkbutton( - label="Remove Duplicates", + label='Remove Duplicates', underline=0, variable=self._glue.remove_duplicates, command=self._toggle_remove_duplicates, - accelerator="r", + accelerator='r', ) - menubar.add_cascade(label="Options", underline=0, menu=optionmenu) + menubar.add_cascade(label='Options', underline=0, menu=optionmenu) viewmenu = Menu(menubar, tearoff=0) viewmenu.add_radiobutton( - label="Tiny", + label='Tiny', variable=self._size, underline=0, value=10, command=self.resize, ) viewmenu.add_radiobutton( - label="Small", + label='Small', variable=self._size, underline=0, value=12, command=self.resize, ) viewmenu.add_radiobutton( - label="Medium", + label='Medium', variable=self._size, underline=0, value=14, command=self.resize, ) viewmenu.add_radiobutton( - label="Large", + label='Large', variable=self._size, underline=0, value=18, command=self.resize, ) viewmenu.add_radiobutton( - label="Huge", + label='Huge', variable=self._size, underline=0, value=24, command=self.resize, ) - menubar.add_cascade(label="View", underline=0, menu=viewmenu) + menubar.add_cascade(label='View', underline=0, menu=viewmenu) helpmenu = Menu(menubar, tearoff=0) - helpmenu.add_command(label="About", underline=0, command=self.about) - menubar.add_cascade(label="Help", underline=0, menu=helpmenu) + 
helpmenu.add_command(label='About', underline=0, command=self.about) + menubar.add_cascade(label='Help', underline=0, menu=helpmenu) parent.config(menu=menubar) @@ -405,9 +405,9 @@ class DrtGlueDemo(object): "NLTK Discourse Representation Theory (DRT) Glue Semantics Demo\n" + "Written by Daniel H. Garrette" ) - TITLE = "About: NLTK DRT Glue Demo" + TITLE = 'About: NLTK DRT Glue Demo' try: - from tkinter.messagebox import Message + from six.moves.tkinter_messagebox import Message Message(message=ABOUT, title=TITLE).show() except: @@ -441,7 +441,7 @@ class DrtGlueDemo(object): def _toggle_remove_duplicates(self): self._glue.remove_duplicates = not self._glue.remove_duplicates - self._exampleList.selection_clear(0, "end") + self._exampleList.selection_clear(0, 'end') self._readings = [] self._populate_readingListbox() self._readingCache = [None for ex in self._examples] @@ -461,7 +461,7 @@ class DrtGlueDemo(object): self._curExample = index example = self._examples[index] - self._exampleList.selection_clear(0, "end") + self._exampleList.selection_clear(0, 'end') if example: cache = self._readingCache[index] if cache: @@ -478,12 +478,12 @@ class DrtGlueDemo(object): self._readingCache[index] = self._readings except Exception as e: self._readings = [] - self._error = DrtVariableExpression(Variable("Error: " + str(e))) + self._error = DrtVariableExpression(Variable('Error: ' + str(e))) self._readingCache[index] = self._error # add a star to the end of the example self._exampleList.delete(index) - self._exampleList.insert(index, (" %s *" % example)) + self._exampleList.insert(index, (' %s *' % example)) self._exampleList.config( height=min(len(self._examples), 25), width=40 ) @@ -504,7 +504,7 @@ class DrtGlueDemo(object): def _readingList_store_selection(self, index): reading = self._readings[index] - self._readingList.selection_clear(0, "end") + self._readingList.selection_clear(0, 'end') if reading: self._readingList.selection_set(index) @@ -518,7 +518,7 @@ class DrsWidget(object): self._drs = drs self._canvas = canvas canvas.font = Font( - font=canvas.itemcget(canvas.create_text(0, 0, text=""), "font") + font=canvas.itemcget(canvas.create_text(0, 0, text=''), 'font') ) canvas._BUFFER = 3 self.bbox = (0, 0, 0, 0) @@ -533,13 +533,13 @@ class DrsWidget(object): def demo(): examples = [ - "John walks", - "David sees Mary", - "David eats a sandwich", - "every man chases a dog", + 'John walks', + 'David sees Mary', + 'David eats a sandwich', + 'every man chases a dog', # 'every man believes a dog yawns', # 'John gives David a sandwich', - "John chases himself", + 'John chases himself', # 'John persuades David to order a pizza', # 'John tries to go', # 'John tries to find a unicorn', @@ -557,5 +557,5 @@ def demo(): DrtGlueDemo(examples).mainloop() -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/sem/evaluate.py b/nlp_resource_data/nltk/sem/evaluate.py index 3a1eab0..adc0716 100644 --- a/nlp_resource_data/nltk/sem/evaluate.py +++ b/nlp_resource_data/nltk/sem/evaluate.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Models for first-order languages with lambda # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Ewan Klein , # URL: # For license information, see LICENSE.TXT @@ -13,6 +13,7 @@ This module provides data structures for representing first-order models. 
""" +from __future__ import print_function, unicode_literals from pprint import pformat import inspect @@ -20,7 +21,10 @@ import textwrap import re import sys +from six import string_types + from nltk.decorators import decorator # this used in code that is commented out +from nltk.compat import python_2_unicode_compatible from nltk.sem.logic import ( AbstractVariableExpression, @@ -50,9 +54,12 @@ class Undefined(Error): def trace(f, *args, **kw): - argspec = inspect.getfullargspec(f) + if sys.version_info[0] >= 3: + argspec = inspect.getfullargspec(f) + else: + argspec = inspect.getargspec(f) d = dict(zip(argspec[0], args)) - if d.pop("trace", None): + if d.pop('trace', None): print() for item in d.items(): print("%s => %s" % item) @@ -92,7 +99,7 @@ def set2rel(s): """ new = set() for elem in s: - if isinstance(elem, str): + if isinstance(elem, string_types): new.add((elem,)) elif isinstance(elem, int): new.add((str(elem))) @@ -112,6 +119,7 @@ def arity(rel): return len(list(rel)[0]) +@python_2_unicode_compatible class Valuation(dict): """ A dictionary which represents a model-theoretic Valuation of non-logical constants. @@ -130,7 +138,7 @@ class Valuation(dict): """ super(Valuation, self).__init__() for (sym, val) in xs: - if isinstance(val, str) or isinstance(val, bool): + if isinstance(val, string_types) or isinstance(val, bool): self[sym] = val elif isinstance(val, set): self[sym] = set2rel(val) @@ -157,7 +165,7 @@ class Valuation(dict): """Set-theoretic domain of the value-space of a Valuation.""" dom = [] for val in self.values(): - if isinstance(val, str): + if isinstance(val, string_types): dom.append(val) elif not isinstance(val, bool): dom.extend( @@ -178,8 +186,8 @@ class Valuation(dict): ########################################## # REs used by the _read_valuation function ########################################## -_VAL_SPLIT_RE = re.compile(r"\s*=+>\s*") -_ELEMENT_SPLIT_RE = re.compile(r"\s*,\s*") +_VAL_SPLIT_RE = re.compile(r'\s*=+>\s*') +_ELEMENT_SPLIT_RE = re.compile(r'\s*,\s*') _TUPLES_RE = re.compile( r"""\s* (\([^)]+\)) # tuple-expression @@ -207,7 +215,7 @@ def _read_valuation_line(s): symbol = pieces[0] value = pieces[1] # check whether the value is meant to be a set - if value.startswith("{"): + if value.startswith('{'): value = value[1:-1] tuple_strings = _TUPLES_RE.findall(value) # are the set elements tuples? @@ -239,15 +247,16 @@ def read_valuation(s, encoding=None): statements = [] for linenum, line in enumerate(s.splitlines()): line = line.strip() - if line.startswith("#") or line == "": + if line.startswith('#') or line == '': continue try: statements.append(_read_valuation_line(line)) except ValueError: - raise ValueError("Unable to parse line %s: %s" % (linenum, line)) + raise ValueError('Unable to parse line %s: %s' % (linenum, line)) return Valuation(statements) +@python_2_unicode_compatible class Assignment(dict): """ A dictionary which represents an assignment of values to variables. @@ -377,6 +386,7 @@ class Assignment(dict): return self +@python_2_unicode_compatible class Model(object): """ A first order model is a domain *D* of discourse and a valuation *V*. @@ -431,7 +441,7 @@ class Model(object): if trace: print() print("'%s' is undefined under M, %s" % (expr, g)) - return "Undefined" + return 'Undefined' def satisfy(self, parsed, g, trace=None): """ @@ -541,11 +551,11 @@ class Model(object): :return: a set of the entities that satisfy ``parsed``. 
""" - spacer = " " + spacer = ' ' indent = spacer + (spacer * nesting) candidates = [] - if isinstance(varex, str): + if isinstance(varex, string_types): var = Variable(varex) else: var = varex @@ -605,37 +615,37 @@ def propdemo(trace=None): """Example of a propositional model.""" global val1, dom1, m1, g1 - val1 = Valuation([("P", True), ("Q", True), ("R", False)]) + val1 = Valuation([('P', True), ('Q', True), ('R', False)]) dom1 = set([]) m1 = Model(dom1, val1) g1 = Assignment(dom1) print() - print("*" * mult) + print('*' * mult) print("Propositional Formulas Demo") - print("*" * mult) - print("(Propositional constants treated as nullary predicates)") + print('*' * mult) + print('(Propositional constants treated as nullary predicates)') print() print("Model m1:\n", m1) - print("*" * mult) + print('*' * mult) sentences = [ - "(P & Q)", - "(P & R)", - "- P", - "- R", - "- - P", - "- (P & R)", - "(P | R)", - "(R | P)", - "(R | R)", - "(- P | R)", - "(P | - P)", - "(P -> Q)", - "(P -> R)", - "(R -> P)", - "(P <-> P)", - "(R <-> R)", - "(P <-> R)", + '(P & Q)', + '(P & R)', + '- P', + '- R', + '- - P', + '- (P & R)', + '(P | R)', + '(R | P)', + '(R | R)', + '(- P | R)', + '(P | - P)', + '(P -> Q)', + '(P -> R)', + '(R -> P)', + '(P <-> P)', + '(R <-> R)', + '(P <-> R)', ] for sent in sentences: @@ -656,28 +666,28 @@ def folmodel(quiet=False, trace=None): global val2, v2, dom2, m2, g2 v2 = [ - ("adam", "b1"), - ("betty", "g1"), - ("fido", "d1"), - ("girl", set(["g1", "g2"])), - ("boy", set(["b1", "b2"])), - ("dog", set(["d1"])), - ("love", set([("b1", "g1"), ("b2", "g2"), ("g1", "b1"), ("g2", "b1")])), + ('adam', 'b1'), + ('betty', 'g1'), + ('fido', 'd1'), + ('girl', set(['g1', 'g2'])), + ('boy', set(['b1', 'b2'])), + ('dog', set(['d1'])), + ('love', set([('b1', 'g1'), ('b2', 'g2'), ('g1', 'b1'), ('g2', 'b1')])), ] val2 = Valuation(v2) dom2 = val2.domain m2 = Model(dom2, val2) - g2 = Assignment(dom2, [("x", "b1"), ("y", "g2")]) + g2 = Assignment(dom2, [('x', 'b1'), ('y', 'g2')]) if not quiet: print() - print("*" * mult) + print('*' * mult) print("Models Demo") print("*" * mult) print("Model m2:\n", "-" * 14, "\n", m2) print("Variable assignment = ", g2) - exprs = ["adam", "boy", "love", "walks", "x", "y", "z"] + exprs = ['adam', 'boy', 'love', 'walks', 'x', 'y', 'z'] parsed_exprs = [Expression.fromstring(e) for e in exprs] print() @@ -691,10 +701,10 @@ def folmodel(quiet=False, trace=None): print("The interpretation of '%s' in m2 is Undefined" % parsed) applications = [ - ("boy", ("adam")), - ("walks", ("adam",)), - ("love", ("adam", "y")), - ("love", ("y", "adam")), + ('boy', ('adam')), + ('walks', ('adam',)), + ('love', ('adam', 'y')), + ('love', ('y', 'adam')), ] for (fun, args) in applications: @@ -717,29 +727,29 @@ def foldemo(trace=None): folmodel(quiet=True) print() - print("*" * mult) + print('*' * mult) print("FOL Formulas Demo") - print("*" * mult) + print('*' * mult) formulas = [ - "love (adam, betty)", - "(adam = mia)", - "\\x. (boy(x) | girl(x))", - "\\x. boy(x)(adam)", - "\\x y. love(x, y)", - "\\x y. love(x, y)(adam)(betty)", - "\\x y. love(x, y)(adam, betty)", - "\\x y. (boy(x) & love(x, y))", - "\\x. exists y. (boy(x) & love(x, y))", - "exists z1. boy(z1)", - "exists x. (boy(x) & -(x = adam))", - "exists x. (boy(x) & all y. love(y, x))", - "all x. (boy(x) | girl(x))", - "all x. (girl(x) -> exists y. boy(y) & love(x, y))", # Every girl loves exists boy. - "exists x. (boy(x) & all y. (girl(y) -> love(y, x)))", # There is exists boy that every girl loves. - "exists x. 
(boy(x) & all y. (girl(y) -> love(x, y)))", # exists boy loves every girl. - "all x. (dog(x) -> - girl(x))", - "exists x. exists y. (love(x, y) & love(x, y))", + 'love (adam, betty)', + '(adam = mia)', + '\\x. (boy(x) | girl(x))', + '\\x. boy(x)(adam)', + '\\x y. love(x, y)', + '\\x y. love(x, y)(adam)(betty)', + '\\x y. love(x, y)(adam, betty)', + '\\x y. (boy(x) & love(x, y))', + '\\x. exists y. (boy(x) & love(x, y))', + 'exists z1. boy(z1)', + 'exists x. (boy(x) & -(x = adam))', + 'exists x. (boy(x) & all y. love(y, x))', + 'all x. (boy(x) | girl(x))', + 'all x. (girl(x) -> exists y. boy(y) & love(x, y))', # Every girl loves exists boy. + 'exists x. (boy(x) & all y. (girl(y) -> love(y, x)))', # There is exists boy that every girl loves. + 'exists x. (boy(x) & all y. (girl(y) -> love(x, y)))', # exists boy loves every girl. + 'all x. (dog(x) -> - girl(x))', + 'exists x. exists y. (love(x, y) & love(x, y))', ] for fmla in formulas: @@ -758,32 +768,32 @@ def satdemo(trace=None): """Satisfiers of an open formula in a first order model.""" print() - print("*" * mult) + print('*' * mult) print("Satisfiers Demo") - print("*" * mult) + print('*' * mult) folmodel(quiet=True) formulas = [ - "boy(x)", - "(x = x)", - "(boy(x) | girl(x))", - "(boy(x) & girl(x))", - "love(adam, x)", - "love(x, adam)", - "-(x = adam)", - "exists z22. love(x, z22)", - "exists y. love(y, x)", - "all y. (girl(y) -> love(x, y))", - "all y. (girl(y) -> love(y, x))", - "all y. (girl(y) -> (boy(x) & love(y, x)))", - "(boy(x) & all y. (girl(y) -> love(x, y)))", - "(boy(x) & all y. (girl(y) -> love(y, x)))", - "(boy(x) & exists y. (girl(y) & love(y, x)))", - "(girl(x) -> dog(x))", - "all y. (dog(y) -> (x = y))", - "exists y. love(y, x)", - "exists y. (love(adam, y) & love(y, x))", + 'boy(x)', + '(x = x)', + '(boy(x) | girl(x))', + '(boy(x) & girl(x))', + 'love(adam, x)', + 'love(x, adam)', + '-(x = adam)', + 'exists z22. love(x, z22)', + 'exists y. love(y, x)', + 'all y. (girl(y) -> love(x, y))', + 'all y. (girl(y) -> love(y, x))', + 'all y. (girl(y) -> (boy(x) & love(y, x)))', + '(boy(x) & all y. (girl(y) -> love(x, y)))', + '(boy(x) & all y. (girl(y) -> love(y, x)))', + '(boy(x) & exists y. (girl(y) & love(y, x)))', + '(girl(x) -> dog(x))', + 'all y. (dog(y) -> (x = y))', + 'exists y. love(y, x)', + 'exists y. 
(love(adam, y) & love(y, x))', ] if trace: @@ -797,7 +807,7 @@ def satdemo(trace=None): for p in parsed: g2.purge() - print("The satisfiers of '%s' are: %s" % (p, m2.satisfiers(p, "x", g2, trace))) + print("The satisfiers of '%s' are: %s" % (p, m2.satisfiers(p, 'x', g2, trace))) def demo(num=0, trace=None): diff --git a/nlp_resource_data/nltk/sem/glue.py b/nlp_resource_data/nltk/sem/glue.py index 684c90c..9fd3cab 100644 --- a/nlp_resource_data/nltk/sem/glue.py +++ b/nlp_resource_data/nltk/sem/glue.py @@ -2,13 +2,16 @@ # # Author: Dan Garrette # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # URL: # For license information, see LICENSE.TXT +from __future__ import print_function, division, unicode_literals import os from itertools import chain +from six import string_types + import nltk from nltk.internals import Counter from nltk.tag import UnigramTagger, BigramTagger, TrigramTagger, RegexpTagger @@ -19,43 +22,45 @@ from nltk.sem.logic import ( LambdaExpression, AbstractVariableExpression, ) +from nltk.compat import python_2_unicode_compatible from nltk.sem import drt from nltk.sem import linearlogic SPEC_SEMTYPES = { - "a": "ex_quant", - "an": "ex_quant", - "every": "univ_quant", - "the": "def_art", - "no": "no_quant", - "default": "ex_quant", + 'a': 'ex_quant', + 'an': 'ex_quant', + 'every': 'univ_quant', + 'the': 'def_art', + 'no': 'no_quant', + 'default': 'ex_quant', } -OPTIONAL_RELATIONSHIPS = ["nmod", "vmod", "punct"] +OPTIONAL_RELATIONSHIPS = ['nmod', 'vmod', 'punct'] +@python_2_unicode_compatible class GlueFormula(object): def __init__(self, meaning, glue, indices=None): if not indices: indices = set() - if isinstance(meaning, str): + if isinstance(meaning, string_types): self.meaning = Expression.fromstring(meaning) elif isinstance(meaning, Expression): self.meaning = meaning else: raise RuntimeError( - "Meaning term neither string or expression: %s, %s" + 'Meaning term neither string or expression: %s, %s' % (meaning, meaning.__class__) ) - if isinstance(glue, str): + if isinstance(glue, string_types): self.glue = linearlogic.LinearLogicParser().parse(glue) elif isinstance(glue, linearlogic.Expression): self.glue = glue else: raise RuntimeError( - "Glue term neither string or expression: %s, %s" + 'Glue term neither string or expression: %s, %s' % (glue, glue.__class__) ) @@ -88,7 +93,7 @@ class GlueFormula(object): ::-1 ]: # if self.glue is (A -o B), dep is in A.dependencies arg_meaning_abstracted = self.make_LambdaExpression( - Variable("v%s" % dep), arg_meaning_abstracted + Variable('v%s' % dep), arg_meaning_abstracted ) return_meaning = self.meaning.applyto(arg_meaning_abstracted) @@ -140,15 +145,16 @@ class GlueFormula(object): def __str__(self): assert isinstance(self.indices, set) - accum = "%s : %s" % (self.meaning, self.glue) + accum = '%s : %s' % (self.meaning, self.glue) if self.indices: - accum += " : {" + ", ".join(str(index) for index in self.indices) + "}" + accum += ' : {' + ', '.join(str(index) for index in self.indices) + '}' return accum def __repr__(self): return "%s" % self +@python_2_unicode_compatible class GlueDict(dict): def __init__(self, filename, encoding=None): self.filename = filename @@ -161,13 +167,13 @@ class GlueDict(dict): try: contents = nltk.data.load( - self.filename, format="text", encoding=self.file_encoding + self.filename, format='text', encoding=self.file_encoding ) # TODO: the above can't handle zip files, but this should anyway be fixed in nltk.data.load() except LookupError as e: try: contents = 
nltk.data.load( - "file:" + self.filename, format="text", encoding=self.file_encoding + 'file:' + self.filename, format='text', encoding=self.file_encoding ) except LookupError: raise e @@ -178,11 +184,11 @@ class GlueDict(dict): line = line.strip() # remove trailing newline if not len(line): continue # skip empty lines - if line[0] == "#": + if line[0] == '#': continue # skip commented out lines parts = line.split( - " : ", 2 + ' : ', 2 ) # ['verb', '(\\x.( x), ( subj -o f ))', '[subj]'] glue_formulas = [] @@ -194,11 +200,11 @@ class GlueDict(dict): if len(parts) > 1: for (i, c) in enumerate(parts[1]): - if c == "(": + if c == '(': if paren_count == 0: # if it's the first '(' of a tuple tuple_start = i + 1 # then save the index paren_count += 1 - elif c == ")": + elif c == ')': paren_count -= 1 if paren_count == 0: # if it's the last ')' of a tuple meaning_term = parts[1][ @@ -208,33 +214,33 @@ class GlueDict(dict): glue_formulas.append( [meaning_term, glue_term] ) # add the GlueFormula to the list - elif c == ",": + elif c == ',': if ( paren_count == 1 ): # if it's a comma separating the parts of the tuple tuple_comma = i # then save the index - elif c == "#": # skip comments at the ends of lines + elif c == '#': # skip comments at the ends of lines if ( paren_count != 0 ): # if the line hasn't parsed correctly so far raise RuntimeError( - "Formula syntax is incorrect for entry " + line + 'Formula syntax is incorrect for entry ' + line ) break # break to the next line if len(parts) > 2: # if there is a relationship entry at the end - rel_start = parts[2].index("[") + 1 - rel_end = parts[2].index("]") + rel_start = parts[2].index('[') + 1 + rel_end = parts[2].index(']') if rel_start == rel_end: relationships = frozenset() else: relationships = frozenset( - r.strip() for r in parts[2][rel_start:rel_end].split(",") + r.strip() for r in parts[2][rel_start:rel_end].split(',') ) try: - start_inheritance = parts[0].index("(") - end_inheritance = parts[0].index(")") + start_inheritance = parts[0].index('(') + end_inheritance = parts[0].index(')') sem = parts[0][:start_inheritance].strip() supertype = parts[0][start_inheritance + 1 : end_inheritance] except: @@ -273,20 +279,20 @@ class GlueDict(dict): ) # add the glue entry to the dictionary def __str__(self): - accum = "" + accum = '' for pos in self: str_pos = "%s" % pos for relset in self[pos]: i = 1 for gf in self[pos][relset]: if i == 1: - accum += str_pos + ": " + accum += str_pos + ': ' else: - accum += " " * (len(str_pos) + 2) + accum += ' ' * (len(str_pos) + 2) accum += "%s" % gf if relset and i == len(self[pos][relset]): - accum += " : %s" % relset - accum += "\n" + accum += ' : %s' % relset + accum += '\n' i += 1 return accum @@ -294,13 +300,13 @@ class GlueDict(dict): if node is None: # TODO: should it be depgraph.root? Is this code tested? 
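A GlueFormula, as handled by the GlueDict code in this hunk, pairs a meaning term with a linear-logic glue term, and premises combine by applying one to the other. A minimal sketch (the glue labels g and f are arbitrary example labels):

    from nltk.sem.glue import GlueFormula

    john = GlueFormula('John', 'g')                  # meaning : glue
    walks = GlueFormula(r'\x.walks(x)', '(g -o f)')
    john_walks = walks.applyto(john)                 # glue side becomes f
    print(john_walks.meaning.simplify())             # walks(John)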
top = depgraph.nodes[0] - depList = list(chain(*top["deps"].values())) + depList = list(chain(*top['deps'].values())) root = depgraph.nodes[depList[0]] return self.to_glueformula_list(depgraph, root, Counter(), verbose) glueformulas = self.lookup(node, depgraph, counter) - for dep_idx in chain(*node["deps"].values()): + for dep_idx in chain(*node['deps'].values()): dep = depgraph.nodes[dep_idx] glueformulas.extend( self.to_glueformula_list(depgraph, dep, counter, verbose) @@ -326,29 +332,29 @@ class GlueDict(dict): if not len(lookup): raise KeyError( "There is no GlueDict entry for sem type of '%s' " - "with tag '%s', and rel '%s'" % (node["word"], node["tag"], node["rel"]) + "with tag '%s', and rel '%s'" % (node['word'], node['tag'], node['rel']) ) return self.get_glueformulas_from_semtype_entry( - lookup, node["word"], node, depgraph, counter + lookup, node['word'], node, depgraph, counter ) def add_missing_dependencies(self, node, depgraph): - rel = node["rel"].lower() - - if rel == "main": - headnode = depgraph.nodes[node["head"]] - subj = self.lookup_unique("subj", headnode, depgraph) - relation = subj["rel"] - node["deps"].setdefault(relation, []) - node["deps"][relation].append(subj["address"]) + rel = node['rel'].lower() + + if rel == 'main': + headnode = depgraph.nodes[node['head']] + subj = self.lookup_unique('subj', headnode, depgraph) + relation = subj['rel'] + node['deps'].setdefault(relation, []) + node['deps'][relation].append(subj['address']) # node['deps'].append(subj['address']) def _lookup_semtype_option(self, semtype, node, depgraph): relationships = frozenset( - depgraph.nodes[dep]["rel"].lower() - for dep in chain(*node["deps"].values()) - if depgraph.nodes[dep]["rel"].lower() not in OPTIONAL_RELATIONSHIPS + depgraph.nodes[dep]['rel'].lower() + for dep in chain(*node['deps'].values()) + if depgraph.nodes[dep]['rel'].lower() not in OPTIONAL_RELATIONSHIPS ) try: @@ -379,18 +385,18 @@ class GlueDict(dict): Based on the node, return a list of plausible semtypes in order of plausibility. 
""" - rel = node["rel"].lower() - word = node["word"].lower() + rel = node['rel'].lower() + word = node['word'].lower() - if rel == "spec": + if rel == 'spec': if word in SPEC_SEMTYPES: return [SPEC_SEMTYPES[word]] else: - return [SPEC_SEMTYPES["default"]] - elif rel in ["nmod", "vmod"]: - return [node["tag"], rel] + return [SPEC_SEMTYPES['default']] + elif rel in ['nmod', 'vmod']: + return [node['tag'], rel] else: - return [node["tag"]] + return [node['tag']] def get_glueformulas_from_semtype_entry( self, lookup, word, node, depgraph, counter @@ -403,7 +409,7 @@ class GlueDict(dict): if not len(glueformulas): gf.word = word else: - gf.word = "%s%s" % (word, len(glueformulas) + 1) + gf.word = '%s%s' % (word, len(glueformulas) + 1) gf.glue = self.initialize_labels(gf.glue, node, depgraph, counter.get()) @@ -416,8 +422,8 @@ class GlueDict(dict): parameter "" :param word: The actual word to be replace "" """ - word = word.replace(".", "") - return generic.replace("", word) + word = word.replace('.', '') + return generic.replace('', word) def initialize_labels(self, expr, node, depgraph, unique_index): if isinstance(expr, linearlogic.AtomicExpression): @@ -434,13 +440,13 @@ class GlueDict(dict): def find_label_name(self, name, node, depgraph, unique_index): try: - dot = name.index(".") + dot = name.index('.') before_dot = name[:dot] after_dot = name[dot + 1 :] - if before_dot == "super": + if before_dot == 'super': return self.find_label_name( - after_dot, depgraph.nodes[node["head"]], depgraph, unique_index + after_dot, depgraph.nodes[node['head']], depgraph, unique_index ) else: return self.find_label_name( @@ -451,20 +457,20 @@ class GlueDict(dict): ) except ValueError: lbl = self.get_label(node) - if name == "f": + if name == 'f': return lbl - elif name == "v": - return "%sv" % lbl - elif name == "r": - return "%sr" % lbl - elif name == "super": - return self.get_label(depgraph.nodes[node["head"]]) - elif name == "var": - return "%s%s" % (lbl.upper(), unique_index) - elif name == "a": - return self.get_label(self.lookup_unique("conja", node, depgraph)) - elif name == "b": - return self.get_label(self.lookup_unique("conjb", node, depgraph)) + elif name == 'v': + return '%sv' % lbl + elif name == 'r': + return '%sr' % lbl + elif name == 'super': + return self.get_label(depgraph.nodes[node['head']]) + elif name == 'var': + return '%s%s' % (lbl.upper(), unique_index) + elif name == 'a': + return self.get_label(self.lookup_unique('conja', node, depgraph)) + elif name == 'b': + return self.get_label(self.lookup_unique('conjb', node, depgraph)) else: return self.get_label(self.lookup_unique(name, node, depgraph)) @@ -475,35 +481,35 @@ class GlueDict(dict): :param value: where to index into the list of characters :type value: int """ - value = node["address"] + value = node['address'] letter = [ - "f", - "g", - "h", - "i", - "j", - "k", - "l", - "m", - "n", - "o", - "p", - "q", - "r", - "s", - "t", - "u", - "v", - "w", - "x", - "y", - "z", - "a", - "b", - "c", - "d", - "e", + 'f', + 'g', + 'h', + 'i', + 'j', + 'k', + 'l', + 'm', + 'n', + 'o', + 'p', + 'q', + 'r', + 's', + 't', + 'u', + 'v', + 'w', + 'x', + 'y', + 'z', + 'a', + 'b', + 'c', + 'd', + 'e', ][value - 1] num = int(value) // 26 if num > 0: @@ -517,15 +523,15 @@ class GlueDict(dict): """ deps = [ depgraph.nodes[dep] - for dep in chain(*node["deps"].values()) - if depgraph.nodes[dep]["rel"].lower() == rel.lower() + for dep in chain(*node['deps'].values()) + if depgraph.nodes[dep]['rel'].lower() == rel.lower() ] if len(deps) == 0: - raise 
KeyError("'%s' doesn't contain a feature '%s'" % (node["word"], rel)) + raise KeyError("'%s' doesn't contain a feature '%s'" % (node['word'], rel)) elif len(deps) > 1: raise KeyError( - "'%s' should only have one feature '%s'" % (node["word"], rel) + "'%s' should only have one feature '%s'" % (node['word'], rel) ) else: return deps[0] @@ -550,7 +556,7 @@ class Glue(object): self.semtype_file = semtype_file else: self.semtype_file = os.path.join( - "grammars", "sample_grammars", "glue.semtype" + 'grammars', 'sample_grammars', 'glue.semtype' ) def train_depparser(self, depgraphs=None): @@ -559,7 +565,7 @@ class Glue(object): else: self.depparser.train_from_file( nltk.data.find( - os.path.join("grammars", "sample_grammars", "glue_train.conll") + os.path.join('grammars', 'sample_grammars', 'glue_train.conll') ) ) @@ -649,8 +655,8 @@ class Glue(object): # if there is an exception, the syntax of the formula # may not be understandable by the prover, so don't # throw out the reading. - print("Error when checking logical equality of statements", e) - + print('Error when checking logical equality of statements', e) + if add_reading: reading_list.append(glueformula.meaning) @@ -689,7 +695,7 @@ class Glue(object): return_list.extend(gf.compile(index_counter)) if self.verbose: - print("Compiled Glue Premises:") + print('Compiled Glue Premises:') for cgf in return_list: print(cgf) @@ -700,25 +706,25 @@ class Glue(object): regexp_tagger = RegexpTagger( [ - (r"^-?[0-9]+(.[0-9]+)?$", "CD"), # cardinal numbers - (r"(The|the|A|a|An|an)$", "AT"), # articles - (r".*able$", "JJ"), # adjectives - (r".*ness$", "NN"), # nouns formed from adjectives - (r".*ly$", "RB"), # adverbs - (r".*s$", "NNS"), # plural nouns - (r".*ing$", "VBG"), # gerunds - (r".*ed$", "VBD"), # past tense verbs - (r".*", "NN"), # nouns (default) + (r'^-?[0-9]+(.[0-9]+)?$', 'CD'), # cardinal numbers + (r'(The|the|A|a|An|an)$', 'AT'), # articles + (r'.*able$', 'JJ'), # adjectives + (r'.*ness$', 'NN'), # nouns formed from adjectives + (r'.*ly$', 'RB'), # adverbs + (r'.*s$', 'NNS'), # plural nouns + (r'.*ing$', 'VBG'), # gerunds + (r'.*ed$', 'VBD'), # past tense verbs + (r'.*', 'NN'), # nouns (default) ] ) - brown_train = brown.tagged_sents(categories="news") + brown_train = brown.tagged_sents(categories='news') unigram_tagger = UnigramTagger(brown_train, backoff=regexp_tagger) bigram_tagger = BigramTagger(brown_train, backoff=unigram_tagger) trigram_tagger = TrigramTagger(brown_train, backoff=bigram_tagger) # Override particular words main_tagger = RegexpTagger( - [(r"(A|a|An|an)$", "ex_quant"), (r"(Every|every|All|all)$", "univ_quant")], + [(r'(A|a|An|an)$', 'ex_quant'), (r'(Every|every|All|all)$', 'univ_quant')], backoff=trigram_tagger, ) @@ -730,23 +736,23 @@ class DrtGlueFormula(GlueFormula): if not indices: indices = set() - if isinstance(meaning, str): + if isinstance(meaning, string_types): self.meaning = drt.DrtExpression.fromstring(meaning) elif isinstance(meaning, drt.DrtExpression): self.meaning = meaning else: raise RuntimeError( - "Meaning term neither string or expression: %s, %s" + 'Meaning term neither string or expression: %s, %s' % (meaning, meaning.__class__) ) - if isinstance(glue, str): + if isinstance(glue, string_types): self.glue = linearlogic.LinearLogicParser().parse(glue) elif isinstance(glue, linearlogic.Expression): self.glue = glue else: raise RuntimeError( - "Glue term neither string or expression: %s, %s" + 'Glue term neither string or expression: %s, %s' % (glue, glue.__class__) ) @@ -770,7 +776,7 @@ class 
DrtGlue(Glue): ): if not semtype_file: semtype_file = os.path.join( - "grammars", "sample_grammars", "drt_glue.semtype" + 'grammars', 'sample_grammars', 'drt_glue.semtype' ) Glue.__init__(self, semtype_file, remove_duplicates, depparser, verbose) @@ -782,12 +788,12 @@ def demo(show_example=-1): from nltk.parse import MaltParser examples = [ - "David sees Mary", - "David eats a sandwich", - "every man chases a dog", - "every man believes a dog sleeps", - "John gives David a sandwich", - "John chases himself", + 'David sees Mary', + 'David eats a sandwich', + 'every man chases a dog', + 'every man believes a dog sleeps', + 'John gives David a sandwich', + 'John chases himself', ] # 'John persuades David to order a pizza', # 'John tries to go', @@ -799,21 +805,21 @@ def demo(show_example=-1): # 'every big gray cat leaves', # 'a former senator leaves', - print("============== DEMO ==============") + print('============== DEMO ==============') tagger = RegexpTagger( [ - ("^(David|Mary|John)$", "NNP"), + ('^(David|Mary|John)$', 'NNP'), ( - "^(sees|eats|chases|believes|gives|sleeps|chases|persuades|tries|seems|leaves)$", - "VB", + '^(sees|eats|chases|believes|gives|sleeps|chases|persuades|tries|seems|leaves)$', + 'VB', ), - ("^(go|order|vanish|find|approach)$", "VB"), - ("^(a)$", "ex_quant"), - ("^(every)$", "univ_quant"), - ("^(sandwich|man|dog|pizza|unicorn|cat|senator)$", "NN"), - ("^(big|gray|former)$", "JJ"), - ("^(him|himself)$", "PRP"), + ('^(go|order|vanish|find|approach)$', 'VB'), + ('^(a)$', 'ex_quant'), + ('^(every)$', 'univ_quant'), + ('^(sandwich|man|dog|pizza|unicorn|cat|senator)$', 'NN'), + ('^(big|gray|former)$', 'JJ'), + ('^(him|himself)$', 'PRP'), ] ) @@ -822,11 +828,11 @@ def demo(show_example=-1): for (i, sentence) in enumerate(examples): if i == show_example or show_example == -1: - print("[[[Example %s]]] %s" % (i, sentence)) + print('[[[Example %s]]] %s' % (i, sentence)) for reading in glue.parse_to_meaning(sentence.split()): print(reading.simplify()) - print("") + print('') -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/sem/hole.py b/nlp_resource_data/nltk/sem/hole.py index bcd6dbf..32852b8 100644 --- a/nlp_resource_data/nltk/sem/hole.py +++ b/nlp_resource_data/nltk/sem/hole.py @@ -3,7 +3,7 @@ # Author: Peter Wang # Updated by: Dan Garrette # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # URL: # For license information, see LICENSE.TXT @@ -19,9 +19,13 @@ After parsing, the semantic representation is in the form of an underspecified representation that is not easy to read. We use a "plugging" algorithm to convert that representation into first-order logic formulas. 
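The quickest way to exercise the plugging algorithm described above is the module's hole_readings() helper, which the __main__ block at the end of this file also uses; it needs the sample grammar grammars/sample_grammars/hole.fcfg from the NLTK data package:

    from nltk.sem.hole import hole_readings

    # Each reading is a fully plugged first-order formula.
    for reading in hole_readings('every girl chases a dog'):
        print(reading)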
""" +from __future__ import print_function, unicode_literals from functools import reduce +from six import itervalues + +from nltk import compat from nltk.parse import load_parser from nltk.sem.skolemize import skolemize @@ -48,17 +52,17 @@ from nltk.sem.logic import ( class Constants(object): - ALL = "ALL" - EXISTS = "EXISTS" - NOT = "NOT" - AND = "AND" - OR = "OR" - IMP = "IMP" - IFF = "IFF" - PRED = "PRED" - LEQ = "LEQ" - HOLE = "HOLE" - LABEL = "LABEL" + ALL = 'ALL' + EXISTS = 'EXISTS' + NOT = 'NOT' + AND = 'AND' + OR = 'OR' + IMP = 'IMP' + IFF = 'IFF' + PRED = 'PRED' + LEQ = 'LEQ' + HOLE = 'HOLE' + LABEL = 'LABEL' MAP = { ALL: lambda v, e: AllExpression(v.variable, e), @@ -139,7 +143,7 @@ class HoleSemantics(object): def _find_top_nodes(self, node_list): top_nodes = node_list.copy() - for f in self.fragments.values(): + for f in itervalues(self.fragments): # the label is the first argument of the predicate args = f[1] for arg in args: @@ -203,7 +207,7 @@ class HoleSemantics(object): head = [(a, ancestors) for a in args if self.is_node(a)] self._plug_nodes(head + queue[1:], potential_labels, plug_acc, record) else: - raise Exception("queue empty") + raise Exception('queue empty') def _plug_hole(self, hole, ancestors0, queue, potential_labels0, plug_acc0, record): """ @@ -298,6 +302,7 @@ class HoleSemantics(object): return node +@compat.python_2_unicode_compatible class Constraint(object): """ This class represents a constraint of the form (L =< N), @@ -321,15 +326,15 @@ class Constraint(object): return hash(repr(self)) def __repr__(self): - return "(%s < %s)" % (self.lhs, self.rhs) + return '(%s < %s)' % (self.lhs, self.rhs) def hole_readings(sentence, grammar_filename=None, verbose=False): if not grammar_filename: - grammar_filename = "grammars/sample_grammars/hole.fcfg" + grammar_filename = 'grammars/sample_grammars/hole.fcfg' if verbose: - print("Reading grammar file", grammar_filename) + print('Reading grammar file', grammar_filename) parser = load_parser(grammar_filename) @@ -337,16 +342,16 @@ def hole_readings(sentence, grammar_filename=None, verbose=False): tokens = sentence.split() trees = list(parser.parse(tokens)) if verbose: - print("Got %d different parses" % len(trees)) + print('Got %d different parses' % len(trees)) all_readings = [] for tree in trees: # Get the semantic feature from the top of the parse tree. - sem = tree.label()["SEM"].simplify() + sem = tree.label()['SEM'].simplify() # Print the raw semantic representation. if verbose: - print("Raw: ", sem) + print('Raw: ', sem) # Skolemize away all quantifiers. All variables become unique. while isinstance(sem, LambdaExpression): @@ -354,7 +359,7 @@ def hole_readings(sentence, grammar_filename=None, verbose=False): skolemized = skolemize(sem) if verbose: - print("Skolemized:", skolemized) + print('Skolemized:', skolemized) # Break the hole semantics representation down into its components # i.e. holes, labels, formula fragments and constraints. @@ -362,14 +367,14 @@ def hole_readings(sentence, grammar_filename=None, verbose=False): # Maybe show the details of the semantic representation. 
if verbose: - print("Holes: ", hole_sem.holes) - print("Labels: ", hole_sem.labels) - print("Constraints: ", hole_sem.constraints) - print("Top hole: ", hole_sem.top_hole) - print("Top labels: ", hole_sem.top_most_labels) - print("Fragments:") + print('Holes: ', hole_sem.holes) + print('Labels: ', hole_sem.labels) + print('Constraints: ', hole_sem.constraints) + print('Top hole: ', hole_sem.top_hole) + print('Top labels: ', hole_sem.top_most_labels) + print('Fragments:') for l, f in hole_sem.fragments.items(): - print("\t%s: %s" % (l, f)) + print('\t%s: %s' % (l, f)) # Find all the possible ways to plug the formulas together. pluggings = hole_sem.pluggings() @@ -381,7 +386,7 @@ def hole_readings(sentence, grammar_filename=None, verbose=False): if verbose: for i, r in enumerate(readings): print() - print("%d. %s" % (i, r)) + print('%d. %s' % (i, r)) print() all_readings.extend(readings) @@ -389,9 +394,9 @@ def hole_readings(sentence, grammar_filename=None, verbose=False): return all_readings -if __name__ == "__main__": - for r in hole_readings("a dog barks"): +if __name__ == '__main__': + for r in hole_readings('a dog barks'): print(r) print() - for r in hole_readings("every girl chases a dog"): + for r in hole_readings('every girl chases a dog'): print(r) diff --git a/nlp_resource_data/nltk/sem/lfg.py b/nlp_resource_data/nltk/sem/lfg.py index d4decf0..9b6957e 100644 --- a/nlp_resource_data/nltk/sem/lfg.py +++ b/nlp_resource_data/nltk/sem/lfg.py @@ -2,15 +2,18 @@ # # Author: Dan Garrette # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # URL: # For license information, see LICENSE.TXT +from __future__ import print_function, division, unicode_literals from itertools import chain from nltk.internals import Counter +from nltk.compat import python_2_unicode_compatible +@python_2_unicode_compatible class FStructure(dict): def safeappend(self, key, item): """ @@ -40,15 +43,15 @@ class FStructure(dict): depgraph = DependencyGraph() nodes = depgraph.nodes - self._to_depgraph(nodes, 0, "ROOT") + self._to_depgraph(nodes, 0, 'ROOT') # Add all the dependencies for all the nodes for address, node in nodes.items(): - for n2 in (n for n in nodes.values() if n["rel"] != "TOP"): - if n2["head"] == address: - relation = n2["rel"] - node["deps"].setdefault(relation, []) - node["deps"][relation].append(n2["address"]) + for n2 in (n for n in nodes.values() if n['rel'] != 'TOP'): + if n2['head'] == address: + relation = n2['rel'] + node['deps'].setdefault(relation, []) + node['deps'][relation].append(n2['address']) depgraph.root = nodes[1] @@ -59,11 +62,11 @@ class FStructure(dict): nodes[index].update( { - "address": index, - "word": self.pred[0], - "tag": self.pred[1], - "head": head, - "rel": rel, + 'address': index, + 'word': self.pred[0], + 'tag': self.pred[1], + 'head': head, + 'rel': rel, } ) @@ -75,11 +78,11 @@ class FStructure(dict): new_index = len(nodes) nodes[new_index].update( { - "address": new_index, - "word": item[0], - "tag": item[1], - "head": index, - "rel": feature, + 'address': new_index, + 'word': item[0], + 'tag': item[1], + 'head': index, + 'rel': feature, } ) elif isinstance(item, list): @@ -87,7 +90,7 @@ class FStructure(dict): n._to_depgraph(nodes, index, feature) else: raise Exception( - "feature %s is not an FStruct, a list, or a tuple" % feature + 'feature %s is not an FStruct, a list, or a tuple' % feature ) @staticmethod @@ -99,9 +102,9 @@ class FStructure(dict): if not label_counter: label_counter = Counter() - if node["rel"].lower() in ["spec", 
"punct"]: + if node['rel'].lower() in ['spec', 'punct']: # the value of a 'spec' entry is a word, not an FStructure - return (node["word"], node["tag"]) + return (node['word'], node['tag']) else: fstruct = FStructure() @@ -110,19 +113,19 @@ class FStructure(dict): fstruct.parent = parent - word, tag = node["word"], node["tag"] - if tag[:2] == "VB": - if tag[2:3] == "D": - fstruct.safeappend("tense", ("PAST", "tense")) + word, tag = node['word'], node['tag'] + if tag[:2] == 'VB': + if tag[2:3] == 'D': + fstruct.safeappend('tense', ('PAST', 'tense')) fstruct.pred = (word, tag[:2]) if not fstruct.pred: fstruct.pred = (word, tag) - children = [depgraph.nodes[idx] for idx in chain(*node["deps"].values())] + children = [depgraph.nodes[idx] for idx in chain(*node['deps'].values())] for child in children: fstruct.safeappend( - child["rel"], + child['rel'], FStructure._read_depgraph(child, depgraph, label_counter, fstruct), ) @@ -137,32 +140,32 @@ class FStructure(dict): :type value: int """ letter = [ - "f", - "g", - "h", - "i", - "j", - "k", - "l", - "m", - "n", - "o", - "p", - "q", - "r", - "s", - "t", - "u", - "v", - "w", - "x", - "y", - "z", - "a", - "b", - "c", - "d", - "e", + 'f', + 'g', + 'h', + 'i', + 'j', + 'k', + 'l', + 'm', + 'n', + 'o', + 'p', + 'q', + 'r', + 's', + 't', + 'u', + 'v', + 'w', + 'x', + 'y', + 'z', + 'a', + 'b', + 'c', + 'd', + 'e', ][value - 1] num = int(value) // 26 if num > 0: @@ -171,18 +174,18 @@ class FStructure(dict): return letter def __repr__(self): - return self.__str__().replace("\n", "") + return self.__unicode__().replace('\n', '') def __str__(self): return self.pretty_format() def pretty_format(self, indent=3): try: - accum = "%s:[" % self.label + accum = '%s:[' % self.label except NameError: - accum = "[" + accum = '[' try: - accum += "pred '%s'" % (self.pred[0]) + accum += 'pred \'%s\'' % (self.pred[0]) except NameError: pass @@ -190,24 +193,24 @@ class FStructure(dict): for item in self[feature]: if isinstance(item, FStructure): next_indent = indent + len(feature) + 3 + len(self.label) - accum += "\n%s%s %s" % ( - " " * (indent), + accum += '\n%s%s %s' % ( + ' ' * (indent), feature, item.pretty_format(next_indent), ) elif isinstance(item, tuple): - accum += "\n%s%s '%s'" % (" " * (indent), feature, item[0]) + accum += '\n%s%s \'%s\'' % (' ' * (indent), feature, item[0]) elif isinstance(item, list): - accum += "\n%s%s {%s}" % ( - " " * (indent), + accum += '\n%s%s {%s}' % ( + ' ' * (indent), feature, - ("\n%s" % (" " * (indent + len(feature) + 2))).join(item), + ('\n%s' % (' ' * (indent + len(feature) + 2))).join(item), ) else: # ERROR raise Exception( - "feature %s is not an FStruct, a list, or a tuple" % feature + 'feature %s is not an FStruct, a list, or a tuple' % feature ) - return accum + "]" + return accum + ']' def demo_read_depgraph(): @@ -254,5 +257,5 @@ dog NN 3 OBJ print(FStructure.read_depgraph(dg)) -if __name__ == "__main__": +if __name__ == '__main__': demo_read_depgraph() diff --git a/nlp_resource_data/nltk/sem/linearlogic.py b/nlp_resource_data/nltk/sem/linearlogic.py index abd9d19..2725980 100644 --- a/nlp_resource_data/nltk/sem/linearlogic.py +++ b/nlp_resource_data/nltk/sem/linearlogic.py @@ -2,11 +2,15 @@ # # Author: Dan Garrette # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # URL: # For license information, see LICENSE.TXT +from __future__ import print_function, unicode_literals + +from six import string_types from nltk.internals import Counter +from nltk.compat import python_2_unicode_compatible 
from nltk.sem.logic import LogicParser, APP _counter = Counter() @@ -14,11 +18,11 @@ _counter = Counter() class Tokens(object): # Punctuation - OPEN = "(" - CLOSE = ")" + OPEN = '(' + CLOSE = ')' # Operations - IMP = "-o" + IMP = '-o' PUNCT = [OPEN, CLOSE] TOKENS = PUNCT + [IMP] @@ -71,6 +75,7 @@ class LinearLogicParser(LogicParser): return ConstantExpression(name) +@python_2_unicode_compatible class Expression(object): _linear_logic_parser = LinearLogicParser() @@ -86,16 +91,17 @@ class Expression(object): return self.applyto(other) def __repr__(self): - return "<%s %s>" % (self.__class__.__name__, self) + return '<%s %s>' % (self.__class__.__name__, self) +@python_2_unicode_compatible class AtomicExpression(Expression): def __init__(self, name, dependencies=None): """ :param name: str for the constant name :param dependencies: list of int for the indices on which this atom is dependent """ - assert isinstance(name, str) + assert isinstance(name, string_types) self.name = name if not dependencies: @@ -198,6 +204,7 @@ class VariableExpression(AtomicExpression): raise UnificationException(self, other, bindings) +@python_2_unicode_compatible class ImpExpression(Expression): def __init__(self, antecedent, consequent): """ @@ -257,7 +264,7 @@ class ImpExpression(Expression): (c, c_new) = self.consequent.compile_neg(index_counter, glueFormulaFactory) fresh_index = index_counter.get() c.dependencies.append(fresh_index) - new_v = glueFormulaFactory("v%s" % fresh_index, a, set([fresh_index])) + new_v = glueFormulaFactory('v%s' % fresh_index, a, set([fresh_index])) return (c, a_new + c_new + [new_v]) def initialize_labels(self, fstruct): @@ -285,10 +292,11 @@ class ImpExpression(Expression): def __hash__(self): return hash( - "%s%s%s" % (hash(self.antecedent), Tokens.IMP, hash(self.consequent)) + '%s%s%s' % (hash(self.antecedent), Tokens.IMP, hash(self.consequent)) ) +@python_2_unicode_compatible class ApplicationExpression(Expression): def __init__(self, function, argument, argument_indices=None): """ @@ -313,7 +321,7 @@ class ApplicationExpression(Expression): bindings += function_simp.antecedent.unify(argument_simp, bindings) except UnificationException as e: raise LinearLogicApplicationException( - "Cannot apply %s to %s. %s" % (function_simp, argument_simp, e) + 'Cannot apply %s to %s. 
%s' % (function_simp, argument_simp, e) ) # If you are running it on complied premises, more conditions apply @@ -321,12 +329,12 @@ class ApplicationExpression(Expression): # A.dependencies of (A -o (B -o C)) must be a proper subset of argument_indices if not set(function_simp.antecedent.dependencies) < argument_indices: raise LinearLogicApplicationException( - "Dependencies unfulfilled when attempting to apply Linear Logic formula %s to %s" + 'Dependencies unfulfilled when attempting to apply Linear Logic formula %s to %s' % (function_simp, argument_simp) ) if set(function_simp.antecedent.dependencies) == argument_indices: raise LinearLogicApplicationException( - "Dependencies not a proper subset of indices when attempting to apply Linear Logic formula %s to %s" + 'Dependencies not a proper subset of indices when attempting to apply Linear Logic formula %s to %s' % (function_simp, argument_simp) ) @@ -363,10 +371,11 @@ class ApplicationExpression(Expression): def __hash__(self): return hash( - "%s%s%s" % (hash(self.antecedent), Tokens.OPEN, hash(self.consequent)) + '%s%s%s' % (hash(self.antecedent), Tokens.OPEN, hash(self.consequent)) ) +@python_2_unicode_compatible class BindingDict(object): def __init__(self, bindings=None): """ @@ -403,7 +412,7 @@ class BindingDict(object): self.d[variable] = binding else: raise VariableBindingException( - "Variable %s already bound to another value" % (variable) + 'Variable %s already bound to another value' % (variable) ) def __getitem__(self, variable): @@ -437,8 +446,8 @@ class BindingDict(object): return combined except VariableBindingException: raise VariableBindingException( - "Attempting to add two contradicting" - " VariableBindingsLists: %s, %s" % (self, other) + 'Attempting to add two contradicting' + ' VariableBindingsLists: %s, %s' % (self, other) ) def __ne__(self, other): @@ -450,10 +459,10 @@ class BindingDict(object): return self.d == other.d def __str__(self): - return "{" + ", ".join("%s: %s" % (v, self.d[v]) for v in self.d) + "}" + return '{' + ', '.join('%s: %s' % (v, self.d[v]) for v in self.d) + '}' def __repr__(self): - return "BindingDict: %s" % self + return 'BindingDict: %s' % self class VariableBindingException(Exception): @@ -462,7 +471,7 @@ class VariableBindingException(Exception): class UnificationException(Exception): def __init__(self, a, b, bindings): - Exception.__init__(self, "Cannot unify %s with %s given %s" % (a, b, bindings)) + Exception.__init__(self, 'Cannot unify %s with %s given %s' % (a, b, bindings)) class LinearLogicApplicationException(Exception): @@ -472,15 +481,15 @@ class LinearLogicApplicationException(Exception): def demo(): lexpr = Expression.fromstring - print(lexpr(r"f")) - print(lexpr(r"(g -o f)")) - print(lexpr(r"((g -o G) -o G)")) - print(lexpr(r"g -o h -o f")) - print(lexpr(r"(g -o f)(g)").simplify()) - print(lexpr(r"(H -o f)(g)").simplify()) - print(lexpr(r"((g -o G) -o G)((g -o f))").simplify()) - print(lexpr(r"(H -o H)((g -o f))").simplify()) + print(lexpr(r'f')) + print(lexpr(r'(g -o f)')) + print(lexpr(r'((g -o G) -o G)')) + print(lexpr(r'g -o h -o f')) + print(lexpr(r'(g -o f)(g)').simplify()) + print(lexpr(r'(H -o f)(g)').simplify()) + print(lexpr(r'((g -o G) -o G)((g -o f))').simplify()) + print(lexpr(r'(H -o H)((g -o f))').simplify()) -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/sem/logic.py b/nlp_resource_data/nltk/sem/logic.py index c203e1f..fe5f73b 100644 --- a/nlp_resource_data/nltk/sem/logic.py +++ 
b/nlp_resource_data/nltk/sem/logic.py @@ -2,7 +2,7 @@ # # Author: Dan Garrette # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # URL: # For license information, see LICENSE.TXT @@ -10,51 +10,55 @@ A version of first order predicate logic, built on top of the typed lambda calculus. """ +from __future__ import print_function, unicode_literals import re import operator from collections import defaultdict from functools import reduce, total_ordering +from six import string_types + from nltk.util import Trie from nltk.internals import Counter +from nltk.compat import python_2_unicode_compatible -APP = "APP" +APP = 'APP' _counter = Counter() class Tokens(object): - LAMBDA = "\\" - LAMBDA_LIST = ["\\"] + LAMBDA = '\\' + LAMBDA_LIST = ['\\'] # Quantifiers - EXISTS = "exists" - EXISTS_LIST = ["some", "exists", "exist"] - ALL = "all" - ALL_LIST = ["all", "forall"] + EXISTS = 'exists' + EXISTS_LIST = ['some', 'exists', 'exist'] + ALL = 'all' + ALL_LIST = ['all', 'forall'] # Punctuation - DOT = "." - OPEN = "(" - CLOSE = ")" - COMMA = "," + DOT = '.' + OPEN = '(' + CLOSE = ')' + COMMA = ',' # Operations - NOT = "-" - NOT_LIST = ["not", "-", "!"] - AND = "&" - AND_LIST = ["and", "&", "^"] - OR = "|" - OR_LIST = ["or", "|"] - IMP = "->" - IMP_LIST = ["implies", "->", "=>"] - IFF = "<->" - IFF_LIST = ["iff", "<->", "<=>"] - EQ = "=" - EQ_LIST = ["=", "=="] - NEQ = "!=" - NEQ_LIST = ["!="] + NOT = '-' + NOT_LIST = ['not', '-', '!'] + AND = '&' + AND_LIST = ['and', '&', '^'] + OR = '|' + OR_LIST = ['or', '|'] + IMP = '->' + IMP_LIST = ['implies', '->', '=>'] + IFF = '<->' + IFF_LIST = ['iff', '<->', '<=>'] + EQ = '=' + EQ_LIST = ['=', '=='] + NEQ = '!=' + NEQ_LIST = ['!='] # Collections of tokens BINOPS = AND_LIST + OR_LIST + IMP_LIST + IFF_LIST @@ -64,7 +68,7 @@ class Tokens(object): TOKENS = BINOPS + EQ_LIST + NEQ_LIST + QUANTS + LAMBDA_LIST + PUNCT + NOT_LIST # Special - SYMBOLS = [x for x in TOKENS if re.match(r"^[-\\.(),!&^|>=<]*$", x)] + SYMBOLS = [x for x in TOKENS if re.match(r'^[-\\.(),!&^|>=<]*$', x)] def boolean_ops(): @@ -94,6 +98,7 @@ def binding_ops(): print("%-15s\t%s" % pair) +@python_2_unicode_compatible class LogicParser(object): """A lambda calculus expression parser.""" @@ -152,7 +157,7 @@ class LogicParser(object): if self.inRange(0): raise UnexpectedTokenException(self._currentIndex + 1, self.token(0)) except LogicalExpressionException as e: - msg = "%s\n%s\n%s^" % (e, data, " " * mapping[e.index - 1]) + msg = '%s\n%s\n%s^' % (e, data, ' ' * mapping[e.index - 1]) raise LogicalExpressionException(None, msg) if self.type_check: @@ -165,7 +170,7 @@ class LogicParser(object): out = [] mapping = {} tokenTrie = Trie(self.get_all_symbols()) - token = "" + token = '' data_idx = 0 token_start_idx = data_idx while data_idx < len(data): @@ -179,7 +184,7 @@ class LogicParser(object): st = tokenTrie c = data[data_idx] - symbol = "" + symbol = '' while c in st: symbol += c st = st[c] @@ -192,16 +197,16 @@ class LogicParser(object): if token: mapping[len(out)] = token_start_idx out.append(token) - token = "" + token = '' mapping[len(out)] = data_idx out.append(symbol) data_idx += len(symbol) else: - if data[data_idx] in " \t\n": # any whitespace + if data[data_idx] in ' \t\n': # any whitespace if token: mapping[len(out)] = token_start_idx out.append(token) - token = "" + token = '' else: if not token: token_start_idx = data_idx @@ -215,7 +220,7 @@ class LogicParser(object): return out, mapping def process_quoted_token(self, data_idx, data): - token = "" + token = '' 
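The tokenizer above feeds LogicParser.parse(), which is also what Expression.fromstring() delegates to; a quick round trip:

    from nltk.sem.logic import LogicParser

    lp = LogicParser()
    e = lp.parse(r'all x.(man(x) -> mortal(x))')
    print(e)          # all x.(man(x) -> mortal(x))
    print(e.free())   # set() -- the formula has no free variables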
c = data[data_idx] i = data_idx for start, end, escape, incl_quotes in self.quote_chars: @@ -246,7 +251,7 @@ class LogicParser(object): token += data[i] i += 1 if not token: - raise LogicalExpressionException(None, "Empty quoted token found") + raise LogicalExpressionException(None, 'Empty quoted token found') break return token, i @@ -281,14 +286,14 @@ class LogicParser(object): tok = self.token() except ExpectedMoreTokensException: raise ExpectedMoreTokensException( - self._currentIndex + 1, message="Expression expected." + self._currentIndex + 1, message='Expression expected.' ) accum = self.handle(tok, context) if not accum: raise UnexpectedTokenException( - self._currentIndex, tok, message="Expression expected." + self._currentIndex, tok, message='Expression expected.' ) return self.attempt_adjuncts(accum, context) @@ -360,7 +365,7 @@ class LogicParser(object): try: tok = self.token() except ExpectedMoreTokensException as e: - raise ExpectedMoreTokensException(e.index, "Variable expected.") + raise ExpectedMoreTokensException(e.index, 'Variable expected.') if isinstance(self.make_VariableExpression(tok), ConstantExpression): raise LogicalExpressionException( self._currentIndex, @@ -376,7 +381,7 @@ class LogicParser(object): self._currentIndex + 2, message="Variable and Expression expected following lambda operator.", ) - vars = [self.get_next_token_variable("abstracted")] + vars = [self.get_next_token_variable('abstracted')] while True: if not self.inRange(0) or ( self.token(0) == Tokens.DOT and not self.inRange(1) @@ -387,7 +392,7 @@ class LogicParser(object): if not self.isvariable(self.token(0)): break # Support expressions like: \x y.M == \x.\y.M - vars.append(self.get_next_token_variable("abstracted")) + vars.append(self.get_next_token_variable('abstracted')) if self.inRange(0) and self.token(0) == Tokens.DOT: self.token() # swallow the dot @@ -406,7 +411,7 @@ class LogicParser(object): message="Variable and Expression expected following quantifier '%s'." 
% tok, ) - vars = [self.get_next_token_variable("quantified")] + vars = [self.get_next_token_variable('quantified')] while True: if not self.inRange(0) or ( self.token(0) == Tokens.DOT and not self.inRange(1) @@ -417,7 +422,7 @@ class LogicParser(object): if not self.isvariable(self.token(0)): break # Support expressions like: some x y.M == some x.some y.M - vars.append(self.get_next_token_variable("quantified")) + vars.append(self.get_next_token_variable('quantified')) if self.inRange(0) and self.token(0) == Tokens.DOT: self.token() # swallow the dot @@ -577,10 +582,10 @@ class LogicParser(object): def __repr__(self): if self.inRange(0): - msg = "Next token: " + self.token(0) + msg = 'Next token: ' + self.token(0) else: - msg = "No more tokens" - return "<" + self.__class__.__name__ + ": " + msg + ">" + msg = 'No more tokens' + return '<' + self.__class__.__name__ + ': ' + msg + '>' def read_logic(s, logic_parser=None, encoding=None): @@ -604,22 +609,23 @@ def read_logic(s, logic_parser=None, encoding=None): statements = [] for linenum, line in enumerate(s.splitlines()): line = line.strip() - if line.startswith("#") or line == "": + if line.startswith('#') or line == '': continue try: statements.append(logic_parser.parse(line)) except LogicalExpressionException: - raise ValueError("Unable to parse line %s: %s" % (linenum, line)) + raise ValueError('Unable to parse line %s: %s' % (linenum, line)) return statements @total_ordering +@python_2_unicode_compatible class Variable(object): def __init__(self, name): """ :param name: the name of the variable """ - assert isinstance(name, str), "%s is not a string" % name + assert isinstance(name, string_types), "%s is not a string" % name self.name = name def __eq__(self, other): @@ -658,15 +664,15 @@ def unique_variable(pattern=None, ignore=None): """ if pattern is not None: if is_indvar(pattern.name): - prefix = "z" + prefix = 'z' elif is_funcvar(pattern.name): - prefix = "F" + prefix = 'F' elif is_eventvar(pattern.name): - prefix = "e0" + prefix = 'e0' else: assert False, "Cannot generate a unique constant" else: - prefix = "z" + prefix = 'z' v = Variable("%s%s" % (prefix, _counter.get())) while ignore is not None and v in ignore: @@ -679,13 +685,14 @@ def skolem_function(univ_scope=None): Return a skolem function over the variables in univ_scope param univ_scope """ - skolem = VariableExpression(Variable("F%s" % _counter.get())) + skolem = VariableExpression(Variable('F%s' % _counter.get())) if univ_scope: for v in list(univ_scope): skolem = skolem(VariableExpression(v)) return skolem +@python_2_unicode_compatible class Type(object): def __repr__(self): return "%s" % self @@ -698,6 +705,7 @@ class Type(object): return read_type(s) +@python_2_unicode_compatible class ComplexType(Type): def __init__(self, first, second): assert isinstance(first, Type), "%s is not a Type" % first @@ -742,13 +750,13 @@ class ComplexType(Type): if self == ANY_TYPE: return "%s" % ANY_TYPE else: - return "<%s,%s>" % (self.first, self.second) + return '<%s,%s>' % (self.first, self.second) def str(self): if self == ANY_TYPE: return ANY_TYPE.str() else: - return "(%s -> %s)" % (self.first.str(), self.second.str()) + return '(%s -> %s)' % (self.first.str(), self.second.str()) class BasicType(Type): @@ -770,30 +778,34 @@ class BasicType(Type): return None +@python_2_unicode_compatible class EntityType(BasicType): def __str__(self): - return "e" + return 'e' def str(self): - return "IND" + return 'IND' +@python_2_unicode_compatible class TruthValueType(BasicType): def 
__str__(self): - return "t" + return 't' def str(self): - return "BOOL" + return 'BOOL' +@python_2_unicode_compatible class EventType(BasicType): def __str__(self): - return "v" + return 'v' def str(self): - return "EVENT" + return 'EVENT' +@python_2_unicode_compatible class AnyType(BasicType, ComplexType): def __init__(self): pass @@ -821,10 +833,10 @@ class AnyType(BasicType, ComplexType): return other def __str__(self): - return "?" + return '?' def str(self): - return "ANY" + return 'ANY' TRUTH_TYPE = TruthValueType() @@ -834,19 +846,19 @@ ANY_TYPE = AnyType() def read_type(type_string): - assert isinstance(type_string, str) - type_string = type_string.replace(" ", "") # remove spaces + assert isinstance(type_string, string_types) + type_string = type_string.replace(' ', '') # remove spaces - if type_string[0] == "<": - assert type_string[-1] == ">" + if type_string[0] == '<': + assert type_string[-1] == '>' paren_count = 0 for i, char in enumerate(type_string): - if char == "<": + if char == '<': paren_count += 1 - elif char == ">": + elif char == '>': paren_count -= 1 assert paren_count > 0 - elif char == ",": + elif char == ',': if paren_count == 1: break return ComplexType( @@ -859,9 +871,7 @@ def read_type(type_string): elif type_string[0] == "%s" % ANY_TYPE: return ANY_TYPE else: - raise LogicalExpressionException( - None, "Unexpected character: '%s'." % type_string[0] - ) + raise LogicalExpressionException(None, "Unexpected character: '%s'." % type_string[0]) class TypeException(Exception): @@ -938,6 +948,7 @@ class SubstituteBindingsI(object): raise NotImplementedError() +@python_2_unicode_compatible class Expression(SubstituteBindingsI): """This is the base abstract object for all logical expressions""" @@ -1025,8 +1036,8 @@ class Expression(SubstituteBindingsI): val = self.make_VariableExpression(val) elif not isinstance(val, Expression): raise ValueError( - "Can not substitute a non-expression " - "value into an expression: %r" % (val,) + 'Can not substitute a non-expression ' + 'value into an expression: %r' % (val,) ) # Substitute bindings in the target value. val = val.substitute_bindings(bindings) @@ -1110,9 +1121,9 @@ class Expression(SubstituteBindingsI): result = self for i, e in enumerate(sorted(get_indiv_vars(self), key=lambda e: e.variable)): if isinstance(e, EventVariableExpression): - newVar = e.__class__(Variable("e0%s" % (i + 1))) + newVar = e.__class__(Variable('e0%s' % (i + 1))) elif isinstance(e, IndividualVariableExpression): - newVar = e.__class__(Variable("z%s" % (i + 1))) + newVar = e.__class__(Variable('z%s' % (i + 1))) else: newVar = e result = result.replace(e.variable, newVar, True) @@ -1151,7 +1162,7 @@ class Expression(SubstituteBindingsI): return self.visit(function, lambda parts: combinator(*parts)) def __repr__(self): - return "<%s %s>" % (self.__class__.__name__, self) + return '<%s %s>' % (self.__class__.__name__, self) def __str__(self): return self.str() @@ -1164,7 +1175,7 @@ class Expression(SubstituteBindingsI): :return: set of ``Variable`` objects """ return self.free() | set( - p for p in self.predicates() | self.constants() if re.match("^[?@]", p.name) + p for p in self.predicates() | self.constants() if re.match('^[?@]', p.name) ) def free(self): @@ -1205,6 +1216,7 @@ class Expression(SubstituteBindingsI): return VariableExpression(variable) +@python_2_unicode_compatible class ApplicationExpression(Expression): r""" This class is used to represent two related types of logical expressions. 
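The read_type() helper edited above turns angle-bracket type strings into the Type objects used by the type checker; for example:

    from nltk.sem.logic import read_type

    t = read_type('<e,<e,t>>')   # the type of a curried two-place predicate
    print(t)                     # <e,<e,t>>
    print(t.first, t.second)     # e <e,t>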
@@ -1347,7 +1359,7 @@ class ApplicationExpression(Expression): # uncurry the arguments and find the base function if self.is_atom(): function, args = self.uncurry() - arg_str = ",".join("%s" % arg for arg in args) + arg_str = ','.join("%s" % arg for arg in args) else: # Leave arguments curried function = self.function @@ -1408,6 +1420,7 @@ class ApplicationExpression(Expression): @total_ordering +@python_2_unicode_compatible class AbstractVariableExpression(Expression): """This class represents a variable to be used as a predicate or entity""" @@ -1683,6 +1696,7 @@ class VariableBinderExpression(Expression): __hash__ = Expression.__hash__ +@python_2_unicode_compatible class LambdaExpression(VariableBinderExpression): @property def type(self): @@ -1707,12 +1721,13 @@ class LambdaExpression(VariableBinderExpression): term = term.term return ( Tokens.LAMBDA - + " ".join("%s" % v for v in variables) + + ' '.join("%s" % v for v in variables) + Tokens.DOT + "%s" % term ) +@python_2_unicode_compatible class QuantifiedExpression(VariableBinderExpression): @property def type(self): @@ -1737,8 +1752,8 @@ class QuantifiedExpression(VariableBinderExpression): term = term.term return ( self.getQuantifier() - + " " - + " ".join("%s" % v for v in variables) + + ' ' + + ' '.join("%s" % v for v in variables) + Tokens.DOT + "%s" % term ) @@ -1754,6 +1769,7 @@ class AllExpression(QuantifiedExpression): return Tokens.ALL +@python_2_unicode_compatible class NegatedExpression(Expression): def __init__(self, term): assert isinstance(term, Expression), "%s is not an Expression" % term @@ -1798,6 +1814,7 @@ class NegatedExpression(Expression): return Tokens.NOT + "%s" % self.term +@python_2_unicode_compatible class BinaryExpression(Expression): def __init__(self, first, second): assert isinstance(first, Expression), "%s is not an Expression" % first @@ -1840,7 +1857,7 @@ class BinaryExpression(Expression): def __str__(self): first = self._str_subex(self.first) second = self._str_subex(self.second) - return Tokens.OPEN + first + " " + self.getOp() + " " + second + Tokens.CLOSE + return Tokens.OPEN + first + ' ' + self.getOp() + ' ' + second + Tokens.CLOSE def _str_subex(self, subex): return "%s" % subex @@ -1938,7 +1955,7 @@ class UnexpectedTokenException(LogicalExpressionException): elif unexpected: msg = "Unexpected token: '%s'." % unexpected if message: - msg += " " + message + msg += ' ' + message else: msg = "Expected token '%s'." % expected LogicalExpressionException.__init__(self, index, msg) @@ -1947,9 +1964,9 @@ class UnexpectedTokenException(LogicalExpressionException): class ExpectedMoreTokensException(LogicalExpressionException): def __init__(self, index, message=None): if not message: - message = "More tokens expected." + message = 'More tokens expected.' LogicalExpressionException.__init__( - self, index, "End of input found. " + message + self, index, 'End of input found. 
' + message ) @@ -1961,8 +1978,8 @@ def is_indvar(expr): :param expr: str :return: bool True if expr is of the correct form """ - assert isinstance(expr, str), "%s is not a string" % expr - return re.match(r"^[a-df-z]\d*$", expr) is not None + assert isinstance(expr, string_types), "%s is not a string" % expr + return re.match(r'^[a-df-z]\d*$', expr) is not None def is_funcvar(expr): @@ -1973,8 +1990,8 @@ def is_funcvar(expr): :param expr: str :return: bool True if expr is of the correct form """ - assert isinstance(expr, str), "%s is not a string" % expr - return re.match(r"^[A-Z]\d*$", expr) is not None + assert isinstance(expr, string_types), "%s is not a string" % expr + return re.match(r'^[A-Z]\d*$', expr) is not None def is_eventvar(expr): @@ -1985,58 +2002,58 @@ def is_eventvar(expr): :param expr: str :return: bool True if expr is of the correct form """ - assert isinstance(expr, str), "%s is not a string" % expr - return re.match(r"^e\d*$", expr) is not None + assert isinstance(expr, string_types), "%s is not a string" % expr + return re.match(r'^e\d*$', expr) is not None def demo(): lexpr = Expression.fromstring - print("=" * 20 + "Test reader" + "=" * 20) - print(lexpr(r"john")) - print(lexpr(r"man(x)")) - print(lexpr(r"-man(x)")) - print(lexpr(r"(man(x) & tall(x) & walks(x))")) - print(lexpr(r"exists x.(man(x) & tall(x) & walks(x))")) - print(lexpr(r"\x.man(x)")) - print(lexpr(r"\x.man(x)(john)")) - print(lexpr(r"\x y.sees(x,y)")) - print(lexpr(r"\x y.sees(x,y)(a,b)")) - print(lexpr(r"(\x.exists y.walks(x,y))(x)")) - print(lexpr(r"exists x.x = y")) - print(lexpr(r"exists x.(x = y)")) - print(lexpr("P(x) & x=y & P(y)")) - print(lexpr(r"\P Q.exists x.(P(x) & Q(x))")) - print(lexpr(r"man(x) <-> tall(x)")) - - print("=" * 20 + "Test simplify" + "=" * 20) - print(lexpr(r"\x.\y.sees(x,y)(john)(mary)").simplify()) - print(lexpr(r"\x.\y.sees(x,y)(john, mary)").simplify()) - print(lexpr(r"all x.(man(x) & (\x.exists y.walks(x,y))(x))").simplify()) - print(lexpr(r"(\P.\Q.exists x.(P(x) & Q(x)))(\x.dog(x))(\x.bark(x))").simplify()) - - print("=" * 20 + "Test alpha conversion and binder expression equality" + "=" * 20) - e1 = lexpr("exists x.P(x)") + print('=' * 20 + 'Test reader' + '=' * 20) + print(lexpr(r'john')) + print(lexpr(r'man(x)')) + print(lexpr(r'-man(x)')) + print(lexpr(r'(man(x) & tall(x) & walks(x))')) + print(lexpr(r'exists x.(man(x) & tall(x) & walks(x))')) + print(lexpr(r'\x.man(x)')) + print(lexpr(r'\x.man(x)(john)')) + print(lexpr(r'\x y.sees(x,y)')) + print(lexpr(r'\x y.sees(x,y)(a,b)')) + print(lexpr(r'(\x.exists y.walks(x,y))(x)')) + print(lexpr(r'exists x.x = y')) + print(lexpr(r'exists x.(x = y)')) + print(lexpr('P(x) & x=y & P(y)')) + print(lexpr(r'\P Q.exists x.(P(x) & Q(x))')) + print(lexpr(r'man(x) <-> tall(x)')) + + print('=' * 20 + 'Test simplify' + '=' * 20) + print(lexpr(r'\x.\y.sees(x,y)(john)(mary)').simplify()) + print(lexpr(r'\x.\y.sees(x,y)(john, mary)').simplify()) + print(lexpr(r'all x.(man(x) & (\x.exists y.walks(x,y))(x))').simplify()) + print(lexpr(r'(\P.\Q.exists x.(P(x) & Q(x)))(\x.dog(x))(\x.bark(x))').simplify()) + + print('=' * 20 + 'Test alpha conversion and binder expression equality' + '=' * 20) + e1 = lexpr('exists x.P(x)') print(e1) - e2 = e1.alpha_convert(Variable("z")) + e2 = e1.alpha_convert(Variable('z')) print(e2) print(e1 == e2) def demo_errors(): - print("=" * 20 + "Test reader errors" + "=" * 20) - demoException("(P(x) & Q(x)") - demoException("((P(x) &) & Q(x))") - demoException("P(x) -> ") - demoException("P(x") - 
demoException("P(x,") - demoException("P(x,)") - demoException("exists") - demoException("exists x.") - demoException("\\") - demoException("\\ x y.") - demoException("P(x)Q(x)") - demoException("(P(x)Q(x)") - demoException("exists x -> y") + print('=' * 20 + 'Test reader errors' + '=' * 20) + demoException('(P(x) & Q(x)') + demoException('((P(x) &) & Q(x))') + demoException('P(x) -> ') + demoException('P(x') + demoException('P(x,') + demoException('P(x,)') + demoException('exists') + demoException('exists x.') + demoException('\\') + demoException('\\ x y.') + demoException('P(x)Q(x)') + demoException('(P(x)Q(x)') + demoException('exists x -> y') def demoException(s): @@ -2050,6 +2067,6 @@ def printtype(ex): print("%s : %s" % (ex.str(), ex.type)) -if __name__ == "__main__": +if __name__ == '__main__': demo() # demo_errors() diff --git a/nlp_resource_data/nltk/sem/relextract.py b/nlp_resource_data/nltk/sem/relextract.py index 1d1ec76..5837f84 100644 --- a/nlp_resource_data/nltk/sem/relextract.py +++ b/nlp_resource_data/nltk/sem/relextract.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Relation Extraction # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Ewan Klein # URL: # For license information, see LICENSE.TXT @@ -19,45 +19,47 @@ The two serialization outputs are "rtuple" and "clause". - A clause is an atom of the form ``relsym(subjsym, objsym)``, where the relation, subject and object have been canonicalized to single strings. """ +from __future__ import print_function # todo: get a more general solution to canonicalized symbols for clauses -- maybe use xmlcharrefs? from collections import defaultdict -import html import re +from six.moves import html_entities + # Dictionary that associates corpora with NE classes NE_CLASSES = { - "ieer": [ - "LOCATION", - "ORGANIZATION", - "PERSON", - "DURATION", - "DATE", - "CARDINAL", - "PERCENT", - "MONEY", - "MEASURE", + 'ieer': [ + 'LOCATION', + 'ORGANIZATION', + 'PERSON', + 'DURATION', + 'DATE', + 'CARDINAL', + 'PERCENT', + 'MONEY', + 'MEASURE', ], - "conll2002": ["LOC", "PER", "ORG"], - "ace": [ - "LOCATION", - "ORGANIZATION", - "PERSON", - "DURATION", - "DATE", - "CARDINAL", - "PERCENT", - "MONEY", - "MEASURE", - "FACILITY", - "GPE", + 'conll2002': ['LOC', 'PER', 'ORG'], + 'ace': [ + 'LOCATION', + 'ORGANIZATION', + 'PERSON', + 'DURATION', + 'DATE', + 'CARDINAL', + 'PERCENT', + 'MONEY', + 'MEASURE', + 'FACILITY', + 'GPE', ], } # Allow abbreviated class labels -short2long = dict(LOC="LOCATION", ORG="ORGANIZATION", PER="PERSON") -long2short = dict(LOCATION="LOC", ORGANIZATION="ORG", PERSON="PER") +short2long = dict(LOC='LOCATION', ORG='ORGANIZATION', PER='PERSON') +long2short = dict(LOCATION='LOC', ORGANIZATION='ORG', PERSON='PER') def _expand(type): @@ -84,7 +86,7 @@ def class_abbrev(type): return type -def _join(lst, sep=" ", untag=False): +def _join(lst, sep=' ', untag=False): """ Join a list into a string, turning tags tuples into tag strings or just words. :param untag: if ``True``, omit the tag from tagged input strings. @@ -101,13 +103,19 @@ def _join(lst, sep=" ", untag=False): return sep.join(tuple2str(tup) for tup in lst) -def descape_entity(m, defs=html.entities.entitydefs): +def descape_entity(m, defs=html_entities.entitydefs): """ Translate one entity to its ISO Latin value. 
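The commented-out lines restored below are effectively a worked example for descape_entity: list2sym joins an entity mention into a single lowercase symbol, then folds HTML character references back into their ISO Latin characters. A self-contained version of that example (html.entities is the Python 3 module that six.moves.html_entities resolves to):

import re
from html.entities import entitydefs

def descape_entity(m, defs=entitydefs):
    # Replace a matched reference such as '&amp;' with its character, if known.
    try:
        return defs[m.group(1)]
    except KeyError:
        return m.group(0)

ENT = re.compile(r"&(\w+?);")
print(ENT.sub(descape_entity, "mcglashan_&amp;_sarrail"))  # -> mcglashan_&_sarrail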
Inspired by example from effbot.org """ + # s = 'mcglashan_&_sarrail' + # l = ['mcglashan', '&', 'sarrail'] + # pattern = re.compile("&(\w+?);") + # new = list2sym(l) + # s = pattern.sub(descape_entity, s) + # print s, new try: return defs[m.group(1)] @@ -122,11 +130,11 @@ def list2sym(lst): :return: a Unicode string without whitespace :rtype: unicode """ - sym = _join(lst, "_", untag=True) + sym = _join(lst, '_', untag=True) sym = sym.lower() ENT = re.compile("&(\w+?);") sym = ENT.sub(descape_entity, sym) - sym = sym.replace(".", "") + sym = sym.replace('.', '') return sym @@ -175,23 +183,23 @@ def semi_rel2reldict(pairs, window=5, trace=False): result = [] while len(pairs) > 2: reldict = defaultdict(str) - reldict["lcon"] = _join(pairs[0][0][-window:]) - reldict["subjclass"] = pairs[0][1].label() - reldict["subjtext"] = _join(pairs[0][1].leaves()) - reldict["subjsym"] = list2sym(pairs[0][1].leaves()) - reldict["filler"] = _join(pairs[1][0]) - reldict["untagged_filler"] = _join(pairs[1][0], untag=True) - reldict["objclass"] = pairs[1][1].label() - reldict["objtext"] = _join(pairs[1][1].leaves()) - reldict["objsym"] = list2sym(pairs[1][1].leaves()) - reldict["rcon"] = _join(pairs[2][0][:window]) + reldict['lcon'] = _join(pairs[0][0][-window:]) + reldict['subjclass'] = pairs[0][1].label() + reldict['subjtext'] = _join(pairs[0][1].leaves()) + reldict['subjsym'] = list2sym(pairs[0][1].leaves()) + reldict['filler'] = _join(pairs[1][0]) + reldict['untagged_filler'] = _join(pairs[1][0], untag=True) + reldict['objclass'] = pairs[1][1].label() + reldict['objtext'] = _join(pairs[1][1].leaves()) + reldict['objsym'] = list2sym(pairs[1][1].leaves()) + reldict['rcon'] = _join(pairs[2][0][:window]) if trace: print( "(%s(%s, %s)" % ( - reldict["untagged_filler"], - reldict["subjclass"], - reldict["objclass"], + reldict['untagged_filler'], + reldict['subjclass'], + reldict['objclass'], ) ) result.append(reldict) @@ -199,7 +207,7 @@ def semi_rel2reldict(pairs, window=5, trace=False): return result -def extract_rels(subjclass, objclass, doc, corpus="ace", pattern=None, window=10): +def extract_rels(subjclass, objclass, doc, corpus='ace', pattern=None, window=10): """ Filter the output of ``semi_rel2reldict`` according to specified NE classes and a filler pattern. 
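tree2semi_rel, semi_rel2reldict and the relfilter lambda inside extract_rels form one pipeline: chunked trees are split into (filler, NE-subtree) pairs, turned into reldicts with the keys shown above, and then filtered by subject class, object class and filler pattern. A short usage sketch against the IEER corpus (assumes the ieer corpus data has been downloaded; the IN pattern is the one used by in_demo further down, and NYT_19980315 is one of the IEER file ids):

import re
from nltk.corpus import ieer
from nltk.sem import relextract

IN = re.compile(r'.*\bin\b(?!\b.+ing)')
for doc in ieer.parsed_docs('NYT_19980315'):
    for rel in relextract.extract_rels('ORG', 'LOC', doc, corpus='ieer', pattern=IN):
        # rtuple() renders a reldict as [ORG: ...] 'filler words' [LOC: ...]
        print(relextract.rtuple(rel))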
@@ -241,9 +249,9 @@ def extract_rels(subjclass, objclass, doc, corpus="ace", pattern=None, window=10 "your value for the object type has not been recognized: %s" % objclass ) - if corpus == "ace" or corpus == "conll2002": + if corpus == 'ace' or corpus == 'conll2002': pairs = tree2semi_rel(doc) - elif corpus == "ieer": + elif corpus == 'ieer': pairs = tree2semi_rel(doc.text) + tree2semi_rel(doc.headline) else: raise ValueError("corpus type not recognized") @@ -251,10 +259,10 @@ def extract_rels(subjclass, objclass, doc, corpus="ace", pattern=None, window=10 reldicts = semi_rel2reldict(pairs) relfilter = lambda x: ( - x["subjclass"] == subjclass - and len(x["filler"].split()) <= window - and pattern.match(x["filler"]) - and x["objclass"] == objclass + x['subjclass'] == subjclass + and len(x['filler'].split()) <= window + and pattern.match(x['filler']) + and x['objclass'] == objclass ) return list(filter(relfilter, reldicts)) @@ -267,19 +275,19 @@ def rtuple(reldict, lcon=False, rcon=False): :type reldict: defaultdict """ items = [ - class_abbrev(reldict["subjclass"]), - reldict["subjtext"], - reldict["filler"], - class_abbrev(reldict["objclass"]), - reldict["objtext"], + class_abbrev(reldict['subjclass']), + reldict['subjtext'], + reldict['filler'], + class_abbrev(reldict['objclass']), + reldict['objtext'], ] - format = "[%s: %r] %r [%s: %r]" + format = '[%s: %r] %r [%s: %r]' if lcon: - items = [reldict["lcon"]] + items - format = "...%r)" + format + items = [reldict['lcon']] + items + format = '...%r)' + format if rcon: - items.append(reldict["rcon"]) - format = format + "(%r..." + items.append(reldict['rcon']) + format = format + '(%r...' printargs = tuple(items) return format % printargs @@ -292,7 +300,7 @@ def clause(reldict, relsym): :param relsym: a label for the relation :type relsym: str """ - items = (relsym, reldict["subjsym"], reldict["objsym"]) + items = (relsym, reldict['subjsym'], reldict['objsym']) return "%s(%r, %r)" % items @@ -330,7 +338,7 @@ def in_demo(trace=0, sql=True): warnings.warn("Cannot import sqlite; sql flag will be ignored.") - IN = re.compile(r".*\bin\b(?!\b.+ing)") + IN = re.compile(r'.*\bin\b(?!\b.+ing)') print() print("IEER: in(ORG, LOC) -- just the clauses:") @@ -341,11 +349,11 @@ def in_demo(trace=0, sql=True): if trace: print(doc.docno) print("=" * 15) - for rel in extract_rels("ORG", "LOC", doc, corpus="ieer", pattern=IN): - print(clause(rel, relsym="IN")) + for rel in extract_rels('ORG', 'LOC', doc, corpus='ieer', pattern=IN): + print(clause(rel, relsym='IN')) if sql: try: - rtuple = (rel["subjtext"], rel["objtext"], doc.docno) + rtuple = (rel['subjtext'], rel['objtext'], doc.docno) cur.execute( """insert into Locations values (?, ?, ?)""", @@ -417,7 +425,7 @@ def roles_demo(trace=0): print(doc.docno) print("=" * 15) lcon = rcon = True - for rel in extract_rels("PER", "ORG", doc, corpus="ieer", pattern=ROLES): + for rel in extract_rels('PER', 'ORG', doc, corpus='ieer', pattern=ROLES): print(rtuple(rel, lcon=lcon, rcon=rcon)) @@ -473,12 +481,12 @@ def conllned(trace=1): print("Dutch CoNLL2002: van(PER, ORG) -- raw rtuples with context:") print("=" * 45) - for doc in conll2002.chunked_sents("ned.train"): + for doc in conll2002.chunked_sents('ned.train'): lcon = rcon = False if trace: lcon = rcon = True for rel in extract_rels( - "PER", "ORG", doc, corpus="conll2002", pattern=VAN, window=10 + 'PER', 'ORG', doc, corpus='conll2002', pattern=VAN, window=10 ): print(rtuple(rel, lcon=lcon, rcon=rcon)) @@ -505,11 +513,11 @@ def conllesp(): print("=" * 45) rels 
= [ rel - for doc in conll2002.chunked_sents("esp.train") - for rel in extract_rels("ORG", "LOC", doc, corpus="conll2002", pattern=DE) + for doc in conll2002.chunked_sents('esp.train') + for rel in extract_rels('ORG', 'LOC', doc, corpus='conll2002', pattern=DE) ] for r in rels[:10]: - print(clause(r, relsym="DE")) + print(clause(r, relsym='DE')) print() @@ -518,17 +526,17 @@ def ne_chunked(): print("1500 Sentences from Penn Treebank, as processed by NLTK NE Chunker") print("=" * 45) ROLE = re.compile( - r".*(chairman|president|trader|scientist|economist|analyst|partner).*" + r'.*(chairman|president|trader|scientist|economist|analyst|partner).*' ) rels = [] for i, sent in enumerate(nltk.corpus.treebank.tagged_sents()[:1500]): sent = nltk.ne_chunk(sent) - rels = extract_rels("PER", "ORG", sent, corpus="ace", pattern=ROLE, window=7) + rels = extract_rels('PER', 'ORG', sent, corpus='ace', pattern=ROLE, window=7) for rel in rels: - print("{0:<5}{1}".format(i, rtuple(rel))) + print('{0:<5}{1}'.format(i, rtuple(rel))) -if __name__ == "__main__": +if __name__ == '__main__': import nltk from nltk.sem import relextract diff --git a/nlp_resource_data/nltk/sem/skolemize.py b/nlp_resource_data/nltk/sem/skolemize.py index 3070480..73eaf4b 100644 --- a/nlp_resource_data/nltk/sem/skolemize.py +++ b/nlp_resource_data/nltk/sem/skolemize.py @@ -2,7 +2,7 @@ # # Author: Ewan Klein # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # URL: # For license information, see LICENSE.TXT @@ -116,7 +116,7 @@ def skolemize(expression, univ_scope=None, used_variables=None): elif isinstance(negated, ApplicationExpression): return expression else: - raise Exception("'%s' cannot be skolemized" % expression) + raise Exception('\'%s\' cannot be skolemized' % expression) elif isinstance(expression, ExistsExpression): term = skolemize( expression.term, univ_scope, used_variables | set([expression.variable]) @@ -129,7 +129,7 @@ def skolemize(expression, univ_scope=None, used_variables=None): elif isinstance(expression, ApplicationExpression): return expression else: - raise Exception("'%s' cannot be skolemized" % expression) + raise Exception('\'%s\' cannot be skolemized' % expression) def to_cnf(first, second): diff --git a/nlp_resource_data/nltk/sem/util.py b/nlp_resource_data/nltk/sem/util.py index a36442b..bac3884 100644 --- a/nlp_resource_data/nltk/sem/util.py +++ b/nlp_resource_data/nltk/sem/util.py @@ -2,7 +2,7 @@ # # Author: Ewan Klein # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # URL: # For license information, see LICENSE.TXT @@ -12,6 +12,7 @@ extraction of the semantic representation of the root node of the the syntax tree, followed by evaluation of the semantic representation in a first-order model. """ +from __future__ import print_function, unicode_literals import codecs from nltk.sem import evaluate @@ -49,7 +50,7 @@ def parse_sents(inputs, grammar, trace=0): return parses -def root_semrep(syntree, semkey="SEM"): +def root_semrep(syntree, semkey='SEM'): """ Find the semantic representation at the root of a tree. @@ -65,12 +66,12 @@ def root_semrep(syntree, semkey="SEM"): try: return node[semkey] except KeyError: - print(node, end=" ") + print(node, end=' ') print("has no specification for the feature %s" % semkey) raise -def interpret_sents(inputs, grammar, semkey="SEM", trace=0): +def interpret_sents(inputs, grammar, semkey='SEM', trace=0): """ Add the semantic representation to each syntactic parse tree of each input sentence. 
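interpret_sents is a thin wrapper: it parses each input with the feature grammar via parse_sents and then reads the SEM feature off each root node with root_semrep. A minimal sketch of a call, using the same sample grammar the demo below loads (requires the sample_grammars data package to be installed):

from nltk.sem.util import interpret_sents

sents = ['John sees Mary']
for (syntree, semrep) in interpret_sents(sents, 'grammars/sample_grammars/sem2.fcfg')[0]:
    # each semrep is an nltk.sem.logic Expression read off the root SEM feature
    print(semrep)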
@@ -113,24 +114,24 @@ def demo_model0(): global m0, g0 # Initialize a valuation of non-logical constants.""" v = [ - ("john", "b1"), - ("mary", "g1"), - ("suzie", "g2"), - ("fido", "d1"), - ("tess", "d2"), - ("noosa", "n"), - ("girl", set(["g1", "g2"])), - ("boy", set(["b1", "b2"])), - ("dog", set(["d1", "d2"])), - ("bark", set(["d1", "d2"])), - ("walk", set(["b1", "g2", "d1"])), - ("chase", set([("b1", "g1"), ("b2", "g1"), ("g1", "d1"), ("g2", "d2")])), + ('john', 'b1'), + ('mary', 'g1'), + ('suzie', 'g2'), + ('fido', 'd1'), + ('tess', 'd2'), + ('noosa', 'n'), + ('girl', set(['g1', 'g2'])), + ('boy', set(['b1', 'b2'])), + ('dog', set(['d1', 'd2'])), + ('bark', set(['d1', 'd2'])), + ('walk', set(['b1', 'g2', 'd1'])), + ('chase', set([('b1', 'g1'), ('b2', 'g1'), ('g1', 'd1'), ('g2', 'd2')])), ( - "see", - set([("b1", "g1"), ("b2", "d2"), ("g1", "b1"), ("d2", "b1"), ("g2", "n")]), + 'see', + set([('b1', 'g1'), ('b2', 'd2'), ('g1', 'b1'), ('d2', 'b1'), ('g2', 'n')]), ), - ("in", set([("b1", "n"), ("b2", "n"), ("d2", "n")])), - ("with", set([("b1", "g1"), ("g1", "b1"), ("d1", "b1"), ("b1", "d1")])), + ('in', set([('b1', 'n'), ('b2', 'n'), ('d2', 'n')])), + ('with', set([('b1', 'g1'), ('g1', 'b1'), ('d1', 'b1'), ('b1', 'd1')])), ] # Read in the data from ``v`` val = evaluate.Valuation(v) @@ -142,13 +143,13 @@ def demo_model0(): g0 = evaluate.Assignment(dom) -def read_sents(filename, encoding="utf8"): - with codecs.open(filename, "r", encoding) as fp: +def read_sents(filename, encoding='utf8'): + with codecs.open(filename, 'r', encoding) as fp: sents = [l.rstrip() for l in fp] # get rid of blank lines sents = [l for l in sents if len(l) > 0] - sents = [l for l in sents if not l[0] == "#"] + sents = [l for l in sents if not l[0] == '#'] return sents @@ -170,7 +171,7 @@ def demo_legacy_grammar(): ) print("Reading grammar: %s" % g) print("*" * 20) - for reading in interpret_sents(["hello"], g, semkey="sem"): + for reading in interpret_sents(['hello'], g, semkey='sem'): syn, sem = reading[0] print() print("output: ", sem) @@ -191,9 +192,9 @@ def demo(): beta=True, syntrace=0, semtrace=0, - demo="default", - grammar="", - sentences="", + demo='default', + grammar='', + sentences='', ) opts.add_option( @@ -251,20 +252,20 @@ def demo(): (options, args) = opts.parse_args() - SPACER = "-" * 30 + SPACER = '-' * 30 demo_model0() sents = [ - "Fido sees a boy with Mary", - "John sees Mary", - "every girl chases a dog", - "every boy chases a girl", - "John walks with a girl in Noosa", - "who walks", + 'Fido sees a boy with Mary', + 'John sees Mary', + 'every girl chases a dog', + 'every boy chases a girl', + 'John walks with a girl in Noosa', + 'who walks', ] - gramfile = "grammars/sample_grammars/sem2.fcfg" + gramfile = 'grammars/sample_grammars/sem2.fcfg' if options.sentences: sentsfile = options.sentences @@ -287,20 +288,20 @@ def demo(): for i, sent in enumerate(sents): n = 1 - print("\nSentence: %s" % sent) + print('\nSentence: %s' % sent) print(SPACER) if options.evaluate: for (syntree, semrep, value) in evaluations[i]: if isinstance(value, dict): value = set(value.keys()) - print("%d: %s" % (n, semrep)) + print('%d: %s' % (n, semrep)) print(value) n += 1 else: for (syntree, semrep) in semreps[i]: - print("%d: %s" % (n, semrep)) + print('%d: %s' % (n, semrep)) n += 1 diff --git a/nlp_resource_data/nltk/sentiment/__init__.py b/nlp_resource_data/nltk/sentiment/__init__.py index f31e472..8bb7c07 100644 --- a/nlp_resource_data/nltk/sentiment/__init__.py +++ b/nlp_resource_data/nltk/sentiment/__init__.py @@ 
-1,7 +1,7 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: Sentiment Analysis # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Ewan Klein # URL: # For license information, see LICENSE.TXT diff --git a/nlp_resource_data/nltk/sentiment/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/sentiment/__pycache__/__init__.cpython-37.pyc index c945f365031d8518c6057f359cff84a5f7418e5a..1186feeb7cfe6c8a4b58dd3e1c7936be1b0a0013 100644 GIT binary patch delta 30 kcmaFKbeDykET3n<*S&T6l0CjE$+5i9m delta 42 wcmcc1^pc6&iI#71avlbQ;fVUAnW;(2PTaV{+I zA}uOxg(dD#hnH!EPthu$rZuI{hcmoR>k8Z9EN{>TKSR$D?G23;Sn)_>MZbAy(EJ=L zy{&be<9k2UIwY&~MG&QKZxD6UAdcis@)_;uSt0TEeYY2cewIrE?u#yxIDISt*WgR` zLHHUaz(;3<8f<|X%se90WG$9s7N8tkWO-%-ve=TG*I$}@j?JtEg@#-j_uWXe z0jpU%`WP>x0uyGnK@{u>kGoiz+w;9Nxixy={H7C2-kaLI{EX#AjKNH_>?KLub-C{) z1L0>@FNl~g{!2^#J+(-@QLFknX{+uKhyyy<6@ksq>~08Ig5F18Fnx`2gbM&hSg!H=MF0c0`*IEyv;@cfl!TqbI#K5PSUQY{uaoD@?F8%063+V7G} zQnYW*O_XB$`(8MJln>=E?E|AKUX^tDBAvxB-1p;kdmd391U33R5UV)EzK9o5VAI6Q zD0Wahn?WIUgeqV-2VZhlK3;AfENbNR6=-_1GcT~&Scz*f>jhy zTtlHccU>M|Xzbx3qcbWlflUJ!R+NtC#8nn$5SbN!a2rZgO#Y93W^eJB4MKk#xhA+P QP2C}mUL_7`=HYMr0|eGcD*ylh delta 1814 zcmZWp-D@0G6rX!%c6Rn7yPKp*woRK(l1jUz-6RIVk628Lt)aBm25A|P&FtQrbdvd4 z?%YkA5W5d8=u6Que}L_i`XB}ON&I-y2f-I#e3&N@6a;;jg5WtfX^o^$T+ zeBB?H|Grc@H!zUb;F)M%ZTxidL8;pH;g4yK#t5T6aR_{Mp9Z?46OU zBEMv>@KAaI4DY=pe^1+1`;(Gd+z`Ow~z5|47084WyBaY6FF`XIrh-0t`mSHAv z8FrjynFXB5>|}*5Oir>9HVXeGHpZ&=G^fm_*f={1l8S;boqR$QLX!o3cA^9ae6EQK ze4bpNy%`03cEjDi8MR%pIa^PpewCc){h&Xjq@1joFC9Wfr7Z@Ls{%RIdu$52r~{i; z_-!fNmTatXS9S!CYq}_-;wX|aBz+#x?DgJzx!=hQ?$l^X>vO4bD-x_v^+|4VornB( zTb#X-1cf^T2cF=N@#LSv%&3weuqXLN--~4{3fgC#jPjnO4~#U=Hvx3j%AAS}os=V{dI6Gnf1|IMJW-8feMh z#L#DSc>oPex83F;+fQ_At#!f{o|T%Lw_LFv_hm61av4)N8vC`1M7LKSA^OCgyH1Bn*e0NhW4>YI53xJ$6&dh$*6Xz|pp_K@ys8)R31)cdXa zr9P~J%f;>%Z>)JfuVo;*M@LV0&wx=s#0>P17d}FFYFvjcQX&Q^(VelUgCmzh*WcdZ zqONpXm_U3GM-J9`Sn;bsB#iMcm1Ki5dLM>b>bsQbcj-r?@9Wao=w@~SBQsbAH?M2V zd`G*c-GDF5YIeuT+xyRoj0Cq=TTv(>8>_Y#+N&5y=mvbXZl8BUyUFcN%$Y4CJ6786 z!JI#Xd*IM9SKKYH8?M(#YM`f--fI3y0Wt`zwuoc|opS{WF%Pm-kGJF0j3whjq(%fK zPtCU1+5|g;b%OSOJEz%wgwP24=$s;=ao6_}-kS_pHcTw1m`_em&Ws~ufX8K;<015lfc8ms=rU1D#N=xya8?{ka+o|&Qe7^vOakj9 zvPi7t=gD`;Tyk>i^5MQIviO$kcfjTM$^EIWJ}8!w^7P5Z5Srobvo|^~Acp}sdOPsM zDg4Dj5p^WkreXmJ1}ol1^6Z#{OG6Z3<2bB%D*1T2dgh2#)X39UZPrG`H8g%1NLs+b z=4#erL6cvnt6eq5Dz>B*SaP3ie(aE}(A<$wee8KX0Ky zMHxU(G~K4>dvaT->&%Rr8vsB-!+_?W4-2os)LIN8*75mStXGW0pcYgZs!XccqW&K$ Ci>7A) diff --git a/nlp_resource_data/nltk/sentiment/__pycache__/util.cpython-37.pyc b/nlp_resource_data/nltk/sentiment/__pycache__/util.cpython-37.pyc index 5500f3a24788e1ad101ae280aa0d0182c4e50d65..ffa7c39bbd8c9b14f3f7a8889dc1d43db90e0086 100644 GIT binary patch delta 5779 zcmbVQ3vd(Hx!$uYuVhIU#x@260bxvH5#uKhgB>7ocm$}5F%D!KvaEI1X7NfZ&aP}A zQbjPgrOnWUoTjh)sNMGFw&}Y{(zH!q^pQ4A+DV_2?IdZ_*U)C#PCL_?-s#+X|NltX zhLSdOrO|i(^PjW-`Oo=ZXZ7T#$lsnMvD<2D!V-LP!HfEz-+VE)qjR+U|oj%l%q z+y&YK7+3Au^g?YR>BGf>c3rw&s}~bOc0+oRwg~8ItFPV|GCK}e33eM6Q zqAfF*Uwrm(ly0KEW9?H!yTM$}J4t7e4$=c-OM$V14$*_>ByA-frguUcr)TJ4de=Fk zt)d!z6L6a8ee`a61URedJyZuy3q48=XxGrQbcCj03r4G%WD-!KydY+!3447@yr5=b|N8d(sbOJc-bdsI~P6vHEJw;Cg zr_=1B_m0aC%It19iM-vN?$LVE>$UabGz#{H^hRx?pxquDzp!Oo$x)m#s_&0K5VGDYA4Wk^cg&#BxQPY^#utqIGuoJ>-5Of#Eu zvZsJnJku=A$d41El@0|Sl+{~0axUxOe^s}U3w(2Mb8!i{PC{mjp&5{*DKAZtyX68Y$R5edluUsOdU77m7WStC}aWeiS+g3 z&gaQl1WPg_<>|SMHO7pzK581C`xs1eqX5#v011!~2wqB9Yw6L*dz9e?z)l2M3pDIz zgc||0zz95lt(FZ}q~qWf;m^TzS41-i4-=KdW!6@D_ktw>qVP}E?m;w&5w#in_1}fFE)_Hi|~{Ro7vjN!dDN2dI_uz!ydvZ;xLs%wOQ- zyll?X(*_%-&Sa*6Khtu)k 
z`2+?oJjw))Q8x|Oa$Kj!IpL(8R>0j(0{v}Hn;_B*n}XFGq*ia^R7#XO{sgh9m@m!7 zNffCfESHOcx?*|BQZbxAhO1RhOkrMZR>MM3m}q*4qIB05uc4!gC{ZUTjT3b9D-^K| z^=uz^?us9$=++fhb77%e62mm*0)ajP{RFB64iHe9K1waRfX8}_#aD&e-cNvF+8w;cZ+ DTCadx delta 6241 zcmcIoTWlQHd7d-3ot<5-NQzpL7A37DO4iCWMN5e-Ta09iq-4o5C6lzIj4iL1J415G zBp(;#_h zkrqaQ_WRE)Z(Fx-ox%C%T>f+WFW>pkyz|N*oQ{iaZKj0h;LMHOjprrlz2NEYee#0z z)`>Icn5~wKrMMMm(q+lkZ2i7u>ua)Qq7<_Ylnj&-D8+3PB@?BjEl)|=#Dlav$l9_j zG^+U`zgKjf!do5tPJWq5k|SBnmMqz3mSW46YAcpztCntSR?OCEU>feNFC2O6ovV%c zbC%(#|BtWc{9o_6twwv~s~4@feG%`dJ!X&Fmq0InHD;MkVm)a;v)+bp%GtBfzQO3V zI)W7IYuLB!+xExjI;>9HvR}L}S$l2AzGJ_9pIKe@to@3eM{b`zV;9htwr$96E==9b z+YTSClwB`h_T1W8GlOR&Qz+$KH{(?@i;h#xc=Jw1FlI_c*K;%ZvYjbx_8Ami=7O8b z(FfmG6vnIvKU9C91g|PrpD92I;-N34Q6yIdu_esUCjJH_R9&Yu%hRY8ST1_G+%IVq8AM{W?%g{W+~k=qD8ztAL!v3E>{da1 zX+ip&MRT-LjNsr}y`Oe@oCL zWD>SeGEM9=Pg-X_-Yy8WTiRe9`=z?#Lo``d^COW=B`yblmKbP|;brAeUCMj&Fo|XE zWmU0mOLGeq?u1&tRGrUTX1T(ba-ynb?_r#p@}*+U`7LG_b|z_QR_ke9{U%$1mQ0T> zH6)3|F4q1sGB$&5*}AkL)iqy($*@J8TlDp{UI}foEiX!Z$Oo!CNoBS!Z%E)!kf=@N zH+&WR$`fcUn^FEOajMw9TALT!r(KWEJB7vI#nh#nRJxllIpu=0)jCJ(qlc3Nz5ic$ zAQ%#Fn7nxX`qcFJWNz%r=yVt>x+~SH!$W;EcV%L9GOG*m@xI{KsX?LmZHo(2ZXsXF zb2wBXXFG*|#c(d!hNP*`w~5zahMWXV$|kEFXp!LV-V<3i4Wl}a?%8f~({g^v$>l;b zms_gXD<$NE-=@18mVQyJEW~$gd~}izqDN><Ot3*z?78wgI z(LE1M7>cUENmNx<9~?eqFj?&h4xc*QuPcUP?lI$>xC=;I51H4~NbXRuv?rUi$Qjv}a;mR@!V=f@4M12fhMWQWifu5I;sQ|RiJG?J z%|3cIYX;A^A2K=3$1i{c)%Hgk;of|y1Zm~$;_R%$ow8TVm-rBxo36&kk+B~SCQ+l64NMK}iE;ND`6bjn5_~27c%wkO zrAJ3nkc0tKA7CjEv97H4(Q$gGi!hLom8X2gCVW(uQ@3y+)5+>9JwRMr_B9oQ0Aa62 z^~g<@RVI~HTdm7Jg??J?pSMtQbUp}@=Yk(4hk`#QpFK)IdPx8#C&^4hCXeD9$tS|a zAlKGEKqsix{y$uzWslE*>q!=Tx2?BPJJ2-dRd4V^oM{5~aGEAML*yewK1xJD42f9C zi_?HR1SzHy)-{5L7%ACA1`s0~38TwMajGXUc`(|2j6X!G^2w2~6L#Tlq zE{wYbh~)aADY8o_yHRHq^-KH|4f}B-yHytMsa9}o7rj@b2*}No`Z8dYVsvP$z+9rH z+|H7$i?yR9%V{|&*MfiS8-eBgXWx-V8q!dF17Qr@ZC!;U-u87QM8gPMt_iJZ3pyN= zm6Mw#gf49L@4OgA%mmqY*6VQyDUKdCgom81bW1%Xdjvu>!(t3A(t?I#YFGOZhSWEd zx=CH2w(s~RCRW5(?UF=2#nV4;*8e>C-hsW1IC{l=Ou3MqZV|eI~>Q#)#mhGnQ zwrZe!Y=Cs_Ye0=>9X2tXS3!JQd?k7K{=XV#N-63Kjbw(!SB|ohItYXXOP2ielyS7+uFa5oa_{V|${V)ifwg^mSoS z%>jHCX@#7NA0k3};29zY5p0`4h{9byLqtgbiKte1l#sPhRMTHN!6m##AnIMP{(bPt zOjkSwDWzGLoDAMQekk~MrmGP{n}#%s@Nuyy9i9&RmkLj(e; ztLHt0|KbvO8kax~=%o27uxP^9ZODb-WLR1r616I>2)Yfk-mHa*ioj+ayg(utEB~J`tP(0BNIee+DfqAKhU29?&>mK zox9`|@^qCZOiocr=|fHJgb;D^SZ{;QU15Ajpc3J-GCzqe(5+d<%@RjkuQUgvLoK-- z9FN=%oa9Mg5S-7OE|q}pcb-!5d8NWI61Tfheb+!v{^jUV@@_#uZb_s~Fp2iy~s zfuE$IM)DTCxE())5)ND(x$O!ilR?b@na{M5*&u9M!T$FHgR1(0@Lk@gD?(k37|YZV_N0l4AIaA|Zr>Ex)AH^?Iys_`0XBBb?iy zgNnm-RRkP*IlZG^;-96n9ic~TyS7{Mjc%zP7rk{qz90XU>Si=nvy>1e!w=qj=}Nt`8A_&kRCXqla=t#1m_11HR57-bd_7;pQh>pk&{IJl*q#%p*owNaYKzm zLv}V50fT_xabo!-k^4lxNaV9bK1bwFKtj!{c(`zXfhyi0@_7&|)eHf9%AN?MTa{Z5 z{xCoWm3;L1#!Kjs?fBoX77{ChVoHWF?GW<`i#@KmLxzA7%`OFj80ChKuU4lAxZHkiF^zsI561v@*QNX yc)8>)4z75`5?82|3_psag)4r@3PNKz`h|3!d`bME#H9$%M-dRER709ejr}*Eb6bG` diff --git a/nlp_resource_data/nltk/sentiment/sentiment_analyzer.py b/nlp_resource_data/nltk/sentiment/sentiment_analyzer.py index 9befdd8..4abbc5e 100644 --- a/nlp_resource_data/nltk/sentiment/sentiment_analyzer.py +++ b/nlp_resource_data/nltk/sentiment/sentiment_analyzer.py @@ -2,7 +2,7 @@ # # Natural Language Toolkit: Sentiment Analyzer # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Pierpaolo Pantone <24alsecondo@gmail.com> # URL: # For license information, see LICENSE.TXT @@ -13,7 +13,7 @@ using NLTK features and classifiers, especially for teaching and demonstrative purposes. 
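A compressed sketch of how the class described above is meant to be driven, using toy two-document training data purely to make the calls concrete; as the hunks below show, pickling is delegated to save_file, which this patch moves into nltk.sentiment.util:

from nltk.classify import NaiveBayesClassifier
from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import extract_unigram_feats, save_file

analyzer = SentimentAnalyzer()
training_docs = [(['great', 'fun', 'movie'], 'pos'),
                 (['dull', 'boring', 'plot'], 'neg')]  # toy stand-in data

all_words = analyzer.all_words(training_docs)
unigram_feats = analyzer.unigram_word_feats(all_words)
analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigram_feats)

training_set = analyzer.apply_features(training_docs)
classifier = analyzer.train(NaiveBayesClassifier.train, training_set)
save_file(analyzer, 'sa_demo.pickle')  # same helper train() now uses for save_classifier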
""" -import sys +from __future__ import print_function from collections import defaultdict from nltk.classify.util import apply_features, accuracy as eval_accuracy @@ -27,6 +27,8 @@ from nltk.metrics import ( from nltk.probability import FreqDist +from nltk.sentiment.util import save_file, timer + class SentimentAnalyzer(object): """ @@ -180,19 +182,10 @@ class SentimentAnalyzer(object): print("Training classifier") self.classifier = trainer(training_set, **kwargs) if save_classifier: - self.save_file(self.classifier, save_classifier) + save_file(self.classifier, save_classifier) return self.classifier - def save_file(self, content, filename): - """ - Store `content` in `filename`. Can be used to store a SentimentAnalyzer. - """ - print("Saving", filename, file=sys.stderr) - with open(filename, 'wb') as storage_file: - # The protocol=2 parameter is for python2 compatibility - pickle.dump(content, storage_file, protocol=2) - def evaluate( self, test_set, @@ -221,7 +214,7 @@ class SentimentAnalyzer(object): metrics_results = {} if accuracy == True: accuracy_score = eval_accuracy(classifier, test_set) - metrics_results["Accuracy"] = accuracy_score + metrics_results['Accuracy'] = accuracy_score gold_results = defaultdict(set) test_results = defaultdict(set) @@ -237,19 +230,19 @@ class SentimentAnalyzer(object): precision_score = eval_precision( gold_results[label], test_results[label] ) - metrics_results["Precision [{0}]".format(label)] = precision_score + metrics_results['Precision [{0}]'.format(label)] = precision_score if recall == True: recall_score = eval_recall(gold_results[label], test_results[label]) - metrics_results["Recall [{0}]".format(label)] = recall_score + metrics_results['Recall [{0}]'.format(label)] = recall_score if f_measure == True: f_measure_score = eval_f_measure( gold_results[label], test_results[label] ) - metrics_results["F-measure [{0}]".format(label)] = f_measure_score + metrics_results['F-measure [{0}]'.format(label)] = f_measure_score # Print evaluation results (in alphabetical order) if verbose == True: for result in sorted(metrics_results): - print("{0}: {1}".format(result, metrics_results[result])) + print('{0}: {1}'.format(result, metrics_results[result])) return metrics_results diff --git a/nlp_resource_data/nltk/sentiment/util.py b/nlp_resource_data/nltk/sentiment/util.py index e2bf22a..334c7b7 100644 --- a/nlp_resource_data/nltk/sentiment/util.py +++ b/nlp_resource_data/nltk/sentiment/util.py @@ -2,7 +2,7 @@ # # Natural Language Toolkit: Sentiment Analyzer # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Pierpaolo Pantone <24alsecondo@gmail.com> # URL: # For license information, see LICENSE.TXT @@ -10,6 +10,7 @@ """ Utility methods for Sentiment Analysis. 
""" +from __future__ import division import codecs import csv @@ -20,6 +21,7 @@ import re import sys import time from copy import deepcopy +from itertools import tee import nltk from nltk.corpus import CategorizedPlaintextCorpusReader @@ -43,90 +45,90 @@ NEGATION = r""" NEGATION_RE = re.compile(NEGATION, re.VERBOSE) -CLAUSE_PUNCT = r"^[.:;!?]$" +CLAUSE_PUNCT = r'^[.:;!?]$' CLAUSE_PUNCT_RE = re.compile(CLAUSE_PUNCT) # Happy and sad emoticons HAPPY = set( [ - ":-)", - ":)", - ";)", - ":o)", - ":]", - ":3", - ":c)", - ":>", - "=]", - "8)", - "=)", - ":}", - ":^)", - ":-D", - ":D", - "8-D", - "8D", - "x-D", - "xD", - "X-D", - "XD", - "=-D", - "=D", - "=-3", - "=3", - ":-))", + ':-)', + ':)', + ';)', + ':o)', + ':]', + ':3', + ':c)', + ':>', + '=]', + '8)', + '=)', + ':}', + ':^)', + ':-D', + ':D', + '8-D', + '8D', + 'x-D', + 'xD', + 'X-D', + 'XD', + '=-D', + '=D', + '=-3', + '=3', + ':-))', ":'-)", ":')", - ":*", - ":^*", - ">:P", - ":-P", - ":P", - "X-P", - "x-p", - "xp", - "XP", - ":-p", - ":p", - "=p", - ":-b", - ":b", - ">:)", - ">;)", - ">:-)", - "<3", + ':*', + ':^*', + '>:P', + ':-P', + ':P', + 'X-P', + 'x-p', + 'xp', + 'XP', + ':-p', + ':p', + '=p', + ':-b', + ':b', + '>:)', + '>;)', + '>:-)', + '<3', ] ) SAD = set( [ - ":L", - ":-/", - ">:/", - ":S", - ">:[", - ":@", - ":-(", - ":[", - ":-||", - "=L", - ":<", - ":-[", - ":-<", - "=\\", - "=/", - ">:(", - ":(", - ">.<", + ':L', + ':-/', + '>:/', + ':S', + '>:[', + ':@', + ':-(', + ':[', + ':-||', + '=L', + ':<', + ':-[', + ':-<', + '=\\', + '=/', + '>:(', + ':(', + '>.<', ":'-(", ":'(", - ":\\", - ":-c", - ":c", - ":{", - ">:\\", - ";(", + ':\\', + ':-c', + ':c', + ':{', + '>:\\', + ';(', ] ) @@ -146,10 +148,10 @@ def timer(method): # in Python 2.x round() will return a float, so we convert it to int secs = int(round(tot_time % 60)) if hours == 0 and mins == 0 and secs < 10: - print("[TIMER] {0}(): {:.3f} seconds".format(method.__name__, tot_time)) + print('[TIMER] {0}(): {:.3f} seconds'.format(method.__name__, tot_time)) else: print( - "[TIMER] {0}(): {1}h {2}m {3}s".format( + '[TIMER] {0}(): {1}h {2}m {3}s'.format( method.__name__, hours, mins, secs ) ) @@ -158,6 +160,13 @@ def timer(method): return timed +def pairwise(iterable): + """s -> (s0,s1), (s1,s2), (s2, s3), ...""" + a, b = tee(iterable) + next(b, None) + return zip(a, b) + + # //////////////////////////////////////////////////////////// # { Feature extractor functions # //////////////////////////////////////////////////////////// @@ -189,7 +198,7 @@ def extract_unigram_feats(document, unigrams, handle_negation=False): if handle_negation: document = mark_negation(document) for word in unigrams: - features["contains({0})".format(word)] = word in set(document) + features['contains({0})'.format(word)] = word in set(document) return features @@ -212,7 +221,7 @@ def extract_bigram_feats(document, bigrams): """ features = {} for bigr in bigrams: - features["contains({0} - {1})".format(bigr[0], bigr[1])] = bigr in nltk.bigrams( + features['contains({0} - {1})'.format(bigr[0], bigr[1])] = bigr in nltk.bigrams( document ) return features @@ -255,11 +264,11 @@ def mark_negation(document, double_neg_flip=False, shallow=False): neg_scope = not neg_scope continue else: - doc[i] += "_NEG" + doc[i] += '_NEG' elif neg_scope and CLAUSE_PUNCT_RE.search(word): neg_scope = not neg_scope elif neg_scope and not CLAUSE_PUNCT_RE.search(word): - doc[i] += "_NEG" + doc[i] += '_NEG' return document @@ -268,24 +277,34 @@ def output_markdown(filename, **kwargs): """ Write the output of an analysis to a 
file. """ - with codecs.open(filename, "at") as outfile: - text = "\n*** \n\n" - text += "{0} \n\n".format(time.strftime("%d/%m/%Y, %H:%M")) + with codecs.open(filename, 'at') as outfile: + text = '\n*** \n\n' + text += '{0} \n\n'.format(time.strftime("%d/%m/%Y, %H:%M")) for k in sorted(kwargs): if isinstance(kwargs[k], dict): dictionary = kwargs[k] - text += " - **{0}:**\n".format(k) + text += ' - **{0}:**\n'.format(k) for entry in sorted(dictionary): - text += " - {0}: {1} \n".format(entry, dictionary[entry]) + text += ' - {0}: {1} \n'.format(entry, dictionary[entry]) elif isinstance(kwargs[k], list): - text += " - **{0}:**\n".format(k) + text += ' - **{0}:**\n'.format(k) for entry in kwargs[k]: - text += " - {0}\n".format(entry) + text += ' - {0}\n'.format(entry) else: - text += " - **{0}:** {1} \n".format(k, kwargs[k]) + text += ' - **{0}:** {1} \n'.format(k, kwargs[k]) outfile.write(text) +def save_file(content, filename): + """ + Store `content` in `filename`. Can be used to store a SentimentAnalyzer. + """ + print("Saving", filename) + with codecs.open(filename, 'wb') as storage_file: + # The protocol=2 parameter is for python2 compatibility + pickle.dump(content, storage_file, protocol=2) + + def split_train_test(all_instances, n=None): """ Randomly split `n` instances of the dataset into train and test sets. @@ -311,20 +330,20 @@ def _show_plot(x_values, y_values, x_labels=None, y_labels=None): import matplotlib.pyplot as plt except ImportError: raise ImportError( - "The plot function requires matplotlib to be installed." - "See http://matplotlib.org/" + 'The plot function requires matplotlib to be installed.' + 'See http://matplotlib.org/' ) - plt.locator_params(axis="y", nbins=3) + plt.locator_params(axis='y', nbins=3) axes = plt.axes() axes.yaxis.grid() - plt.plot(x_values, y_values, "ro", color="red") + plt.plot(x_values, y_values, 'ro', color='red') plt.ylim(ymin=-1.2, ymax=1.2) plt.tight_layout(pad=5) if x_labels: - plt.xticks(x_values, x_labels, rotation="vertical") + plt.xticks(x_values, x_labels, rotation='vertical') if y_labels: - plt.yticks([-1, 0, 1], y_labels, rotation="horizontal") + plt.yticks([-1, 0, 1], y_labels, rotation='horizontal') # Pad margins so that markers are not clipped by the axes plt.margins(0.2) plt.show() @@ -339,8 +358,8 @@ def json2csv_preprocess( json_file, outfile, fields, - encoding="utf8", - errors="replace", + encoding='utf8', + errors='replace', gzip_compress=False, skip_retweets=True, skip_tongue_tweets=True, @@ -373,7 +392,7 @@ def json2csv_preprocess( subsets of the original tweets json data. 
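mark_negation, shown a little further up, appends _NEG to every token between a negation word and the next clause punctuation mark, which is what lets extract_unigram_feats treat negated and non-negated occurrences of a word as different features. A quick illustration:

from nltk.sentiment.util import mark_negation

doc = "I did not like this movie . It was fine".split()
print(mark_negation(doc))
# ['I', 'did', 'not', 'like_NEG', 'this_NEG', 'movie_NEG', '.', 'It', 'was', 'fine']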
""" with codecs.open(json_file, encoding=encoding) as fp: - (writer, outf) = _outf_writer(outfile, encoding, errors, gzip_compress) + (writer, outf) = outf_writer_compat(outfile, encoding, errors, gzip_compress) # write the list of fields as header writer.writerow(fields) @@ -384,14 +403,14 @@ def json2csv_preprocess( tweet = json.loads(line) row = extract_fields(tweet, fields) try: - text = row[fields.index("text")] + text = row[fields.index('text')] # Remove retweets if skip_retweets == True: - if re.search(r"\bRT\b", text): + if re.search(r'\bRT\b', text): continue # Remove tweets containing ":P" and ":-P" emoticons if skip_tongue_tweets == True: - if re.search(r"\:\-?P\b", text): + if re.search(r'\:\-?P\b', text): continue # Remove tweets containing both happy and sad emoticons if skip_ambiguous_tweets == True: @@ -401,15 +420,15 @@ def json2csv_preprocess( continue # Strip off emoticons from all tweets if strip_off_emoticons == True: - row[fields.index("text")] = re.sub( - r"(?!\n)\s+", " ", EMOTICON_RE.sub("", text) + row[fields.index('text')] = re.sub( + r'(?!\n)\s+', ' ', EMOTICON_RE.sub('', text) ) # Remove duplicate tweets if remove_duplicates == True: - if row[fields.index("text")] in tweets_cache: + if row[fields.index('text')] in tweets_cache: continue else: - tweets_cache.append(row[fields.index("text")]) + tweets_cache.append(row[fields.index('text')]) except ValueError: pass writer.writerow(row) @@ -439,28 +458,51 @@ def parse_tweets_set( """ tweets = [] if not sent_tokenizer: - sent_tokenizer = load("tokenizers/punkt/english.pickle") - - with codecs.open(filename, "rt") as csvfile: - reader = csv.reader(csvfile) - if skip_header == True: - next(reader, None) # skip the header - i = 0 - for tweet_id, text in reader: - # text = text[1] - i += 1 - sys.stdout.write("Loaded {0} tweets\r".format(i)) - # Apply sentence and word tokenizer to text - if word_tokenizer: - tweet = [ - w - for sent in sent_tokenizer.tokenize(text) - for w in word_tokenizer.tokenize(sent) - ] - else: - tweet = text - tweets.append((tweet, label)) - + sent_tokenizer = load('tokenizers/punkt/english.pickle') + + # If we use Python3.x we can proceed using the 'rt' flag + if sys.version_info[0] == 3: + with codecs.open(filename, 'rt') as csvfile: + reader = csv.reader(csvfile) + if skip_header == True: + next(reader, None) # skip the header + i = 0 + for tweet_id, text in reader: + # text = text[1] + i += 1 + sys.stdout.write('Loaded {0} tweets\r'.format(i)) + # Apply sentence and word tokenizer to text + if word_tokenizer: + tweet = [ + w + for sent in sent_tokenizer.tokenize(text) + for w in word_tokenizer.tokenize(sent) + ] + else: + tweet = text + tweets.append((tweet, label)) + # If we use Python2.x we need to handle encoding problems + elif sys.version_info[0] < 3: + with codecs.open(filename) as csvfile: + reader = csv.reader(csvfile) + if skip_header == True: + next(reader, None) # skip the header + i = 0 + for row in reader: + unicode_row = [x.decode('utf8') for x in row] + text = unicode_row[1] + i += 1 + sys.stdout.write('Loaded {0} tweets\r'.format(i)) + # Apply sentence and word tokenizer to text + if word_tokenizer: + tweet = [ + w.encode('utf8') + for sent in sent_tokenizer.tokenize(text) + for w in word_tokenizer.tokenize(sent) + ] + else: + tweet = text + tweets.append((tweet, label)) print("Loaded {0} tweets".format(i)) return tweets @@ -496,17 +538,17 @@ def demo_tweets(trainer, n_instances=None, output=None): if n_instances is not None: n_instances = int(n_instances / 2) - fields = 
["id", "text"] + fields = ['id', 'text'] positive_json = twitter_samples.abspath("positive_tweets.json") - positive_csv = "positive_tweets.csv" + positive_csv = 'positive_tweets.csv' json2csv_preprocess(positive_json, positive_csv, fields, limit=n_instances) negative_json = twitter_samples.abspath("negative_tweets.json") - negative_csv = "negative_tweets.csv" + negative_csv = 'negative_tweets.csv' json2csv_preprocess(negative_json, negative_csv, fields, limit=n_instances) - neg_docs = parse_tweets_set(negative_csv, label="neg", word_tokenizer=tokenizer) - pos_docs = parse_tweets_set(positive_csv, label="pos", word_tokenizer=tokenizer) + neg_docs = parse_tweets_set(negative_csv, label='neg', word_tokenizer=tokenizer) + pos_docs = parse_tweets_set(positive_csv, label='pos', word_tokenizer=tokenizer) # We separately split subjective and objective instances to keep a balanced # uniform class distribution in both train and test sets. @@ -542,7 +584,7 @@ def demo_tweets(trainer, n_instances=None, output=None): classifier.show_most_informative_features() except AttributeError: print( - "Your classifier does not provide a show_most_informative_features() method." + 'Your classifier does not provide a show_most_informative_features() method.' ) results = sentim_analyzer.evaluate(test_set) @@ -550,7 +592,7 @@ def demo_tweets(trainer, n_instances=None, output=None): extr = [f.__name__ for f in sentim_analyzer.feat_extractors] output_markdown( output, - Dataset="labeled_tweets", + Dataset='labeled_tweets', Classifier=type(classifier).__name__, Tokenizer=tokenizer.__class__.__name__, Feats=extr, @@ -580,12 +622,12 @@ def demo_movie_reviews(trainer, n_instances=None, output=None): n_instances = int(n_instances / 2) pos_docs = [ - (list(movie_reviews.words(pos_id)), "pos") - for pos_id in movie_reviews.fileids("pos")[:n_instances] + (list(movie_reviews.words(pos_id)), 'pos') + for pos_id in movie_reviews.fileids('pos')[:n_instances] ] neg_docs = [ - (list(movie_reviews.words(neg_id)), "neg") - for neg_id in movie_reviews.fileids("neg")[:n_instances] + (list(movie_reviews.words(neg_id)), 'neg') + for neg_id in movie_reviews.fileids('neg')[:n_instances] ] # We separately split positive and negative instances to keep a balanced # uniform class distribution in both train and test sets. @@ -610,7 +652,7 @@ def demo_movie_reviews(trainer, n_instances=None, output=None): classifier.show_most_informative_features() except AttributeError: print( - "Your classifier does not provide a show_most_informative_features() method." + 'Your classifier does not provide a show_most_informative_features() method.' 
) results = sentim_analyzer.evaluate(test_set) @@ -618,9 +660,9 @@ def demo_movie_reviews(trainer, n_instances=None, output=None): extr = [f.__name__ for f in sentim_analyzer.feat_extractors] output_markdown( output, - Dataset="Movie_reviews", + Dataset='Movie_reviews', Classifier=type(classifier).__name__, - Tokenizer="WordPunctTokenizer", + Tokenizer='WordPunctTokenizer', Feats=extr, Results=results, Instances=n_instances, @@ -648,10 +690,10 @@ def demo_subjectivity(trainer, save_analyzer=False, n_instances=None, output=Non n_instances = int(n_instances / 2) subj_docs = [ - (sent, "subj") for sent in subjectivity.sents(categories="subj")[:n_instances] + (sent, 'subj') for sent in subjectivity.sents(categories='subj')[:n_instances] ] obj_docs = [ - (sent, "obj") for sent in subjectivity.sents(categories="obj")[:n_instances] + (sent, 'obj') for sent in subjectivity.sents(categories='obj')[:n_instances] ] # We separately split subjective and objective instances to keep a balanced @@ -680,20 +722,20 @@ def demo_subjectivity(trainer, save_analyzer=False, n_instances=None, output=Non classifier.show_most_informative_features() except AttributeError: print( - "Your classifier does not provide a show_most_informative_features() method." + 'Your classifier does not provide a show_most_informative_features() method.' ) results = sentim_analyzer.evaluate(test_set) if save_analyzer == True: - save_file(sentim_analyzer, "sa_subjectivity.pickle") + save_file(sentim_analyzer, 'sa_subjectivity.pickle') if output: extr = [f.__name__ for f in sentim_analyzer.feat_extractors] output_markdown( output, - Dataset="subjectivity", + Dataset='subjectivity', Classifier=type(classifier).__name__, - Tokenizer="WhitespaceTokenizer", + Tokenizer='WhitespaceTokenizer', Feats=extr, Instances=n_instances, Results=results, @@ -714,10 +756,10 @@ def demo_sent_subjectivity(text): word_tokenizer = regexp.WhitespaceTokenizer() try: - sentim_analyzer = load("sa_subjectivity.pickle") + sentim_analyzer = load('sa_subjectivity.pickle') except LookupError: - print("Cannot find the sentiment analyzer you want to load.") - print("Training a new one using NaiveBayesClassifier.") + print('Cannot find the sentiment analyzer you want to load.') + print('Training a new one using NaiveBayesClassifier.') sentim_analyzer = demo_subjectivity(NaiveBayesClassifier.train, True) # Tokenize and convert to lower case @@ -757,15 +799,15 @@ def demo_liu_hu_lexicon(sentence, plot=False): y.append(0) # neutral if pos_words > neg_words: - print("Positive") + print('Positive') elif pos_words < neg_words: - print("Negative") + print('Negative') elif pos_words == neg_words: - print("Neutral") + print('Neutral') if plot == True: _show_plot( - x, y, x_labels=tokenized_sent, y_labels=["Negative", "Neutral", "Positive"] + x, y, x_labels=tokenized_sent, y_labels=['Negative', 'Neutral', 'Positive'] ) @@ -801,9 +843,9 @@ def demo_vader_tweets(n_instances=None, output=None): if n_instances is not None: n_instances = int(n_instances / 2) - fields = ["id", "text"] + fields = ['id', 'text'] positive_json = twitter_samples.abspath("positive_tweets.json") - positive_csv = "positive_tweets.csv" + positive_csv = 'positive_tweets.csv' json2csv_preprocess( positive_json, positive_csv, @@ -813,7 +855,7 @@ def demo_vader_tweets(n_instances=None, output=None): ) negative_json = twitter_samples.abspath("negative_tweets.json") - negative_csv = "negative_tweets.csv" + negative_csv = 'negative_tweets.csv' json2csv_preprocess( negative_json, negative_csv, @@ -822,8 +864,8 @@ def 
demo_vader_tweets(n_instances=None, output=None): limit=n_instances, ) - pos_docs = parse_tweets_set(positive_csv, label="pos") - neg_docs = parse_tweets_set(negative_csv, label="neg") + pos_docs = parse_tweets_set(positive_csv, label='pos') + neg_docs = parse_tweets_set(negative_csv, label='neg') # We separately split subjective and objective instances to keep a balanced # uniform class distribution in both train and test sets. @@ -845,43 +887,43 @@ def demo_vader_tweets(n_instances=None, output=None): labels.add(label) gold_results[label].add(i) acc_gold_results.append(label) - score = vader_analyzer.polarity_scores(text)["compound"] + score = vader_analyzer.polarity_scores(text)['compound'] if score > 0: - observed = "pos" + observed = 'pos' else: - observed = "neg" + observed = 'neg' num += 1 acc_test_results.append(observed) test_results[observed].add(i) metrics_results = {} for label in labels: accuracy_score = eval_accuracy(acc_gold_results, acc_test_results) - metrics_results["Accuracy"] = accuracy_score + metrics_results['Accuracy'] = accuracy_score precision_score = eval_precision(gold_results[label], test_results[label]) - metrics_results["Precision [{0}]".format(label)] = precision_score + metrics_results['Precision [{0}]'.format(label)] = precision_score recall_score = eval_recall(gold_results[label], test_results[label]) - metrics_results["Recall [{0}]".format(label)] = recall_score + metrics_results['Recall [{0}]'.format(label)] = recall_score f_measure_score = eval_f_measure(gold_results[label], test_results[label]) - metrics_results["F-measure [{0}]".format(label)] = f_measure_score + metrics_results['F-measure [{0}]'.format(label)] = f_measure_score for result in sorted(metrics_results): - print("{0}: {1}".format(result, metrics_results[result])) + print('{0}: {1}'.format(result, metrics_results[result])) if output: output_markdown( output, - Approach="Vader", - Dataset="labeled_tweets", + Approach='Vader', + Dataset='labeled_tweets', Instances=n_instances, Results=metrics_results, ) -if __name__ == "__main__": +if __name__ == '__main__': from nltk.classify import NaiveBayesClassifier, MaxentClassifier from nltk.classify.scikitlearn import SklearnClassifier from sklearn.svm import LinearSVC - from nltk.twitter.common import _outf_writer, extract_fields + from nltk.twitter.common import outf_writer_compat, extract_fields naive_bayes = NaiveBayesClassifier.train svm = SklearnClassifier(LinearSVC()).train diff --git a/nlp_resource_data/nltk/sentiment/vader.py b/nlp_resource_data/nltk/sentiment/vader.py index 7ba4251..da9fab7 100644 --- a/nlp_resource_data/nltk/sentiment/vader.py +++ b/nlp_resource_data/nltk/sentiment/vader.py @@ -1,12 +1,11 @@ # coding: utf-8 # Natural Language Toolkit: vader # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: C.J. 
Hutto # Ewan Klein (modifications) # Pierpaolo Pantone <24alsecondo@gmail.com> (modifications) # George Berry (modifications) -# Malavika Suresh (modifications) # URL: # For license information, see LICENSE.TXT # @@ -26,255 +25,268 @@ import math import re import string from itertools import product - import nltk.data -from nltk.util import pairwise - -class VaderConstants: +from .util import pairwise + +##Constants## + +# (empirically derived mean sentiment intensity rating increase for booster words) +B_INCR = 0.293 +B_DECR = -0.293 + +# (empirically derived mean sentiment intensity rating increase for using +# ALLCAPs to emphasize a word) +C_INCR = 0.733 + +N_SCALAR = -0.74 + +# for removing punctuation +REGEX_REMOVE_PUNCTUATION = re.compile('[{0}]'.format(re.escape(string.punctuation))) + +PUNC_LIST = [ + ".", + "!", + "?", + ",", + ";", + ":", + "-", + "'", + "\"", + "!!", + "!!!", + "??", + "???", + "?!?", + "!?!", + "?!?!", + "!?!?", +] +NEGATE = { + "aint", + "arent", + "cannot", + "cant", + "couldnt", + "darent", + "didnt", + "doesnt", + "ain't", + "aren't", + "can't", + "couldn't", + "daren't", + "didn't", + "doesn't", + "dont", + "hadnt", + "hasnt", + "havent", + "isnt", + "mightnt", + "mustnt", + "neither", + "don't", + "hadn't", + "hasn't", + "haven't", + "isn't", + "mightn't", + "mustn't", + "neednt", + "needn't", + "never", + "none", + "nope", + "nor", + "not", + "nothing", + "nowhere", + "oughtnt", + "shant", + "shouldnt", + "uhuh", + "wasnt", + "werent", + "oughtn't", + "shan't", + "shouldn't", + "uh-uh", + "wasn't", + "weren't", + "without", + "wont", + "wouldnt", + "won't", + "wouldn't", + "rarely", + "seldom", + "despite", +} + +# booster/dampener 'intensifiers' or 'degree adverbs' +# http://en.wiktionary.org/wiki/Category:English_degree_adverbs + +BOOSTER_DICT = { + "absolutely": B_INCR, + "amazingly": B_INCR, + "awfully": B_INCR, + "completely": B_INCR, + "considerably": B_INCR, + "decidedly": B_INCR, + "deeply": B_INCR, + "effing": B_INCR, + "enormously": B_INCR, + "entirely": B_INCR, + "especially": B_INCR, + "exceptionally": B_INCR, + "extremely": B_INCR, + "fabulously": B_INCR, + "flipping": B_INCR, + "flippin": B_INCR, + "fricking": B_INCR, + "frickin": B_INCR, + "frigging": B_INCR, + "friggin": B_INCR, + "fully": B_INCR, + "fucking": B_INCR, + "greatly": B_INCR, + "hella": B_INCR, + "highly": B_INCR, + "hugely": B_INCR, + "incredibly": B_INCR, + "intensely": B_INCR, + "majorly": B_INCR, + "more": B_INCR, + "most": B_INCR, + "particularly": B_INCR, + "purely": B_INCR, + "quite": B_INCR, + "really": B_INCR, + "remarkably": B_INCR, + "so": B_INCR, + "substantially": B_INCR, + "thoroughly": B_INCR, + "totally": B_INCR, + "tremendously": B_INCR, + "uber": B_INCR, + "unbelievably": B_INCR, + "unusually": B_INCR, + "utterly": B_INCR, + "very": B_INCR, + "almost": B_DECR, + "barely": B_DECR, + "hardly": B_DECR, + "just enough": B_DECR, + "kind of": B_DECR, + "kinda": B_DECR, + "kindof": B_DECR, + "kind-of": B_DECR, + "less": B_DECR, + "little": B_DECR, + "marginally": B_DECR, + "occasionally": B_DECR, + "partly": B_DECR, + "scarcely": B_DECR, + "slightly": B_DECR, + "somewhat": B_DECR, + "sort of": B_DECR, + "sorta": B_DECR, + "sortof": B_DECR, + "sort-of": B_DECR, +} + +# check for special case idioms using a sentiment-laden keyword known to SAGE +SPECIAL_CASE_IDIOMS = { + "the shit": 3, + "the bomb": 3, + "bad ass": 1.5, + "yeah right": -2, + "cut the mustard": 2, + "kiss of death": -1.5, + "hand to mouth": -2, +} + + +##Static methods## + + +def negated(input_words, 
include_nt=True): """ - A class to keep the Vader lists and constants. + Determine if input contains negation words """ - ##Constants## - # (empirically derived mean sentiment intensity rating increase for booster words) - B_INCR = 0.293 - B_DECR = -0.293 - - # (empirically derived mean sentiment intensity rating increase for using - # ALLCAPs to emphasize a word) - C_INCR = 0.733 - - N_SCALAR = -0.74 - - NEGATE = { - "aint", - "arent", - "cannot", - "cant", - "couldnt", - "darent", - "didnt", - "doesnt", - "ain't", - "aren't", - "can't", - "couldn't", - "daren't", - "didn't", - "doesn't", - "dont", - "hadnt", - "hasnt", - "havent", - "isnt", - "mightnt", - "mustnt", - "neither", - "don't", - "hadn't", - "hasn't", - "haven't", - "isn't", - "mightn't", - "mustn't", - "neednt", - "needn't", - "never", - "none", - "nope", - "nor", - "not", - "nothing", - "nowhere", - "oughtnt", - "shant", - "shouldnt", - "uhuh", - "wasnt", - "werent", - "oughtn't", - "shan't", - "shouldn't", - "uh-uh", - "wasn't", - "weren't", - "without", - "wont", - "wouldnt", - "won't", - "wouldn't", - "rarely", - "seldom", - "despite", - } - - # booster/dampener 'intensifiers' or 'degree adverbs' - # http://en.wiktionary.org/wiki/Category:English_degree_adverbs - - BOOSTER_DICT = { - "absolutely": B_INCR, - "amazingly": B_INCR, - "awfully": B_INCR, - "completely": B_INCR, - "considerably": B_INCR, - "decidedly": B_INCR, - "deeply": B_INCR, - "effing": B_INCR, - "enormously": B_INCR, - "entirely": B_INCR, - "especially": B_INCR, - "exceptionally": B_INCR, - "extremely": B_INCR, - "fabulously": B_INCR, - "flipping": B_INCR, - "flippin": B_INCR, - "fricking": B_INCR, - "frickin": B_INCR, - "frigging": B_INCR, - "friggin": B_INCR, - "fully": B_INCR, - "fucking": B_INCR, - "greatly": B_INCR, - "hella": B_INCR, - "highly": B_INCR, - "hugely": B_INCR, - "incredibly": B_INCR, - "intensely": B_INCR, - "majorly": B_INCR, - "more": B_INCR, - "most": B_INCR, - "particularly": B_INCR, - "purely": B_INCR, - "quite": B_INCR, - "really": B_INCR, - "remarkably": B_INCR, - "so": B_INCR, - "substantially": B_INCR, - "thoroughly": B_INCR, - "totally": B_INCR, - "tremendously": B_INCR, - "uber": B_INCR, - "unbelievably": B_INCR, - "unusually": B_INCR, - "utterly": B_INCR, - "very": B_INCR, - "almost": B_DECR, - "barely": B_DECR, - "hardly": B_DECR, - "just enough": B_DECR, - "kind of": B_DECR, - "kinda": B_DECR, - "kindof": B_DECR, - "kind-of": B_DECR, - "less": B_DECR, - "little": B_DECR, - "marginally": B_DECR, - "occasionally": B_DECR, - "partly": B_DECR, - "scarcely": B_DECR, - "slightly": B_DECR, - "somewhat": B_DECR, - "sort of": B_DECR, - "sorta": B_DECR, - "sortof": B_DECR, - "sort-of": B_DECR, - } - - # check for special case idioms using a sentiment-laden keyword known to SAGE - SPECIAL_CASE_IDIOMS = { - "the shit": 3, - "the bomb": 3, - "bad ass": 1.5, - "yeah right": -2, - "cut the mustard": 2, - "kiss of death": -1.5, - "hand to mouth": -2, - } - - # for removing punctuation - REGEX_REMOVE_PUNCTUATION = re.compile("[{0}]".format(re.escape(string.punctuation))) - - PUNC_LIST = [ - ".", - "!", - "?", - ",", - ";", - ":", - "-", - "'", - '"', - "!!", - "!!!", - "??", - "???", - "?!?", - "!?!", - "?!?!", - "!?!?", - ] - - def __init__(self): - pass - - def negated(self, input_words, include_nt=True): - """ - Determine if input contains negation words - """ - neg_words = self.NEGATE - if any(word.lower() in neg_words for word in input_words): + neg_words = NEGATE + if any(word.lower() in neg_words for word in input_words): + return 
True + if include_nt: + if any("n't" in word.lower() for word in input_words): return True - if include_nt: - if any("n't" in word.lower() for word in input_words): - return True - for first, second in pairwise(input_words): - if second.lower() == "least" and first.lower() != "at": - return True - return False - - def normalize(self, score, alpha=15): - """ - Normalize the score to be between -1 and 1 using an alpha that - approximates the max expected value - """ - norm_score = score / math.sqrt((score * score) + alpha) - return norm_score + for first, second in pairwise(input_words): + if second.lower() == "least" and first.lower() != 'at': + return True + return False - def scalar_inc_dec(self, word, valence, is_cap_diff): - """ - Check if the preceding words increase, decrease, or negate/nullify the - valence - """ - scalar = 0.0 - word_lower = word.lower() - if word_lower in self.BOOSTER_DICT: - scalar = self.BOOSTER_DICT[word_lower] - if valence < 0: - scalar *= -1 - # check if booster/dampener word is in ALLCAPS (while others aren't) - if word.isupper() and is_cap_diff: - if valence > 0: - scalar += self.C_INCR - else: - scalar -= self.C_INCR - return scalar +def normalize(score, alpha=15): + """ + Normalize the score to be between -1 and 1 using an alpha that + approximates the max expected value + """ + norm_score = score / math.sqrt((score * score) + alpha) + return norm_score + + +def allcap_differential(words): + """ + Check whether just some words in the input are ALL CAPS + + :param list words: The words to inspect + :returns: `True` if some but not all items in `words` are ALL CAPS + """ + is_different = False + allcap_words = 0 + for word in words: + if word.isupper(): + allcap_words += 1 + cap_differential = len(words) - allcap_words + if 0 < cap_differential < len(words): + is_different = True + return is_different + + +def scalar_inc_dec(word, valence, is_cap_diff): + """ + Check if the preceding words increase, decrease, or negate/nullify the + valence + """ + scalar = 0.0 + word_lower = word.lower() + if word_lower in BOOSTER_DICT: + scalar = BOOSTER_DICT[word_lower] + if valence < 0: + scalar *= -1 + # check if booster/dampener word is in ALLCAPS (while others aren't) + if word.isupper() and is_cap_diff: + if valence > 0: + scalar += C_INCR + else: + scalar -= C_INCR + return scalar -class SentiText: +class SentiText(object): """ Identify sentiment-relevant string-level properties of input text. 
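For reference, a minimal sketch of how the helpers moved to module scope in this hunk behave once the file is on the 3.4 layout (an illustration only, assuming the package is importable as nltk.sentiment.vader; printed values are approximate):

from nltk.sentiment.vader import negated, normalize, allcap_differential, scalar_inc_dec

# "not" is in the module-level NEGATE set, so this word list counts as negated
print(negated(["this", "is", "not", "good"]))                  # True
# normalize() maps a raw valence sum into (-1, 1) using alpha=15
print(round(normalize(2.5), 3))                                # ~0.542
# some, but not all, tokens are ALL CAPS, so the differential flag is set
print(allcap_differential(["GREAT", "movie"]))                 # True
# "VERY" is a booster and, being upper case here, also gets the C_INCR bonus
print(scalar_inc_dec("VERY", 1.0, True))                       # B_INCR + C_INCR = 0.293 + 0.733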
""" - def __init__(self, text, punc_list, regex_remove_punctuation): + def __init__(self, text): if not isinstance(text, str): - text = str(text.encode("utf-8")) + text = str(text.encode('utf-8')) self.text = text - self.PUNC_LIST = punc_list - self.REGEX_REMOVE_PUNCTUATION = regex_remove_punctuation self.words_and_emoticons = self._words_and_emoticons() - # doesn't separate words from + # doesn't separate words from\ # adjacent punctuation (keeps emoticons & contractions) - self.is_cap_diff = self.allcap_differential(self.words_and_emoticons) + self.is_cap_diff = allcap_differential(self.words_and_emoticons) def _words_plus_punc(self): """ @@ -284,14 +296,14 @@ class SentiText: ',cat': 'cat', } """ - no_punc_text = self.REGEX_REMOVE_PUNCTUATION.sub("", self.text) + no_punc_text = REGEX_REMOVE_PUNCTUATION.sub('', self.text) # removes punctuation (but loses emoticons & contractions) words_only = no_punc_text.split() # remove singletons words_only = set(w for w in words_only if len(w) > 1) # the product gives ('cat', ',') and (',', 'cat') - punc_before = {"".join(p): p[1] for p in product(self.PUNC_LIST, words_only)} - punc_after = {"".join(p): p[0] for p in product(words_only, self.PUNC_LIST)} + punc_before = {''.join(p): p[1] for p in product(PUNC_LIST, words_only)} + punc_after = {''.join(p): p[0] for p in product(words_only, PUNC_LIST)} words_punc_dict = punc_before words_punc_dict.update(punc_after) return words_punc_dict @@ -310,43 +322,25 @@ class SentiText: wes[i] = words_punc_dict[we] return wes - def allcap_differential(self, words): - """ - Check whether just some words in the input are ALL CAPS - :param list words: The words to inspect - :returns: `True` if some but not all items in `words` are ALL CAPS - """ - is_different = False - allcap_words = 0 - for word in words: - if word.isupper(): - allcap_words += 1 - cap_differential = len(words) - allcap_words - if 0 < cap_differential < len(words): - is_different = True - return is_different - - -class SentimentIntensityAnalyzer: +class SentimentIntensityAnalyzer(object): """ Give a sentiment intensity score to sentences. """ def __init__( - self, lexicon_file="sentiment/vader_lexicon.zip/vader_lexicon/vader_lexicon.txt", + self, lexicon_file="sentiment/vader_lexicon.zip/vader_lexicon/vader_lexicon.txt" ): self.lexicon_file = nltk.data.load(lexicon_file) self.lexicon = self.make_lex_dict() - self.constants = VaderConstants() def make_lex_dict(self): """ Convert lexicon file to a dictionary """ lex_dict = {} - for line in self.lexicon_file.split("\n"): - (word, measure) = line.strip().split("\t")[0:2] + for line in self.lexicon_file.split('\n'): + (word, measure) = line.strip().split('\t')[0:2] lex_dict[word] = float(measure) return lex_dict @@ -356,9 +350,9 @@ class SentimentIntensityAnalyzer: Positive values are positive valence, negative value are negative valence. 
""" + sentitext = SentiText(text) # text, words_and_emoticons, is_cap_diff = self.preprocess(text) - sentitext = SentiText(text, self.constants.PUNC_LIST, - self.constants.REGEX_REMOVE_PUNCTUATION) + sentiments = [] words_and_emoticons = sentitext.words_and_emoticons for item in words_and_emoticons: @@ -368,7 +362,7 @@ class SentimentIntensityAnalyzer: i < len(words_and_emoticons) - 1 and item.lower() == "kind" and words_and_emoticons[i + 1].lower() == "of" - ) or item.lower() in self.constants.BOOSTER_DICT: + ) or item.lower() in BOOSTER_DICT: sentiments.append(valence) continue @@ -389,9 +383,9 @@ class SentimentIntensityAnalyzer: # check if sentiment laden word is in ALL CAPS (while others aren't) if item.isupper() and is_cap_diff: if valence > 0: - valence += self.constants.C_INCR + valence += C_INCR else: - valence -= self.constants.C_INCR + valence -= C_INCR for start_i in range(0, 3): if ( @@ -402,7 +396,7 @@ class SentimentIntensityAnalyzer: # dampen the scalar modifier of preceding words and emoticons # (excluding the ones that immediately preceed the item) based # on their distance from the current item. - s = self.constants.scalar_inc_dec( + s = scalar_inc_dec( words_and_emoticons[i - (start_i + 1)], valence, is_cap_diff ) if start_i == 1 and s != 0: @@ -439,24 +433,30 @@ class SentimentIntensityAnalyzer: words_and_emoticons[i - 2].lower() != "at" and words_and_emoticons[i - 2].lower() != "very" ): - valence = valence * self.constants.N_SCALAR + valence = valence * N_SCALAR elif ( i > 0 and words_and_emoticons[i - 1].lower() not in self.lexicon and words_and_emoticons[i - 1].lower() == "least" ): - valence = valence * self.constants.N_SCALAR + valence = valence * N_SCALAR return valence def _but_check(self, words_and_emoticons, sentiments): - but = {"but", "BUT"} & set(words_and_emoticons) - if but: - bi = words_and_emoticons.index(next(iter(but))) - for sidx, sentiment in enumerate(sentiments): - if sidx < bi: - sentiments[sidx] = sentiment * 0.5 - elif sidx > bi: - sentiments[sidx] = sentiment * 1.5 + # check for modification in sentiment due to contrastive conjunction 'but' + if 'but' in words_and_emoticons or 'BUT' in words_and_emoticons: + try: + bi = words_and_emoticons.index('but') + except ValueError: + bi = words_and_emoticons.index('BUT') + for sentiment in sentiments: + si = sentiments.index(sentiment) + if si < bi: + sentiments.pop(si) + sentiments.insert(si, sentiment * 0.5) + elif si > bi: + sentiments.pop(si) + sentiments.insert(si, sentiment * 1.5) return sentiments def _idioms_check(self, valence, words_and_emoticons, i): @@ -485,42 +485,42 @@ class SentimentIntensityAnalyzer: sequences = [onezero, twoonezero, twoone, threetwoone, threetwo] for seq in sequences: - if seq in self.constants.SPECIAL_CASE_IDIOMS: - valence = self.constants.SPECIAL_CASE_IDIOMS[seq] + if seq in SPECIAL_CASE_IDIOMS: + valence = SPECIAL_CASE_IDIOMS[seq] break if len(words_and_emoticons) - 1 > i: zeroone = "{0} {1}".format( words_and_emoticons[i], words_and_emoticons[i + 1] ) - if zeroone in self.constants.SPECIAL_CASE_IDIOMS: - valence = self.constants.SPECIAL_CASE_IDIOMS[zeroone] + if zeroone in SPECIAL_CASE_IDIOMS: + valence = SPECIAL_CASE_IDIOMS[zeroone] if len(words_and_emoticons) - 1 > i + 1: zeroonetwo = "{0} {1} {2}".format( words_and_emoticons[i], words_and_emoticons[i + 1], words_and_emoticons[i + 2], ) - if zeroonetwo in self.constants.SPECIAL_CASE_IDIOMS: - valence = self.constants.SPECIAL_CASE_IDIOMS[zeroonetwo] + if zeroonetwo in SPECIAL_CASE_IDIOMS: + valence = 
SPECIAL_CASE_IDIOMS[zeroonetwo] # check for booster/dampener bi-grams such as 'sort of' or 'kind of' - if threetwo in self.constants.BOOSTER_DICT or twoone in self.constants.BOOSTER_DICT: - valence = valence + self.constants.B_DECR + if threetwo in BOOSTER_DICT or twoone in BOOSTER_DICT: + valence = valence + B_DECR return valence def _never_check(self, valence, words_and_emoticons, start_i, i): if start_i == 0: - if self.constants.negated([words_and_emoticons[i - 1]]): - valence = valence * self.constants.N_SCALAR + if negated([words_and_emoticons[i - 1]]): + valence = valence * N_SCALAR if start_i == 1: if words_and_emoticons[i - 2] == "never" and ( words_and_emoticons[i - 1] == "so" or words_and_emoticons[i - 1] == "this" ): valence = valence * 1.5 - elif self.constants.negated([words_and_emoticons[i - (start_i + 1)]]): - valence = valence * self.constants.N_SCALAR + elif negated([words_and_emoticons[i - (start_i + 1)]]): + valence = valence * N_SCALAR if start_i == 2: if ( words_and_emoticons[i - 3] == "never" @@ -534,8 +534,8 @@ class SentimentIntensityAnalyzer: ) ): valence = valence * 1.25 - elif self.constants.negated([words_and_emoticons[i - (start_i + 1)]]): - valence = valence * self.constants.N_SCALAR + elif negated([words_and_emoticons[i - (start_i + 1)]]): + valence = valence * N_SCALAR return valence def _punctuation_emphasis(self, sum_s, text): @@ -596,7 +596,7 @@ class SentimentIntensityAnalyzer: elif sum_s < 0: sum_s -= punct_emph_amplifier - compound = self.constants.normalize(sum_s) + compound = normalize(sum_s) # discriminate between positive, negative and neutral sentiment scores pos_sum, neg_sum, neu_count = self._sift_sentiment_scores(sentiments)
diff --git a/nlp_resource_data/nltk/stem/__init__.py b/nlp_resource_data/nltk/stem/__init__.py index 04efb34..d31603f 100644 --- a/nlp_resource_data/nltk/stem/__init__.py +++ b/nlp_resource_data/nltk/stem/__init__.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Stemmers # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Trevor Cohn # Edward Loper # Steven Bird
diff --git a/nlp_resource_data/nltk/stem/api.py b/nlp_resource_data/nltk/stem/api.py index dfa5c27..aa3b326 100644 --- a/nlp_resource_data/nltk/stem/api.py +++ b/nlp_resource_data/nltk/stem/api.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Stemmer Interface # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Trevor Cohn # Edward Loper # Steven Bird @@ -8,9 +8,11 @@ # For license information, see LICENSE.TXT from abc import ABCMeta, abstractmethod +from six import add_metaclass -class StemmerI(metaclass=ABCMeta): +@add_metaclass(ABCMeta) +class StemmerI(object): """ A processing interface for removing morphological affixes from words. This process is known as stemming.
diff --git a/nlp_resource_data/nltk/stem/arlstem.py b/nlp_resource_data/nltk/stem/arlstem.py index 86cec73..d2777c0 100644 --- a/nlp_resource_data/nltk/stem/arlstem.py +++ b/nlp_resource_data/nltk/stem/arlstem.py @@ -2,7 +2,7 @@ # # Natural Language Toolkit: ARLSTem Stemmer # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # # Author: Kheireddine Abainia (x-programer) # Algorithms: Kheireddine Abainia @@ -25,78 +25,79 @@ index, over-stemming index and stemming weight), and the results showed that ARLSTem is promising and producing high performances. This stemmer is not based on any dictionary and can be used on-line effectively. """ +from __future__ import unicode_literals import re from nltk.stem.api import StemmerI class ARLSTem(StemmerI): - """ + ''' ARLSTem stemmer : a light Arabic Stemming algorithm without any dictionary. Department of Telecommunication & Information Processing.
USTHB University, Algiers, Algeria. ARLSTem.stem(token) returns the Arabic stem for the input token. The ARLSTem Stemmer requires that all tokens are encoded using Unicode encoding. - """ + ''' def __init__(self): # different Alif with hamza - self.re_hamzated_alif = re.compile(r"[\u0622\u0623\u0625]") - self.re_alifMaqsura = re.compile(r"[\u0649]") - self.re_diacritics = re.compile(r"[\u064B-\u065F]") + self.re_hamzated_alif = re.compile(r'[\u0622\u0623\u0625]') + self.re_alifMaqsura = re.compile(r'[\u0649]') + self.re_diacritics = re.compile(r'[\u064B-\u065F]') # Alif Laam, Laam Laam, Fa Laam, Fa Ba - self.pr2 = ["\u0627\u0644", "\u0644\u0644", "\u0641\u0644", "\u0641\u0628"] + self.pr2 = ['\u0627\u0644', '\u0644\u0644', '\u0641\u0644', '\u0641\u0628'] # Ba Alif Laam, Kaaf Alif Laam, Waaw Alif Laam - self.pr3 = ["\u0628\u0627\u0644", "\u0643\u0627\u0644", "\u0648\u0627\u0644"] + self.pr3 = ['\u0628\u0627\u0644', '\u0643\u0627\u0644', '\u0648\u0627\u0644'] # Fa Laam Laam, Waaw Laam Laam - self.pr32 = ["\u0641\u0644\u0644", "\u0648\u0644\u0644"] + self.pr32 = ['\u0641\u0644\u0644', '\u0648\u0644\u0644'] # Fa Ba Alif Laam, Waaw Ba Alif Laam, Fa Kaaf Alif Laam self.pr4 = [ - "\u0641\u0628\u0627\u0644", - "\u0648\u0628\u0627\u0644", - "\u0641\u0643\u0627\u0644", + '\u0641\u0628\u0627\u0644', + '\u0648\u0628\u0627\u0644', + '\u0641\u0643\u0627\u0644', ] # Kaf Yaa, Kaf Miim - self.su2 = ["\u0643\u064A", "\u0643\u0645"] + self.su2 = ['\u0643\u064A', '\u0643\u0645'] # Ha Alif, Ha Miim - self.su22 = ["\u0647\u0627", "\u0647\u0645"] + self.su22 = ['\u0647\u0627', '\u0647\u0645'] # Kaf Miim Alif, Kaf Noon Shadda - self.su3 = ["\u0643\u0645\u0627", "\u0643\u0646\u0651"] + self.su3 = ['\u0643\u0645\u0627', '\u0643\u0646\u0651'] # Ha Miim Alif, Ha Noon Shadda - self.su32 = ["\u0647\u0645\u0627", "\u0647\u0646\u0651"] + self.su32 = ['\u0647\u0645\u0627', '\u0647\u0646\u0651'] # Alif Noon, Ya Noon, Waaw Noon - self.pl_si2 = ["\u0627\u0646", "\u064A\u0646", "\u0648\u0646"] + self.pl_si2 = ['\u0627\u0646', '\u064A\u0646', '\u0648\u0646'] # Taa Alif Noon, Taa Ya Noon - self.pl_si3 = ["\u062A\u0627\u0646", "\u062A\u064A\u0646"] + self.pl_si3 = ['\u062A\u0627\u0646', '\u062A\u064A\u0646'] # Alif Noon, Waaw Noon - self.verb_su2 = ["\u0627\u0646", "\u0648\u0646"] + self.verb_su2 = ['\u0627\u0646', '\u0648\u0646'] # Siin Taa, Siin Yaa - self.verb_pr2 = ["\u0633\u062A", "\u0633\u064A"] + self.verb_pr2 = ['\u0633\u062A', '\u0633\u064A'] # Siin Alif, Siin Noon - self.verb_pr22 = ["\u0633\u0627", "\u0633\u0646"] + self.verb_pr22 = ['\u0633\u0627', '\u0633\u0646'] # Lam Noon, Lam Taa, Lam Yaa, Lam Hamza self.verb_pr33 = [ - "\u0644\u0646", - "\u0644\u062A", - "\u0644\u064A", - "\u0644\u0623", + '\u0644\u0646', + '\u0644\u062A', + '\u0644\u064A', + '\u0644\u0623', ] # Taa Miim Alif, Taa Noon Shadda - self.verb_suf3 = ["\u062A\u0645\u0627", "\u062A\u0646\u0651"] + self.verb_suf3 = ['\u062A\u0645\u0627', '\u062A\u0646\u0651'] # Noon Alif, Taa Miim, Taa Alif, Waaw Alif self.verb_suf2 = [ - "\u0646\u0627", - "\u062A\u0645", - "\u062A\u0627", - "\u0648\u0627", + '\u0646\u0627', + '\u062A\u0645', + '\u062A\u0627', + '\u0648\u0627', ] # Taa, Alif, Noon - self.verb_suf1 = ["\u062A", "\u0627", "\u0646"] + self.verb_suf1 = ['\u062A', '\u0627', '\u0646'] def stem(self, token): """ @@ -140,14 +141,14 @@ class ARLSTem(StemmerI): beginning. 
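A short usage sketch for the stemmer whose affix tables are re-quoted above (an illustration only, assuming the package imports as nltk; input must be Unicode Arabic text, as the class docstring requires):

from nltk.stem.arlstem import ARLSTem

stemmer = ARLSTem()
# norm() first strips diacritics and unifies the alif/alif-maqsura forms, then
# the pr*/su* tables above drive prefix and suffix removal
word = "\u0627\u0644\u0637\u0644\u0627\u0628"  # Arabic 'الطلاب' ("the students")
print(stemmer.stem(word))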
""" # strip Arabic diacritics - token = self.re_diacritics.sub("", token) + token = self.re_diacritics.sub('', token) # replace Hamzated Alif with Alif bare - token = self.re_hamzated_alif.sub("\u0627", token) + token = self.re_hamzated_alif.sub('\u0627', token) # replace alifMaqsura with Yaa - token = self.re_alifMaqsura.sub("\u064A", token) + token = self.re_alifMaqsura.sub('\u064A', token) # strip the Waaw from the word beginning if the remaining is 3 letters # at least - if token.startswith("\u0648") and len(token) > 3: + if token.startswith('\u0648') and len(token) > 3: token = token[1:] return token @@ -176,7 +177,7 @@ class ARLSTem(StemmerI): """ remove suffixes from the word's end. """ - if token.endswith("\u0643") and len(token) > 3: + if token.endswith('\u0643') and len(token) > 3: return token[:-1] if len(token) > 4: for s2 in self.su2: @@ -186,7 +187,7 @@ class ARLSTem(StemmerI): for s3 in self.su3: if token.endswith(s3): return token[:-3] - if token.endswith("\u0647") and len(token) > 3: + if token.endswith('\u0647') and len(token) > 3: token = token[:-1] return token if len(token) > 4: @@ -197,7 +198,7 @@ class ARLSTem(StemmerI): for s3 in self.su32: if token.endswith(s3): return token[:-3] - if token.endswith("\u0646\u0627") and len(token) > 4: + if token.endswith('\u0646\u0627') and len(token) > 4: return token[:-2] return token @@ -205,7 +206,7 @@ class ARLSTem(StemmerI): """ transform the word from the feminine form to the masculine form. """ - if token.endswith("\u0629") and len(token) > 3: + if token.endswith('\u0629') and len(token) > 3: return token[:-1] def plur2sing(self, token): @@ -220,11 +221,11 @@ class ARLSTem(StemmerI): for ps3 in self.pl_si3: if token.endswith(ps3): return token[:-3] - if len(token) > 3 and token.endswith("\u0627\u062A"): + if len(token) > 3 and token.endswith('\u0627\u062A'): return token[:-2] - if len(token) > 3 and token.startswith("\u0627") and token[2] == "\u0627": + if len(token) > 3 and token.startswith('\u0627') and token[2] == '\u0627': return token[:2] + token[3:] - if len(token) > 4 and token.startswith("\u0627") and token[-2] == "\u0627": + if len(token) > 4 and token.startswith('\u0627') and token[-2] == '\u0627': return token[1:-2] + token[-1] def verb(self, token): @@ -252,32 +253,32 @@ class ARLSTem(StemmerI): """ stem the present prefixes and suffixes """ - if len(token) > 5 and token.startswith("\u062A"): # Taa + if len(token) > 5 and token.startswith('\u062A'): # Taa for s2 in self.pl_si2: if token.endswith(s2): return token[1:-2] - if len(token) > 5 and token.startswith("\u064A"): # Yaa + if len(token) > 5 and token.startswith('\u064A'): # Yaa for s2 in self.verb_su2: if token.endswith(s2): return token[1:-2] - if len(token) > 4 and token.startswith("\u0627"): # Alif + if len(token) > 4 and token.startswith('\u0627'): # Alif # Waaw Alif - if len(token) > 5 and token.endswith("\u0648\u0627"): + if len(token) > 5 and token.endswith('\u0648\u0627'): return token[1:-2] # Yaa - if token.endswith("\u064A"): + if token.endswith('\u064A'): return token[1:-1] # Alif - if token.endswith("\u0627"): + if token.endswith('\u0627'): return token[1:-1] # Noon - if token.endswith("\u0646"): + if token.endswith('\u0646'): return token[1:-1] # ^Yaa, Noon$ - if len(token) > 4 and token.startswith("\u064A") and token.endswith("\u0646"): + if len(token) > 4 and token.startswith('\u064A') and token.endswith('\u0646'): return token[1:-1] # ^Taa, Noon$ - if len(token) > 4 and token.startswith("\u062A") and token.endswith("\u0646"): + if len(token) 
> 4 and token.startswith('\u062A') and token.endswith('\u0646'): return token[1:-1] def verb_t2(self, token): @@ -299,14 +300,14 @@ class ARLSTem(StemmerI): if ( len(token) > 5 and token.startswith(self.verb_pr2[0]) - and token.endswith("\u0646") + and token.endswith('\u0646') ): return token[2:-1] # ^Siin Yaa, Noon$ if ( len(token) > 5 and token.startswith(self.verb_pr2[1]) - and token.endswith("\u0646") + and token.endswith('\u0646') ): return token[2:-1] @@ -335,7 +336,7 @@ class ARLSTem(StemmerI): for pr1 in self.verb_suf1: if token.startswith(pr1): return token[1:] - if token.startswith("\u064A"): + if token.startswith('\u064A'): return token[1:] def verb_t5(self, token): diff --git a/nlp_resource_data/nltk/stem/cistem.py b/nlp_resource_data/nltk/stem/cistem.py index ef1cc50..efbd5fb 100644 --- a/nlp_resource_data/nltk/stem/cistem.py +++ b/nlp_resource_data/nltk/stem/cistem.py @@ -1,16 +1,18 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: CISTEM Stemmer for German -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Leonie Weissweiler # Algorithm: Leonie Weissweiler # Alexander Fraser # URL: # For license information, see LICENSE.TXT +from __future__ import unicode_literals import re from nltk.stem.api import StemmerI +from nltk.compat import python_2_unicode_compatible - +@python_2_unicode_compatible class Cistem(StemmerI): """ CISTEM Stemmer for German @@ -31,7 +33,7 @@ class Cistem(StemmerI): is thrice as fast as the Snowball stemmer for German while being about as fast as most other stemmers. - case_insensitive is a a boolean specifying if case-insensitive stemming + case_insensitive is a a boolean specifiying if case-insensitive stemming should be used. Case insensitivity improves performance only if words in the text may be incorrectly upper case. For all-lowercase and correctly cased text, best performance is achieved by setting case_insensitive for false. @@ -39,7 +41,6 @@ class Cistem(StemmerI): :param case_insensitive: if True, the stemming is case insensitive. False by default. :type case_insensitive: bool """ - strip_ge = re.compile(r"^ge(.{4,})") repl_xx = re.compile(r"(.)\1") strip_emr = re.compile(r"e[mr]$") @@ -136,6 +137,7 @@ class Cistem(StemmerI): return word + def segment(self, word): """ This method works very similarly to stem (:func:'cistem.stem'). The difference is that in diff --git a/nlp_resource_data/nltk/stem/isri.py b/nlp_resource_data/nltk/stem/isri.py index 695e5fa..5e9de8a 100644 --- a/nlp_resource_data/nltk/stem/isri.py +++ b/nlp_resource_data/nltk/stem/isri.py @@ -2,7 +2,7 @@ # # Natural Language Toolkit: The ISRI Arabic Stemmer # -# Copyright (C) 2001-2020 NLTK Proejct +# Copyright (C) 2001-2019 NLTK Proejct # Algorithm: Kazem Taghva, Rania Elkhoury, and Jeffrey Coombs (2005) # Author: Hosam Algasaier # URL: @@ -29,13 +29,14 @@ Additional adjustments were made to improve the algorithm: increases the word ambiguities and changes the original root. """ +from __future__ import unicode_literals import re from nltk.stem.api import StemmerI class ISRIStemmer(StemmerI): - """ + ''' ISRI Arabic stemmer based on algorithm: Arabic Stemming without a root dictionary. Information Science Research Institute. University of Nevada, Las Vegas, USA. @@ -47,138 +48,138 @@ class ISRIStemmer(StemmerI): The ISRI Stemmer requires that all tokens have Unicode string types. If you use Python IDLE on Arabic Windows you have to decode text first using Arabic '1256' coding. 
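Going back to the CISTEM hunk a little above, a minimal usage sketch (the sample outputs follow the upstream doctests for this class; case_insensitive is False by default, as the docstring notes):

from nltk.stem.cistem import Cistem

stemmer = Cistem(case_insensitive=False)
# stem() lowercases the token, applies the ge-/doubling replacement rules and
# strips the suffix endings
print(stemmer.stem("Speicherbehältern"))     # 'speicherbehalt'
# segment() instead returns the (stem, suffix) split without the final folding
print(stemmer.segment("Speicherbehältern"))  # ('speicherbehält', 'ern')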
- """ + ''' def __init__(self): # length three prefixes self.p3 = [ - "\u0643\u0627\u0644", - "\u0628\u0627\u0644", - "\u0648\u0644\u0644", - "\u0648\u0627\u0644", + '\u0643\u0627\u0644', + '\u0628\u0627\u0644', + '\u0648\u0644\u0644', + '\u0648\u0627\u0644', ] # length two prefixes - self.p2 = ["\u0627\u0644", "\u0644\u0644"] + self.p2 = ['\u0627\u0644', '\u0644\u0644'] # length one prefixes self.p1 = [ - "\u0644", - "\u0628", - "\u0641", - "\u0633", - "\u0648", - "\u064a", - "\u062a", - "\u0646", - "\u0627", + '\u0644', + '\u0628', + '\u0641', + '\u0633', + '\u0648', + '\u064a', + '\u062a', + '\u0646', + '\u0627', ] # length three suffixes self.s3 = [ - "\u062a\u0645\u0644", - "\u0647\u0645\u0644", - "\u062a\u0627\u0646", - "\u062a\u064a\u0646", - "\u0643\u0645\u0644", + '\u062a\u0645\u0644', + '\u0647\u0645\u0644', + '\u062a\u0627\u0646', + '\u062a\u064a\u0646', + '\u0643\u0645\u0644', ] # length two suffixes self.s2 = [ - "\u0648\u0646", - "\u0627\u062a", - "\u0627\u0646", - "\u064a\u0646", - "\u062a\u0646", - "\u0643\u0645", - "\u0647\u0646", - "\u0646\u0627", - "\u064a\u0627", - "\u0647\u0627", - "\u062a\u0645", - "\u0643\u0646", - "\u0646\u064a", - "\u0648\u0627", - "\u0645\u0627", - "\u0647\u0645", + '\u0648\u0646', + '\u0627\u062a', + '\u0627\u0646', + '\u064a\u0646', + '\u062a\u0646', + '\u0643\u0645', + '\u0647\u0646', + '\u0646\u0627', + '\u064a\u0627', + '\u0647\u0627', + '\u062a\u0645', + '\u0643\u0646', + '\u0646\u064a', + '\u0648\u0627', + '\u0645\u0627', + '\u0647\u0645', ] # length one suffixes - self.s1 = ["\u0629", "\u0647", "\u064a", "\u0643", "\u062a", "\u0627", "\u0646"] + self.s1 = ['\u0629', '\u0647', '\u064a', '\u0643', '\u062a', '\u0627', '\u0646'] # groups of length four patterns self.pr4 = { - 0: ["\u0645"], - 1: ["\u0627"], - 2: ["\u0627", "\u0648", "\u064A"], - 3: ["\u0629"], + 0: ['\u0645'], + 1: ['\u0627'], + 2: ['\u0627', '\u0648', '\u064A'], + 3: ['\u0629'], } # Groups of length five patterns and length three roots self.pr53 = { - 0: ["\u0627", "\u062a"], - 1: ["\u0627", "\u064a", "\u0648"], - 2: ["\u0627", "\u062a", "\u0645"], - 3: ["\u0645", "\u064a", "\u062a"], - 4: ["\u0645", "\u062a"], - 5: ["\u0627", "\u0648"], - 6: ["\u0627", "\u0645"], + 0: ['\u0627', '\u062a'], + 1: ['\u0627', '\u064a', '\u0648'], + 2: ['\u0627', '\u062a', '\u0645'], + 3: ['\u0645', '\u064a', '\u062a'], + 4: ['\u0645', '\u062a'], + 5: ['\u0627', '\u0648'], + 6: ['\u0627', '\u0645'], } - self.re_short_vowels = re.compile(r"[\u064B-\u0652]") - self.re_hamza = re.compile(r"[\u0621\u0624\u0626]") - self.re_initial_hamza = re.compile(r"^[\u0622\u0623\u0625]") + self.re_short_vowels = re.compile(r'[\u064B-\u0652]') + self.re_hamza = re.compile(r'[\u0621\u0624\u0626]') + self.re_initial_hamza = re.compile(r'^[\u0622\u0623\u0625]') self.stop_words = [ - "\u064a\u0643\u0648\u0646", - "\u0648\u0644\u064a\u0633", - "\u0648\u0643\u0627\u0646", - "\u0643\u0630\u0644\u0643", - "\u0627\u0644\u062a\u064a", - "\u0648\u0628\u064a\u0646", - "\u0639\u0644\u064a\u0647\u0627", - "\u0645\u0633\u0627\u0621", - "\u0627\u0644\u0630\u064a", - "\u0648\u0643\u0627\u0646\u062a", - "\u0648\u0644\u0643\u0646", - "\u0648\u0627\u0644\u062a\u064a", - "\u062a\u0643\u0648\u0646", - "\u0627\u0644\u064a\u0648\u0645", - "\u0627\u0644\u0644\u0630\u064a\u0646", - "\u0639\u0644\u064a\u0647", - "\u0643\u0627\u0646\u062a", - "\u0644\u0630\u0644\u0643", - "\u0623\u0645\u0627\u0645", - "\u0647\u0646\u0627\u0643", - "\u0645\u0646\u0647\u0627", - "\u0645\u0627\u0632\u0627\u0644", - "\u0644\u0627\u0632\u0627\u0644", - 
"\u0644\u0627\u064a\u0632\u0627\u0644", - "\u0645\u0627\u064a\u0632\u0627\u0644", - "\u0627\u0635\u0628\u062d", - "\u0623\u0635\u0628\u062d", - "\u0623\u0645\u0633\u0649", - "\u0627\u0645\u0633\u0649", - "\u0623\u0636\u062d\u0649", - "\u0627\u0636\u062d\u0649", - "\u0645\u0627\u0628\u0631\u062d", - "\u0645\u0627\u0641\u062a\u0626", - "\u0645\u0627\u0627\u0646\u0641\u0643", - "\u0644\u0627\u0633\u064a\u0645\u0627", - "\u0648\u0644\u0627\u064a\u0632\u0627\u0644", - "\u0627\u0644\u062d\u0627\u0644\u064a", - "\u0627\u0644\u064a\u0647\u0627", - "\u0627\u0644\u0630\u064a\u0646", - "\u0641\u0627\u0646\u0647", - "\u0648\u0627\u0644\u0630\u064a", - "\u0648\u0647\u0630\u0627", - "\u0644\u0647\u0630\u0627", - "\u0641\u0643\u0627\u0646", - "\u0633\u062a\u0643\u0648\u0646", - "\u0627\u0644\u064a\u0647", - "\u064a\u0645\u0643\u0646", - "\u0628\u0647\u0630\u0627", - "\u0627\u0644\u0630\u0649", + '\u064a\u0643\u0648\u0646', + '\u0648\u0644\u064a\u0633', + '\u0648\u0643\u0627\u0646', + '\u0643\u0630\u0644\u0643', + '\u0627\u0644\u062a\u064a', + '\u0648\u0628\u064a\u0646', + '\u0639\u0644\u064a\u0647\u0627', + '\u0645\u0633\u0627\u0621', + '\u0627\u0644\u0630\u064a', + '\u0648\u0643\u0627\u0646\u062a', + '\u0648\u0644\u0643\u0646', + '\u0648\u0627\u0644\u062a\u064a', + '\u062a\u0643\u0648\u0646', + '\u0627\u0644\u064a\u0648\u0645', + '\u0627\u0644\u0644\u0630\u064a\u0646', + '\u0639\u0644\u064a\u0647', + '\u0643\u0627\u0646\u062a', + '\u0644\u0630\u0644\u0643', + '\u0623\u0645\u0627\u0645', + '\u0647\u0646\u0627\u0643', + '\u0645\u0646\u0647\u0627', + '\u0645\u0627\u0632\u0627\u0644', + '\u0644\u0627\u0632\u0627\u0644', + '\u0644\u0627\u064a\u0632\u0627\u0644', + '\u0645\u0627\u064a\u0632\u0627\u0644', + '\u0627\u0635\u0628\u062d', + '\u0623\u0635\u0628\u062d', + '\u0623\u0645\u0633\u0649', + '\u0627\u0645\u0633\u0649', + '\u0623\u0636\u062d\u0649', + '\u0627\u0636\u062d\u0649', + '\u0645\u0627\u0628\u0631\u062d', + '\u0645\u0627\u0641\u062a\u0626', + '\u0645\u0627\u0627\u0646\u0641\u0643', + '\u0644\u0627\u0633\u064a\u0645\u0627', + '\u0648\u0644\u0627\u064a\u0632\u0627\u0644', + '\u0627\u0644\u062d\u0627\u0644\u064a', + '\u0627\u0644\u064a\u0647\u0627', + '\u0627\u0644\u0630\u064a\u0646', + '\u0641\u0627\u0646\u0647', + '\u0648\u0627\u0644\u0630\u064a', + '\u0648\u0647\u0630\u0627', + '\u0644\u0647\u0630\u0627', + '\u0641\u0643\u0627\u0646', + '\u0633\u062a\u0643\u0648\u0646', + '\u0627\u0644\u064a\u0647', + '\u064a\u0645\u0643\u0646', + '\u0628\u0647\u0630\u0627', + '\u0627\u0644\u0630\u0649', ] def stem(self, token): @@ -226,12 +227,12 @@ class ISRIStemmer(StemmerI): num=3 both 1&2 """ if num == 1: - word = self.re_short_vowels.sub("", word) + word = self.re_short_vowels.sub('', word) elif num == 2: - word = self.re_initial_hamza.sub("\u0627", word) + word = self.re_initial_hamza.sub('\u0627', word) elif num == 3: - word = self.re_short_vowels.sub("", word) - word = self.re_initial_hamza.sub("\u0627", word) + word = self.re_short_vowels.sub('', word) + word = self.re_initial_hamza.sub('\u0627', word) return word def pre32(self, word): @@ -260,7 +261,7 @@ class ISRIStemmer(StemmerI): def waw(self, word): """remove connective ‘و’ if it precedes a word beginning with ‘و’ """ - if len(word) >= 4 and word[:2] == "\u0648\u0648": + if len(word) >= 4 and word[:2] == '\u0648\u0648': word = word[1:] return word @@ -282,35 +283,35 @@ class ISRIStemmer(StemmerI): def pro_w53(self, word): """process length five patterns and extract length three roots""" - if word[2] in self.pr53[0] and word[0] == "\u0627": # افتعل - 
افاعل + if word[2] in self.pr53[0] and word[0] == '\u0627': # افتعل - افاعل word = word[1] + word[3:] - elif word[3] in self.pr53[1] and word[0] == "\u0645": # مفعول - مفعال - مفعيل + elif word[3] in self.pr53[1] and word[0] == '\u0645': # مفعول - مفعال - مفعيل word = word[1:3] + word[4] - elif word[0] in self.pr53[2] and word[4] == "\u0629": # مفعلة - تفعلة - افعلة + elif word[0] in self.pr53[2] and word[4] == '\u0629': # مفعلة - تفعلة - افعلة word = word[1:4] - elif word[0] in self.pr53[3] and word[2] == "\u062a": # مفتعل - يفتعل - تفتعل + elif word[0] in self.pr53[3] and word[2] == '\u062a': # مفتعل - يفتعل - تفتعل word = word[1] + word[3:] - elif word[0] in self.pr53[4] and word[2] == "\u0627": # مفاعل - تفاعل + elif word[0] in self.pr53[4] and word[2] == '\u0627': # مفاعل - تفاعل word = word[1] + word[3:] - elif word[2] in self.pr53[5] and word[4] == "\u0629": # فعولة - فعالة + elif word[2] in self.pr53[5] and word[4] == '\u0629': # فعولة - فعالة word = word[:2] + word[3] - elif word[0] in self.pr53[6] and word[1] == "\u0646": # انفعل - منفعل + elif word[0] in self.pr53[6] and word[1] == '\u0646': # انفعل - منفعل word = word[2:] - elif word[3] == "\u0627" and word[0] == "\u0627": # افعال + elif word[3] == '\u0627' and word[0] == '\u0627': # افعال word = word[1:3] + word[4] - elif word[4] == "\u0646" and word[3] == "\u0627": # فعلان + elif word[4] == '\u0646' and word[3] == '\u0627': # فعلان word = word[:3] - elif word[3] == "\u064a" and word[0] == "\u062a": # تفعيل + elif word[3] == '\u064a' and word[0] == '\u062a': # تفعيل word = word[1:3] + word[4] - elif word[3] == "\u0648" and word[1] == "\u0627": # فاعول + elif word[3] == '\u0648' and word[1] == '\u0627': # فاعول word = word[0] + word[2] + word[4] - elif word[2] == "\u0627" and word[1] == "\u0648": # فواعل + elif word[2] == '\u0627' and word[1] == '\u0648': # فواعل word = word[0] + word[3:] - elif word[3] == "\u0626" and word[2] == "\u0627": # فعائل + elif word[3] == '\u0626' and word[2] == '\u0627': # فعائل word = word[:2] + word[4] - elif word[4] == "\u0629" and word[1] == "\u0627": # فاعلة + elif word[4] == '\u0629' and word[1] == '\u0627': # فاعلة word = word[0] + word[2:4] - elif word[4] == "\u064a" and word[2] == "\u0627": # فعالي + elif word[4] == '\u064a' and word[2] == '\u0627': # فعالي word = word[:2] + word[3] else: word = self.suf1(word) # do - normalize short sufix @@ -322,9 +323,9 @@ class ISRIStemmer(StemmerI): """process length five patterns and extract length four roots""" if word[0] in self.pr53[2]: # تفعلل - افعلل - مفعلل word = word[1:] - elif word[4] == "\u0629": # فعللة + elif word[4] == '\u0629': # فعللة word = word[:4] - elif word[2] == "\u0627": # فعالل + elif word[2] == '\u0627': # فعالل word = word[:2] + word[3:] return word @@ -338,24 +339,24 @@ class ISRIStemmer(StemmerI): def pro_w6(self, word): """process length six patterns and extract length three roots""" - if word.startswith("\u0627\u0633\u062a") or word.startswith( - "\u0645\u0633\u062a" + if word.startswith('\u0627\u0633\u062a') or word.startswith( + '\u0645\u0633\u062a' ): # مستفعل - استفعل word = word[3:] elif ( - word[0] == "\u0645" and word[3] == "\u0627" and word[5] == "\u0629" + word[0] == '\u0645' and word[3] == '\u0627' and word[5] == '\u0629' ): # مفعالة word = word[1:3] + word[4] elif ( - word[0] == "\u0627" and word[2] == "\u062a" and word[4] == "\u0627" + word[0] == '\u0627' and word[2] == '\u062a' and word[4] == '\u0627' ): # افتعال word = word[1] + word[3] + word[5] elif ( - word[0] == "\u0627" and word[3] == "\u0648" and word[2] 
== word[4] + word[0] == '\u0627' and word[3] == '\u0648' and word[2] == word[4] ): # افعوعل word = word[1] + word[4:] elif ( - word[0] == "\u062a" and word[2] == "\u0627" and word[4] == "\u064a" + word[0] == '\u062a' and word[2] == '\u0627' and word[4] == '\u064a' ): # تفاعيل new pattern word = word[1] + word[3] + word[5] else: @@ -366,9 +367,9 @@ class ISRIStemmer(StemmerI): def pro_w64(self, word): """process length six patterns and extract length four roots""" - if word[0] == "\u0627" and word[4] == "\u0627": # افعلال + if word[0] == '\u0627' and word[4] == '\u0627': # افعلال word = word[1:4] + word[5] - elif word.startswith("\u0645\u062a"): # متفعلل + elif word.startswith('\u0645\u062a'): # متفعلل word = word[2:] return word diff --git a/nlp_resource_data/nltk/stem/lancaster.py b/nlp_resource_data/nltk/stem/lancaster.py index ef5eaa4..919a1a6 100644 --- a/nlp_resource_data/nltk/stem/lancaster.py +++ b/nlp_resource_data/nltk/stem/lancaster.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Stemmers # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Steven Tomcavage # URL: # For license information, see LICENSE.TXT @@ -9,11 +9,14 @@ A word stemmer based on the Lancaster (Paice/Husk) stemming algorithm. Paice, Chris D. "Another Stemmer." ACM SIGIR Forum 24.3 (1990): 56-61. """ +from __future__ import unicode_literals import re from nltk.stem.api import StemmerI +from nltk.compat import python_2_unicode_compatible +@python_2_unicode_compatible class LancasterStemmer(StemmerI): """ Lancaster Stemmer @@ -267,7 +270,7 @@ class LancasterStemmer(StemmerI): word, remove_total, append_string ) rule_was_applied = True - if cont_flag == ".": + if cont_flag == '.': proceed = False break elif self.__isAcceptable(word, remove_total): @@ -275,7 +278,7 @@ class LancasterStemmer(StemmerI): word, remove_total, append_string ) rule_was_applied = True - if cont_flag == ".": + if cont_flag == '.': proceed = False break # If no rules apply, the word doesn't need any more stemming @@ -346,4 +349,4 @@ class LancasterStemmer(StemmerI): return word def __repr__(self): - return "" + return '' diff --git a/nlp_resource_data/nltk/stem/porter.py b/nlp_resource_data/nltk/stem/porter.py index cb04f52..e79b8b6 100644 --- a/nlp_resource_data/nltk/stem/porter.py +++ b/nlp_resource_data/nltk/stem/porter.py @@ -18,13 +18,17 @@ which includes another Python implementation and other implementations in many languages. """ -__docformat__ = "plaintext" +from __future__ import print_function, unicode_literals + +__docformat__ = 'plaintext' import re from nltk.stem.api import StemmerI +from nltk.compat import python_2_unicode_compatible +@python_2_unicode_compatible class PorterStemmer(StemmerI): """ A word stemmer based on the Porter stemming algorithm. @@ -71,14 +75,14 @@ class PorterStemmer(StemmerI): For the best stemming, you should use the default NLTK_EXTENSIONS version. However, if you need to get the same results as either the original algorithm or one of Martin Porter's hosted versions for - compatibility with an existing implementation or dataset, you can use + compability with an existing implementation or dataset, you can use one of the other modes instead. 
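Since the docstring above describes the three stemmer modes, a short usage sketch may help; the mode constants are defined immediately below, and the 'dies' outputs follow from the step-1a rules shown later in this hunk.

    # Illustrative mode selection for PorterStemmer (NLTK_EXTENSIONS is the default).
    from nltk.stem.porter import PorterStemmer

    default_stemmer = PorterStemmer()                                  # NLTK_EXTENSIONS
    original = PorterStemmer(mode=PorterStemmer.ORIGINAL_ALGORITHM)
    martin = PorterStemmer(mode=PorterStemmer.MARTIN_EXTENSIONS)

    print(default_stemmer.stem("dies"))  # 'die' with the NLTK-only IES rule
    print(original.stem("dies"))         # 'di' under the published 1980 algorithm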
""" # Modes the Stemmer can be instantiated in - NLTK_EXTENSIONS = "NLTK_EXTENSIONS" - MARTIN_EXTENSIONS = "MARTIN_EXTENSIONS" - ORIGINAL_ALGORITHM = "ORIGINAL_ALGORITHM" + NLTK_EXTENSIONS = 'NLTK_EXTENSIONS' + MARTIN_EXTENSIONS = 'MARTIN_EXTENSIONS' + ORIGINAL_ALGORITHM = 'ORIGINAL_ALGORITHM' def __init__(self, mode=NLTK_EXTENSIONS): if mode not in ( @@ -118,7 +122,7 @@ class PorterStemmer(StemmerI): for val in irregular_forms[key]: self.pool[val] = key - self.vowels = frozenset(["a", "e", "i", "o", "u"]) + self.vowels = frozenset(['a', 'e', 'i', 'o', 'u']) def _is_consonant(self, word, i): """Returns True if word[i] is a consonant, False otherwise @@ -134,7 +138,7 @@ class PorterStemmer(StemmerI): """ if word[i] in self.vowels: return False - if word[i] == "y": + if word[i] == 'y': if i == 0: return True else: @@ -175,7 +179,7 @@ class PorterStemmer(StemmerI): m=1 TROUBLE, OATS, TREES, IVY. m=2 TROUBLES, PRIVATE, OATEN, ORRERY. """ - cv_sequence = "" + cv_sequence = '' # Construct a string of 'c's and 'v's representing whether each # character in `stem` is a consonant or a vowel. @@ -183,14 +187,14 @@ class PorterStemmer(StemmerI): # 'architecture' becomes 'vcccvcvccvcv' for i in range(len(stem)): if self._is_consonant(stem, i): - cv_sequence += "c" + cv_sequence += 'c' else: - cv_sequence += "v" + cv_sequence += 'v' # Count the number of 'vc' occurences, which is equivalent to # the number of 'VC' occurrences in Porter's reduced form in the # docstring above, which is in turn equivalent to `m` - return cv_sequence.count("vc") + return cv_sequence.count('vc') def _has_positive_measure(self, stem): return self._measure(stem) > 0 @@ -226,7 +230,7 @@ class PorterStemmer(StemmerI): and self._is_consonant(word, len(word) - 3) and not self._is_consonant(word, len(word) - 2) and self._is_consonant(word, len(word) - 1) - and word[-1] not in ("w", "x", "y") + and word[-1] not in ('w', 'x', 'y') ) or ( self.mode == self.NLTK_EXTENSIONS and len(word) == 2 @@ -237,7 +241,7 @@ class PorterStemmer(StemmerI): def _replace_suffix(self, word, suffix, replacement): """Replaces `suffix` of `word` with `replacement""" assert word.endswith(suffix), "Given word doesn't end with given suffix" - if suffix == "": + if suffix == '': return word + replacement else: return word[: -len(suffix)] + replacement @@ -253,7 +257,7 @@ class PorterStemmer(StemmerI): """ for rule in rules: suffix, replacement, condition = rule - if suffix == "*d" and self._ends_double_consonant(word): + if suffix == '*d' and self._ends_double_consonant(word): stem = word[:-2] if condition is None or condition(stem): return stem + replacement @@ -261,7 +265,7 @@ class PorterStemmer(StemmerI): # Don't try any further rules return word if word.endswith(suffix): - stem = self._replace_suffix(word, suffix, "") + stem = self._replace_suffix(word, suffix, '') if condition is None or condition(stem): return stem + replacement else: @@ -284,16 +288,16 @@ class PorterStemmer(StemmerI): # this NLTK-only rule extends the original algorithm, so # that 'flies'->'fli' but 'dies'->'die' etc if self.mode == self.NLTK_EXTENSIONS: - if word.endswith("ies") and len(word) == 4: - return self._replace_suffix(word, "ies", "ie") + if word.endswith('ies') and len(word) == 4: + return self._replace_suffix(word, 'ies', 'ie') return self._apply_rule_list( word, [ - ("sses", "ss", None), # SSES -> SS - ("ies", "i", None), # IES -> I - ("ss", "ss", None), # SS -> SS - ("s", "", None), # S -> + ('sses', 'ss', None), # SSES -> SS + ('ies', 'i', None), # IES -> I + ('ss', 
'ss', None), # SS -> SS + ('s', '', None), # S -> ], ) @@ -333,25 +337,25 @@ class PorterStemmer(StemmerI): # this NLTK-only block extends the original algorithm, so that # 'spied'->'spi' but 'died'->'die' etc if self.mode == self.NLTK_EXTENSIONS: - if word.endswith("ied"): + if word.endswith('ied'): if len(word) == 4: - return self._replace_suffix(word, "ied", "ie") + return self._replace_suffix(word, 'ied', 'ie') else: - return self._replace_suffix(word, "ied", "i") + return self._replace_suffix(word, 'ied', 'i') # (m>0) EED -> EE - if word.endswith("eed"): - stem = self._replace_suffix(word, "eed", "") + if word.endswith('eed'): + stem = self._replace_suffix(word, 'eed', '') if self._measure(stem) > 0: - return stem + "ee" + return stem + 'ee' else: return word rule_2_or_3_succeeded = False - for suffix in ["ed", "ing"]: + for suffix in ['ed', 'ing']: if word.endswith(suffix): - intermediate_stem = self._replace_suffix(word, suffix, "") + intermediate_stem = self._replace_suffix(word, suffix, '') if self._contains_vowel(intermediate_stem): rule_2_or_3_succeeded = True break @@ -362,20 +366,20 @@ class PorterStemmer(StemmerI): return self._apply_rule_list( intermediate_stem, [ - ("at", "ate", None), # AT -> ATE - ("bl", "ble", None), # BL -> BLE - ("iz", "ize", None), # IZ -> IZE + ('at', 'ate', None), # AT -> ATE + ('bl', 'ble', None), # BL -> BLE + ('iz', 'ize', None), # IZ -> IZE # (*d and not (*L or *S or *Z)) # -> single letter ( - "*d", + '*d', intermediate_stem[-1], - lambda stem: intermediate_stem[-1] not in ("l", "s", "z"), + lambda stem: intermediate_stem[-1] not in ('l', 's', 'z'), ), # (m=1 and *o) -> E ( - "", - "e", + '', + 'e', lambda stem: (self._measure(stem) == 1 and self._ends_cvc(stem)), ), ], @@ -420,8 +424,8 @@ class PorterStemmer(StemmerI): word, [ ( - "y", - "i", + 'y', + 'i', nltk_condition if self.mode == self.NLTK_EXTENSIONS else original_condition, @@ -463,39 +467,39 @@ class PorterStemmer(StemmerI): # Instead of applying the ALLI -> AL rule after '(a)bli' per # the published algorithm, instead we apply it first, and, # if it succeeds, run the result through step2 again. 
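The conditions used throughout these rules rest on the measure m built by _measure() earlier in this hunk: classify each letter as consonant or vowel and count 'vc' pairs. A standalone sketch (simplified 'y' handling, not the NLTK code) that reproduces the m values listed in the _measure() docstring:

    # Standalone sketch of Porter's measure m. 'y' is treated as a vowel when it
    # follows a consonant, matching _is_consonant() for these examples.
    def measure(stem):
        vowels = set("aeiou")
        cv = ""
        for i, ch in enumerate(stem):
            if ch in vowels or (ch == "y" and i > 0 and cv[-1] == "c"):
                cv += "v"
            else:
                cv += "c"
        return cv.count("vc")

    for w in ("tree", "by", "trouble", "ivy", "troubles", "private"):
        print(w, measure(w))   # 0, 0, 1, 1, 2, 2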
- if word.endswith("alli") and self._has_positive_measure( - self._replace_suffix(word, "alli", "") + if word.endswith('alli') and self._has_positive_measure( + self._replace_suffix(word, 'alli', '') ): - return self._step2(self._replace_suffix(word, "alli", "al")) + return self._step2(self._replace_suffix(word, 'alli', 'al')) - bli_rule = ("bli", "ble", self._has_positive_measure) - abli_rule = ("abli", "able", self._has_positive_measure) + bli_rule = ('bli', 'ble', self._has_positive_measure) + abli_rule = ('abli', 'able', self._has_positive_measure) rules = [ - ("ational", "ate", self._has_positive_measure), - ("tional", "tion", self._has_positive_measure), - ("enci", "ence", self._has_positive_measure), - ("anci", "ance", self._has_positive_measure), - ("izer", "ize", self._has_positive_measure), + ('ational', 'ate', self._has_positive_measure), + ('tional', 'tion', self._has_positive_measure), + ('enci', 'ence', self._has_positive_measure), + ('anci', 'ance', self._has_positive_measure), + ('izer', 'ize', self._has_positive_measure), abli_rule if self.mode == self.ORIGINAL_ALGORITHM else bli_rule, - ("alli", "al", self._has_positive_measure), - ("entli", "ent", self._has_positive_measure), - ("eli", "e", self._has_positive_measure), - ("ousli", "ous", self._has_positive_measure), - ("ization", "ize", self._has_positive_measure), - ("ation", "ate", self._has_positive_measure), - ("ator", "ate", self._has_positive_measure), - ("alism", "al", self._has_positive_measure), - ("iveness", "ive", self._has_positive_measure), - ("fulness", "ful", self._has_positive_measure), - ("ousness", "ous", self._has_positive_measure), - ("aliti", "al", self._has_positive_measure), - ("iviti", "ive", self._has_positive_measure), - ("biliti", "ble", self._has_positive_measure), + ('alli', 'al', self._has_positive_measure), + ('entli', 'ent', self._has_positive_measure), + ('eli', 'e', self._has_positive_measure), + ('ousli', 'ous', self._has_positive_measure), + ('ization', 'ize', self._has_positive_measure), + ('ation', 'ate', self._has_positive_measure), + ('ator', 'ate', self._has_positive_measure), + ('alism', 'al', self._has_positive_measure), + ('iveness', 'ive', self._has_positive_measure), + ('fulness', 'ful', self._has_positive_measure), + ('ousness', 'ous', self._has_positive_measure), + ('aliti', 'al', self._has_positive_measure), + ('iviti', 'ive', self._has_positive_measure), + ('biliti', 'ble', self._has_positive_measure), ] if self.mode == self.NLTK_EXTENSIONS: - rules.append(("fulli", "ful", self._has_positive_measure)) + rules.append(('fulli', 'ful', self._has_positive_measure)) # The 'l' of the 'logi' -> 'log' rule is put with the stem, # so that short stems like 'geo' 'theo' etc work like @@ -527,13 +531,13 @@ class PorterStemmer(StemmerI): return self._apply_rule_list( word, [ - ("icate", "ic", self._has_positive_measure), - ("ative", "", self._has_positive_measure), - ("alize", "al", self._has_positive_measure), - ("iciti", "ic", self._has_positive_measure), - ("ical", "ic", self._has_positive_measure), - ("ful", "", self._has_positive_measure), - ("ness", "", self._has_positive_measure), + ('icate', 'ic', self._has_positive_measure), + ('ative', '', self._has_positive_measure), + ('alize', 'al', self._has_positive_measure), + ('iciti', 'ic', self._has_positive_measure), + ('ical', 'ic', self._has_positive_measure), + ('ful', '', self._has_positive_measure), + ('ness', '', self._has_positive_measure), ], ) @@ -570,30 +574,30 @@ class PorterStemmer(StemmerI): return 
self._apply_rule_list( word, [ - ("al", "", measure_gt_1), - ("ance", "", measure_gt_1), - ("ence", "", measure_gt_1), - ("er", "", measure_gt_1), - ("ic", "", measure_gt_1), - ("able", "", measure_gt_1), - ("ible", "", measure_gt_1), - ("ant", "", measure_gt_1), - ("ement", "", measure_gt_1), - ("ment", "", measure_gt_1), - ("ent", "", measure_gt_1), + ('al', '', measure_gt_1), + ('ance', '', measure_gt_1), + ('ence', '', measure_gt_1), + ('er', '', measure_gt_1), + ('ic', '', measure_gt_1), + ('able', '', measure_gt_1), + ('ible', '', measure_gt_1), + ('ant', '', measure_gt_1), + ('ement', '', measure_gt_1), + ('ment', '', measure_gt_1), + ('ent', '', measure_gt_1), # (m>1 and (*S or *T)) ION -> ( - "ion", - "", - lambda stem: self._measure(stem) > 1 and stem[-1] in ("s", "t"), + 'ion', + '', + lambda stem: self._measure(stem) > 1 and stem[-1] in ('s', 't'), ), - ("ou", "", measure_gt_1), - ("ism", "", measure_gt_1), - ("ate", "", measure_gt_1), - ("iti", "", measure_gt_1), - ("ous", "", measure_gt_1), - ("ive", "", measure_gt_1), - ("ize", "", measure_gt_1), + ('ou', '', measure_gt_1), + ('ism', '', measure_gt_1), + ('ate', '', measure_gt_1), + ('iti', '', measure_gt_1), + ('ous', '', measure_gt_1), + ('ive', '', measure_gt_1), + ('ize', '', measure_gt_1), ], ) @@ -625,8 +629,8 @@ class PorterStemmer(StemmerI): # no explicit mention of the inconsistency; you have to infer it # from the examples. # For this reason, we can't use _apply_rule_list here. - if word.endswith("e"): - stem = self._replace_suffix(word, "e", "") + if word.endswith('e'): + stem = self._replace_suffix(word, 'e', '') if self._measure(stem) > 1: return stem if self._measure(stem) == 1 and not self._ends_cvc(stem): @@ -645,7 +649,7 @@ class PorterStemmer(StemmerI): roll -> roll """ return self._apply_rule_list( - word, [("ll", "l", lambda stem: self._measure(word[:-1]) > 1)] + word, [('ll', 'l', lambda stem: self._measure(word[:-1]) > 1)] ) def stem(self, word): @@ -672,7 +676,7 @@ class PorterStemmer(StemmerI): return stem def __repr__(self): - return "" + return '' def demo(): @@ -694,16 +698,16 @@ def demo(): stemmed.append(stemmer.stem(word)) # Convert the results to a string, and word-wrap them. - results = " ".join(stemmed) - results = re.sub(r"(.{,70})\s", r"\1\n", results + " ").rstrip() + results = ' '.join(stemmed) + results = re.sub(r"(.{,70})\s", r'\1\n', results + ' ').rstrip() # Convert the original to a string, and word wrap it. - original = " ".join(orig) - original = re.sub(r"(.{,70})\s", r"\1\n", original + " ").rstrip() + original = ' '.join(orig) + original = re.sub(r"(.{,70})\s", r'\1\n', original + ' ').rstrip() # Print the results. 
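For reference, the end-to-end behaviour of stem(), which simply chains the steps shown in this hunk; the expected outputs follow from the step-1a and step-1b rules above.

    # End-to-end PorterStemmer.stem() examples (default NLTK_EXTENSIONS mode).
    from nltk.stem.porter import PorterStemmer

    p = PorterStemmer()
    print(p.stem("caresses"))  # 'caress'  (step 1a: SSES -> SS)
    print(p.stem("ponies"))    # 'poni'    (step 1a: IES -> I)
    print(p.stem("running"))   # 'run'     (step 1b: -ing removed, double consonant reduced)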
- print("-Original-".center(70).replace(" ", "*").replace("-", " ")) + print('-Original-'.center(70).replace(' ', '*').replace('-', ' ')) print(original) - print("-Results-".center(70).replace(" ", "*").replace("-", " ")) + print('-Results-'.center(70).replace(' ', '*').replace('-', ' ')) print(results) - print("*" * 70) + print('*' * 70) diff --git a/nlp_resource_data/nltk/stem/regexp.py b/nlp_resource_data/nltk/stem/regexp.py index e00f232..8f6ead5 100644 --- a/nlp_resource_data/nltk/stem/regexp.py +++ b/nlp_resource_data/nltk/stem/regexp.py @@ -1,16 +1,19 @@ # Natural Language Toolkit: Stemmers # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Trevor Cohn # Edward Loper # Steven Bird # URL: # For license information, see LICENSE.TXT +from __future__ import unicode_literals import re from nltk.stem.api import StemmerI +from nltk.compat import python_2_unicode_compatible +@python_2_unicode_compatible class RegexpStemmer(StemmerI): """ A stemmer that uses regular expressions to identify morphological @@ -41,7 +44,7 @@ class RegexpStemmer(StemmerI): def __init__(self, regexp, min=0): - if not hasattr(regexp, "pattern"): + if not hasattr(regexp, 'pattern'): regexp = re.compile(regexp) self._regexp = regexp self._min = min @@ -50,7 +53,7 @@ class RegexpStemmer(StemmerI): if len(word) < self._min: return word else: - return self._regexp.sub("", word) + return self._regexp.sub('', word) def __repr__(self): - return "".format(self._regexp.pattern) + return ''.format(self._regexp.pattern) diff --git a/nlp_resource_data/nltk/stem/rslp.py b/nlp_resource_data/nltk/stem/rslp.py index 10f5de5..06184ee 100644 --- a/nlp_resource_data/nltk/stem/rslp.py +++ b/nlp_resource_data/nltk/stem/rslp.py @@ -2,7 +2,7 @@ # Natural Language Toolkit: RSLP Stemmer # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Tiago Tresoldi # URL: # For license information, see LICENSE.TXT @@ -30,7 +30,7 @@ # comentário, inclusive sobre o desenvolvimento de um stemmer diferente # e/ou melhor para o português. Também sugiro utilizar-se a lista de discussão # do NLTK para o português para qualquer debate. - +from __future__ import print_function, unicode_literals from nltk.data import load from nltk.stem.api import StemmerI @@ -65,7 +65,7 @@ class RSLPStemmer(StemmerI): self._model.append(self.read_rule("step6.pt")) def read_rule(self, filename): - rules = load("nltk:stemmers/rslp/" + filename, format="raw").decode("utf8") + rules = load('nltk:stemmers/rslp/' + filename, format='raw').decode("utf8") lines = rules.split("\n") lines = [line for line in lines if line != ""] # remove blank lines diff --git a/nlp_resource_data/nltk/stem/snowball.py b/nlp_resource_data/nltk/stem/snowball.py index aede6a4..f8e9214 100644 --- a/nlp_resource_data/nltk/stem/snowball.py +++ b/nlp_resource_data/nltk/stem/snowball.py @@ -2,7 +2,7 @@ # # Natural Language Toolkit: Snowball Stemmer # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Peter Michael Stahl # Peter Ljunglof (revisions) # Lakhdar Benzahia (co-writer) @@ -23,9 +23,12 @@ developed by Martin Porter. There is also a demo function: `snowball.demo()`. 
""" +from __future__ import unicode_literals, print_function import re +from six.moves import input +from nltk import compat from nltk.corpus import stopwords from nltk.stem import porter from nltk.stem.util import suffix_replace, prefix_replace @@ -115,6 +118,7 @@ class SnowballStemmer(StemmerI): return self.stemmer.stem(self, token) +@compat.python_2_unicode_compatible class _LanguageSpecificStemmer(StemmerI): """ @@ -205,7 +209,7 @@ class _ScandinavianStemmer(_LanguageSpecificStemmer): r1 = "" for i in range(1, len(word)): if word[i] not in vowels and word[i - 1] in vowels: - if 3 > len(word[: i + 1]) > 0: + if len(word[: i + 1]) < 3 and len(word[: i + 1]) > 0: r1 = word[3:] elif len(word[: i + 1]) >= 3: r1 = word[i + 1 :] @@ -319,196 +323,196 @@ class ArabicStemmer(_StandardStemmer): # Normalize_pre stes __vocalization = re.compile( - r"[\u064b-\u064c-\u064d-\u064e-\u064f-\u0650-\u0651-\u0652]" + r'[\u064b-\u064c-\u064d-\u064e-\u064f-\u0650-\u0651-\u0652]' ) # ً، ٌ، ٍ، َ، ُ، ِ، ّ، ْ - __kasheeda = re.compile(r"[\u0640]") # ـ tatweel/kasheeda + __kasheeda = re.compile(r'[\u0640]') # ـ tatweel/kasheeda - __arabic_punctuation_marks = re.compile(r"[\u060C-\u061B-\u061F]") # ؛ ، ؟ + __arabic_punctuation_marks = re.compile(r'[\u060C-\u061B-\u061F]') # ؛ ، ؟ # Normalize_post - __last_hamzat = ("\u0623", "\u0625", "\u0622", "\u0624", "\u0626") # أ، إ، آ، ؤ، ئ + __last_hamzat = ('\u0623', '\u0625', '\u0622', '\u0624', '\u0626') # أ، إ، آ، ؤ، ئ # normalize other hamza's - __initial_hamzat = re.compile(r"^[\u0622\u0623\u0625]") # أ، إ، آ + __initial_hamzat = re.compile(r'^[\u0622\u0623\u0625]') # أ، إ، آ - __waw_hamza = re.compile(r"[\u0624]") # ؤ + __waw_hamza = re.compile(r'[\u0624]') # ؤ - __yeh_hamza = re.compile(r"[\u0626]") # ئ + __yeh_hamza = re.compile(r'[\u0626]') # ئ - __alefat = re.compile(r"[\u0623\u0622\u0625]") # أ، إ، آ + __alefat = re.compile(r'[\u0623\u0622\u0625]') # أ، إ، آ # Checks __checks1 = ( - "\u0643\u0627\u0644", - "\u0628\u0627\u0644", # بال، كال - "\u0627\u0644", - "\u0644\u0644", # لل، ال + '\u0643\u0627\u0644', + '\u0628\u0627\u0644', # بال، كال + '\u0627\u0644', + '\u0644\u0644', # لل، ال ) - __checks2 = ("\u0629", "\u0627\u062a") # ة # female plural ات + __checks2 = ('\u0629', '\u0627\u062a') # ة # female plural ات # Suffixes __suffix_noun_step1a = ( - "\u064a", - "\u0643", - "\u0647", # ي، ك، ه - "\u0646\u0627", - "\u0643\u0645", - "\u0647\u0627", - "\u0647\u0646", - "\u0647\u0645", # نا، كم، ها، هن، هم - "\u0643\u0645\u0627", - "\u0647\u0645\u0627", # كما، هما + '\u064a', + '\u0643', + '\u0647', # ي، ك، ه + '\u0646\u0627', + '\u0643\u0645', + '\u0647\u0627', + '\u0647\u0646', + '\u0647\u0645', # نا، كم، ها، هن، هم + '\u0643\u0645\u0627', + '\u0647\u0645\u0627', # كما، هما ) - __suffix_noun_step1b = "\u0646" # ن + __suffix_noun_step1b = '\u0646' # ن - __suffix_noun_step2a = ("\u0627", "\u064a", "\u0648") # ا، ي، و + __suffix_noun_step2a = ('\u0627', '\u064a', '\u0648') # ا، ي، و - __suffix_noun_step2b = "\u0627\u062a" # ات + __suffix_noun_step2b = '\u0627\u062a' # ات - __suffix_noun_step2c1 = "\u062a" # ت + __suffix_noun_step2c1 = '\u062a' # ت - __suffix_noun_step2c2 = "\u0629" # ة + __suffix_noun_step2c2 = '\u0629' # ة - __suffix_noun_step3 = "\u064a" # ي + __suffix_noun_step3 = '\u064a' # ي __suffix_verb_step1 = ( - "\u0647", - "\u0643", # ه، ك - "\u0646\u064a", - "\u0646\u0627", - "\u0647\u0627", - "\u0647\u0645", # ني، نا، ها، هم - "\u0647\u0646", - "\u0643\u0645", - "\u0643\u0646", # هن، كم، كن - "\u0647\u0645\u0627", - "\u0643\u0645\u0627", - 
"\u0643\u0645\u0648", # هما، كما، كمو + '\u0647', + '\u0643', # ه، ك + '\u0646\u064a', + '\u0646\u0627', + '\u0647\u0627', + '\u0647\u0645', # ني، نا، ها، هم + '\u0647\u0646', + '\u0643\u0645', + '\u0643\u0646', # هن، كم، كن + '\u0647\u0645\u0627', + '\u0643\u0645\u0627', + '\u0643\u0645\u0648', # هما، كما، كمو ) __suffix_verb_step2a = ( - "\u062a", - "\u0627", - "\u0646", - "\u064a", # ت، ا، ن، ي - "\u0646\u0627", - "\u062a\u0627", - "\u062a\u0646", # نا، تا، تن Past - "\u0627\u0646", - "\u0648\u0646", - "\u064a\u0646", # ان، هن، ين Present - "\u062a\u0645\u0627", # تما + '\u062a', + '\u0627', + '\u0646', + '\u064a', # ت، ا، ن، ي + '\u0646\u0627', + '\u062a\u0627', + '\u062a\u0646', # نا، تا، تن Past + '\u0627\u0646', + '\u0648\u0646', + '\u064a\u0646', # ان، هن، ين Present + '\u062a\u0645\u0627', # تما ) - __suffix_verb_step2b = ("\u0648\u0627", "\u062a\u0645") # وا، تم + __suffix_verb_step2b = ('\u0648\u0627', '\u062a\u0645') # وا، تم - __suffix_verb_step2c = ("\u0648", "\u062a\u0645\u0648") # و # تمو + __suffix_verb_step2c = ('\u0648', '\u062a\u0645\u0648') # و # تمو - __suffix_all_alef_maqsura = "\u0649" # ى + __suffix_all_alef_maqsura = '\u0649' # ى # Prefixes __prefix_step1 = ( - "\u0623", # أ - "\u0623\u0623", - "\u0623\u0622", - "\u0623\u0624", - "\u0623\u0627", - "\u0623\u0625", # أأ، أآ، أؤ، أا، أإ + '\u0623', # أ + '\u0623\u0623', + '\u0623\u0622', + '\u0623\u0624', + '\u0623\u0627', + '\u0623\u0625', # أأ، أآ، أؤ، أا، أإ ) - __prefix_step2a = ("\u0641\u0627\u0644", "\u0648\u0627\u0644") # فال، وال + __prefix_step2a = ('\u0641\u0627\u0644', '\u0648\u0627\u0644') # فال، وال - __prefix_step2b = ("\u0641", "\u0648") # ف، و + __prefix_step2b = ('\u0641', '\u0648') # ف، و __prefix_step3a_noun = ( - "\u0627\u0644", - "\u0644\u0644", # لل، ال - "\u0643\u0627\u0644", - "\u0628\u0627\u0644", # بال، كال + '\u0627\u0644', + '\u0644\u0644', # لل، ال + '\u0643\u0627\u0644', + '\u0628\u0627\u0644', # بال، كال ) __prefix_step3b_noun = ( - "\u0628", - "\u0643", - "\u0644", # ب، ك، ل - "\u0628\u0628", - "\u0643\u0643", # بب، كك + '\u0628', + '\u0643', + '\u0644', # ب، ك، ل + '\u0628\u0628', + '\u0643\u0643', # بب، كك ) __prefix_step3_verb = ( - "\u0633\u064a", - "\u0633\u062a", - "\u0633\u0646", - "\u0633\u0623", + '\u0633\u064a', + '\u0633\u062a', + '\u0633\u0646', + '\u0633\u0623', ) # سي، ست، سن، سأ __prefix_step4_verb = ( - "\u064a\u0633\u062a", - "\u0646\u0633\u062a", - "\u062a\u0633\u062a", + '\u064a\u0633\u062a', + '\u0646\u0633\u062a', + '\u062a\u0633\u062a', ) # يست، نست، تست # Suffixes added due to Conjugation Verbs - __conjugation_suffix_verb_1 = ("\u0647", "\u0643") # ه، ك + __conjugation_suffix_verb_1 = ('\u0647', '\u0643') # ه، ك __conjugation_suffix_verb_2 = ( - "\u0646\u064a", - "\u0646\u0627", - "\u0647\u0627", # ني، نا، ها - "\u0647\u0645", - "\u0647\u0646", - "\u0643\u0645", # هم، هن، كم - "\u0643\u0646", # كن + '\u0646\u064a', + '\u0646\u0627', + '\u0647\u0627', # ني، نا، ها + '\u0647\u0645', + '\u0647\u0646', + '\u0643\u0645', # هم، هن، كم + '\u0643\u0646', # كن ) __conjugation_suffix_verb_3 = ( - "\u0647\u0645\u0627", - "\u0643\u0645\u0627", - "\u0643\u0645\u0648", + '\u0647\u0645\u0627', + '\u0643\u0645\u0627', + '\u0643\u0645\u0648', ) # هما، كما، كمو - __conjugation_suffix_verb_4 = ("\u0627", "\u0646", "\u064a") # ا، ن، ي + __conjugation_suffix_verb_4 = ('\u0627', '\u0646', '\u064a') # ا، ن، ي __conjugation_suffix_verb_past = ( - "\u0646\u0627", - "\u062a\u0627", - "\u062a\u0646", + '\u0646\u0627', + '\u062a\u0627', + '\u062a\u0646', ) # نا، تا، تن 
__conjugation_suffix_verb_present = ( - "\u0627\u0646", - "\u0648\u0646", - "\u064a\u0646", + '\u0627\u0646', + '\u0648\u0646', + '\u064a\u0646', ) # ان، ون، ين # Suffixes added due to derivation Names - __conjugation_suffix_noun_1 = ("\u064a", "\u0643", "\u0647") # ي، ك، ه + __conjugation_suffix_noun_1 = ('\u064a', '\u0643', '\u0647') # ي، ك، ه __conjugation_suffix_noun_2 = ( - "\u0646\u0627", - "\u0643\u0645", # نا، كم - "\u0647\u0627", - "\u0647\u0646", - "\u0647\u0645", # ها، هن، هم + '\u0646\u0627', + '\u0643\u0645', # نا، كم + '\u0647\u0627', + '\u0647\u0646', + '\u0647\u0645', # ها، هن، هم ) __conjugation_suffix_noun_3 = ( - "\u0643\u0645\u0627", - "\u0647\u0645\u0627", + '\u0643\u0645\u0627', + '\u0647\u0645\u0627', ) # كما، هما # Prefixes added due to derivation Names - __prefixes1 = ("\u0648\u0627", "\u0641\u0627") # فا، وا + __prefixes1 = ('\u0648\u0627', '\u0641\u0627') # فا، وا - __articles_3len = ("\u0643\u0627\u0644", "\u0628\u0627\u0644") # بال كال + __articles_3len = ('\u0643\u0627\u0644', '\u0628\u0627\u0644') # بال كال - __articles_2len = ("\u0627\u0644", "\u0644\u0644") # ال لل + __articles_2len = ('\u0627\u0644', '\u0644\u0644') # ال لل # Prepositions letters - __prepositions1 = ("\u0643", "\u0644") # ك، ل - __prepositions2 = ("\u0628\u0628", "\u0643\u0643") # بب، كك + __prepositions1 = ('\u0643', '\u0644') # ك، ل + __prepositions2 = ('\u0628\u0628', '\u0643\u0643') # بب، كك is_verb = True is_noun = True @@ -532,24 +536,24 @@ class ArabicStemmer(_StandardStemmer): :return: normalized token type string """ # strip diacritics - token = self.__vocalization.sub("", token) + token = self.__vocalization.sub('', token) # strip kasheeda - token = self.__kasheeda.sub("", token) + token = self.__kasheeda.sub('', token) # strip punctuation marks - token = self.__arabic_punctuation_marks.sub("", token) + token = self.__arabic_punctuation_marks.sub('', token) return token def __normalize_post(self, token): # normalize last hamza for hamza in self.__last_hamzat: if token.endswith(hamza): - token = suffix_replace(token, hamza, "\u0621") + token = suffix_replace(token, hamza, '\u0621') break # normalize other hamzat - token = self.__initial_hamzat.sub("\u0627", token) - token = self.__waw_hamza.sub("\u0648", token) - token = self.__yeh_hamza.sub("\u064a", token) - token = self.__alefat.sub("\u0627", token) + token = self.__initial_hamzat.sub('\u0627', token) + token = self.__waw_hamza.sub('\u0648', token) + token = self.__yeh_hamza.sub('\u064a', token) + token = self.__alefat.sub('\u0627', token) return token def __checks_1(self, token): @@ -570,12 +574,12 @@ class ArabicStemmer(_StandardStemmer): def __checks_2(self, token): for suffix in self.__checks2: if token.endswith(suffix): - if suffix == "\u0629" and len(token) > 2: + if suffix == '\u0629' and len(token) > 2: self.is_noun = True self.is_verb = False break - if suffix == "\u0627\u062a" and len(token) > 3: + if suffix == '\u0627\u062a' and len(token) > 3: self.is_noun = True self.is_verb = False break @@ -602,7 +606,7 @@ class ArabicStemmer(_StandardStemmer): def __Suffix_Verb_Step2a(self, token): for suffix in self.__suffix_verb_step2a: if token.endswith(suffix) and len(token) > 3: - if suffix == "\u062a" and len(token) >= 4: + if suffix == '\u062a' and len(token) >= 4: token = token[:-1] self.suffix_verb_step2a_success = True break @@ -622,7 +626,7 @@ class ArabicStemmer(_StandardStemmer): self.suffix_verb_step2a_success = True break - if suffix == "\u062a\u0645\u0627" and len(token) >= 6: + if suffix == '\u062a\u0645\u0627' 
and len(token) >= 6: token = token[:-3] self.suffix_verb_step2a_success = True break @@ -631,11 +635,11 @@ class ArabicStemmer(_StandardStemmer): def __Suffix_Verb_Step2c(self, token): for suffix in self.__suffix_verb_step2c: if token.endswith(suffix): - if suffix == "\u062a\u0645\u0648" and len(token) >= 6: + if suffix == '\u062a\u0645\u0648' and len(token) >= 6: token = token[:-3] break - if suffix == "\u0648" and len(token) >= 4: + if suffix == '\u0648' and len(token) >= 4: token = token[:-1] break return token @@ -716,30 +720,30 @@ class ArabicStemmer(_StandardStemmer): def __Suffix_All_alef_maqsura(self, token): for suffix in self.__suffix_all_alef_maqsura: if token.endswith(suffix): - token = suffix_replace(token, suffix, "\u064a") + token = suffix_replace(token, suffix, '\u064a') return token def __Prefix_Step1(self, token): for prefix in self.__prefix_step1: if token.startswith(prefix) and len(token) > 3: - if prefix == "\u0623\u0623": - token = prefix_replace(token, prefix, "\u0623") + if prefix == '\u0623\u0623': + token = prefix_replace(token, prefix, '\u0623') break - elif prefix == "\u0623\u0622": - token = prefix_replace(token, prefix, "\u0622") + elif prefix == '\u0623\u0622': + token = prefix_replace(token, prefix, '\u0622') break - elif prefix == "\u0623\u0624": - token = prefix_replace(token, prefix, "\u0624") + elif prefix == '\u0623\u0624': + token = prefix_replace(token, prefix, '\u0624') break - elif prefix == "\u0623\u0627": - token = prefix_replace(token, prefix, "\u0627") + elif prefix == '\u0623\u0627': + token = prefix_replace(token, prefix, '\u0627') break - elif prefix == "\u0623\u0625": - token = prefix_replace(token, prefix, "\u0625") + elif prefix == '\u0623\u0625': + token = prefix_replace(token, prefix, '\u0625') break return token @@ -775,7 +779,7 @@ class ArabicStemmer(_StandardStemmer): for prefix in self.__prefix_step3b_noun: if token.startswith(prefix): if len(token) > 3: - if prefix == "\u0628": + if prefix == '\u0628': token = token[len(prefix) :] self.prefix_step3b_noun_success = True break @@ -801,7 +805,7 @@ class ArabicStemmer(_StandardStemmer): def __Prefix_Step4_Verb(self, token): for prefix in self.__prefix_step4_verb: if token.startswith(prefix) and len(token) > 4: - token = prefix_replace(token, prefix, "\u0627\u0633\u062a") + token = prefix_replace(token, prefix, '\u0627\u0633\u062a') self.is_verb = True self.is_noun = False break @@ -1143,7 +1147,7 @@ class DutchStemmer(_StandardStemmer): # contains at least 3 letters. 
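Several of the Arabic stemmer branches above call suffix_replace() and prefix_replace() from nltk.stem.util. Their bodies are not part of this patch; the sketch below is an assumption inferred from how they are called (swap a known affix for a replacement string), not the actual NLTK implementation.

    # Assumed behaviour of the affix helpers used above (illustrative sketch).
    def suffix_replace(original, old, new):
        # drop the trailing affix `old`, append `new`
        return original[:-len(old)] + new

    def prefix_replace(original, old, new):
        # drop the leading affix `old`, prepend `new`
        return new + original[len(old):]

    print(suffix_replace("normalization", "ization", "ize"))  # 'normalize'
    print(prefix_replace("undo", "un", "re"))                 # 'redo'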
for i in range(1, len(word)): if word[i] not in self.__vowels and word[i - 1] in self.__vowels: - if 3 > len(word[: i + 1]) > 0: + if len(word[: i + 1]) < 3 and len(word[: i + 1]) > 0: r1 = word[3:] elif len(word[: i + 1]) == 0: return word @@ -1299,61 +1303,61 @@ class EnglishStemmer(_StandardStemmer): __step1a_suffixes = ("sses", "ied", "ies", "us", "ss", "s") __step1b_suffixes = ("eedly", "ingly", "edly", "eed", "ing", "ed") __step2_suffixes = ( - "ization", - "ational", - "fulness", - "ousness", - "iveness", - "tional", - "biliti", - "lessli", - "entli", - "ation", - "alism", - "aliti", - "ousli", - "iviti", - "fulli", - "enci", - "anci", - "abli", - "izer", - "ator", - "alli", - "bli", - "ogi", - "li", + 'ization', + 'ational', + 'fulness', + 'ousness', + 'iveness', + 'tional', + 'biliti', + 'lessli', + 'entli', + 'ation', + 'alism', + 'aliti', + 'ousli', + 'iviti', + 'fulli', + 'enci', + 'anci', + 'abli', + 'izer', + 'ator', + 'alli', + 'bli', + 'ogi', + 'li', ) __step3_suffixes = ( - "ational", - "tional", - "alize", - "icate", - "iciti", - "ative", - "ical", - "ness", - "ful", + 'ational', + 'tional', + 'alize', + 'icate', + 'iciti', + 'ative', + 'ical', + 'ness', + 'ful', ) __step4_suffixes = ( - "ement", - "ance", - "ence", - "able", - "ible", - "ment", - "ant", - "ent", - "ism", - "ate", - "iti", - "ous", - "ive", - "ize", - "ion", - "al", - "er", - "ic", + 'ement', + 'ance', + 'ence', + 'able', + 'ible', + 'ment', + 'ant', + 'ent', + 'ism', + 'ate', + 'iti', + 'ous', + 'ive', + 'ize', + 'ion', + 'al', + 'er', + 'ic', ) __step5_suffixes = ("e", "l") __special_words = { @@ -1835,65 +1839,65 @@ class FinnishStemmer(_StandardStemmer): "zz", ) __step1_suffixes = ( - "kaan", - "k\xE4\xE4n", - "sti", - "kin", - "han", - "h\xE4n", - "ko", - "k\xF6", - "pa", - "p\xE4", + 'kaan', + 'k\xE4\xE4n', + 'sti', + 'kin', + 'han', + 'h\xE4n', + 'ko', + 'k\xF6', + 'pa', + 'p\xE4', ) - __step2_suffixes = ("nsa", "ns\xE4", "mme", "nne", "si", "ni", "an", "\xE4n", "en") + __step2_suffixes = ('nsa', 'ns\xE4', 'mme', 'nne', 'si', 'ni', 'an', '\xE4n', 'en') __step3_suffixes = ( - "siin", - "tten", - "seen", - "han", - "hen", - "hin", - "hon", - "h\xE4n", - "h\xF6n", - "den", - "tta", - "tt\xE4", - "ssa", - "ss\xE4", - "sta", - "st\xE4", - "lla", - "ll\xE4", - "lta", - "lt\xE4", - "lle", - "ksi", - "ine", - "ta", - "t\xE4", - "na", - "n\xE4", - "a", - "\xE4", - "n", + 'siin', + 'tten', + 'seen', + 'han', + 'hen', + 'hin', + 'hon', + 'h\xE4n', + 'h\xF6n', + 'den', + 'tta', + 'tt\xE4', + 'ssa', + 'ss\xE4', + 'sta', + 'st\xE4', + 'lla', + 'll\xE4', + 'lta', + 'lt\xE4', + 'lle', + 'ksi', + 'ine', + 'ta', + 't\xE4', + 'na', + 'n\xE4', + 'a', + '\xE4', + 'n', ) __step4_suffixes = ( - "impi", - "impa", - "imp\xE4", - "immi", - "imma", - "imm\xE4", - "mpi", - "mpa", - "mp\xE4", - "mmi", - "mma", - "mm\xE4", - "eja", - "ej\xE4", + 'impi', + 'impa', + 'imp\xE4', + 'immi', + 'imma', + 'imm\xE4', + 'mpi', + 'mpa', + 'mp\xE4', + 'mmi', + 'mma', + 'mm\xE4', + 'eja', + 'ej\xE4', ) def stem(self, word): @@ -2145,128 +2149,128 @@ class FrenchStemmer(_StandardStemmer): __vowels = "aeiouy\xE2\xE0\xEB\xE9\xEA\xE8\xEF\xEE\xF4\xFB\xF9" __step1_suffixes = ( - "issements", - "issement", - "atrices", - "atrice", - "ateurs", - "ations", - "logies", - "usions", - "utions", - "ements", - "amment", - "emment", - "ances", - "iqUes", - "ismes", - "ables", - "istes", - "ateur", - "ation", - "logie", - "usion", - "ution", - "ences", - "ement", - "euses", - "ments", - "ance", - "iqUe", - "isme", - "able", - "iste", - "ence", - "it\xE9s", 
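The Scandinavian and Dutch hunks above, and the German one further down, all rewrite the chained comparison `3 > len(word[: i + 1]) > 0` as an explicit `and`; the two forms are equivalent. What the surrounding loop computes is the Snowball R1 region, the part of the word after the first non-vowel that follows a vowel, adjusted so that R1 never starts before index 3. A standalone sketch:

    # Sketch of the R1 computation touched in those hunks. The vowel set is
    # illustrative; each stemmer class defines its own.
    def r1_region(word, vowels="aeiouy\u00e4\u00f6\u00fc"):
        for i in range(1, len(word)):
            if word[i] not in vowels and word[i - 1] in vowels:
                return word[max(i + 1, 3):]
        return ""

    print(r1_region("beautiful"))  # 'iful'
    print(r1_region("animal"))     # 'mal' (start pushed out to index 3)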
- "ives", - "eaux", - "euse", - "ment", - "eux", - "it\xE9", - "ive", - "ifs", - "aux", - "if", + 'issements', + 'issement', + 'atrices', + 'atrice', + 'ateurs', + 'ations', + 'logies', + 'usions', + 'utions', + 'ements', + 'amment', + 'emment', + 'ances', + 'iqUes', + 'ismes', + 'ables', + 'istes', + 'ateur', + 'ation', + 'logie', + 'usion', + 'ution', + 'ences', + 'ement', + 'euses', + 'ments', + 'ance', + 'iqUe', + 'isme', + 'able', + 'iste', + 'ence', + 'it\xE9s', + 'ives', + 'eaux', + 'euse', + 'ment', + 'eux', + 'it\xE9', + 'ive', + 'ifs', + 'aux', + 'if', ) __step2a_suffixes = ( - "issaIent", - "issantes", - "iraIent", - "issante", - "issants", - "issions", - "irions", - "issais", - "issait", - "issant", - "issent", - "issiez", - "issons", - "irais", - "irait", - "irent", - "iriez", - "irons", - "iront", - "isses", - "issez", - "\xEEmes", - "\xEEtes", - "irai", - "iras", - "irez", - "isse", - "ies", - "ira", - "\xEEt", - "ie", - "ir", - "is", - "it", - "i", + 'issaIent', + 'issantes', + 'iraIent', + 'issante', + 'issants', + 'issions', + 'irions', + 'issais', + 'issait', + 'issant', + 'issent', + 'issiez', + 'issons', + 'irais', + 'irait', + 'irent', + 'iriez', + 'irons', + 'iront', + 'isses', + 'issez', + '\xEEmes', + '\xEEtes', + 'irai', + 'iras', + 'irez', + 'isse', + 'ies', + 'ira', + '\xEEt', + 'ie', + 'ir', + 'is', + 'it', + 'i', ) __step2b_suffixes = ( - "eraIent", - "assions", - "erions", - "assent", - "assiez", - "\xE8rent", - "erais", - "erait", - "eriez", - "erons", - "eront", - "aIent", - "antes", - "asses", - "ions", - "erai", - "eras", - "erez", - "\xE2mes", - "\xE2tes", - "ante", - "ants", - "asse", - "\xE9es", - "era", - "iez", - "ais", - "ait", - "ant", - "\xE9e", - "\xE9s", - "er", - "ez", - "\xE2t", - "ai", - "as", - "\xE9", - "a", + 'eraIent', + 'assions', + 'erions', + 'assent', + 'assiez', + '\xE8rent', + 'erais', + 'erait', + 'eriez', + 'erons', + 'eront', + 'aIent', + 'antes', + 'asses', + 'ions', + 'erai', + 'eras', + 'erez', + '\xE2mes', + '\xE2tes', + 'ante', + 'ants', + 'asse', + '\xE9es', + 'era', + 'iez', + 'ais', + 'ait', + 'ant', + '\xE9e', + '\xE9s', + 'er', + 'ez', + '\xE2t', + 'ai', + 'as', + '\xE9', + 'a', ) - __step4_suffixes = ("i\xE8re", "I\xE8re", "ion", "ier", "Ier", "e", "\xEB") + __step4_suffixes = ('i\xE8re', 'I\xE8re', 'ion', 'ier', 'Ier', 'e', '\xEB') def stem(self, word): """ @@ -2485,48 +2489,48 @@ class FrenchStemmer(_StandardStemmer): step2b_success = True elif suffix in ( - "eraIent", - "erions", - "\xE8rent", - "erais", - "erait", - "eriez", - "erons", - "eront", - "erai", - "eras", - "erez", - "\xE9es", - "era", - "iez", - "\xE9e", - "\xE9s", - "er", - "ez", - "\xE9", + 'eraIent', + 'erions', + '\xE8rent', + 'erais', + 'erait', + 'eriez', + 'erons', + 'eront', + 'erai', + 'eras', + 'erez', + '\xE9es', + 'era', + 'iez', + '\xE9e', + '\xE9s', + 'er', + 'ez', + '\xE9', ): word = word[: -len(suffix)] step2b_success = True elif suffix in ( - "assions", - "assent", - "assiez", - "aIent", - "antes", - "asses", - "\xE2mes", - "\xE2tes", - "ante", - "ants", - "asse", - "ais", - "ait", - "ant", - "\xE2t", - "ai", - "as", - "a", + 'assions', + 'assent', + 'assiez', + 'aIent', + 'antes', + 'asses', + '\xE2mes', + '\xE2tes', + 'ante', + 'ants', + 'asse', + 'ais', + 'ait', + 'ant', + '\xE2t', + 'ai', + 'as', + 'a', ): word = word[: -len(suffix)] rv = rv[: -len(suffix)] @@ -2681,7 +2685,7 @@ class GermanStemmer(_StandardStemmer): # contains at least 3 letters. 
for i in range(1, len(word)): if word[i] not in self.__vowels and word[i - 1] in self.__vowels: - if 3 > len(word[: i + 1]) > 0: + if len(word[: i + 1]) < 3 and len(word[: i + 1]) > 0: r1 = word[3:] elif len(word[: i + 1]) == 0: return word @@ -2838,151 +2842,151 @@ class HungarianStemmer(_LanguageSpecificStemmer): __step1_suffixes = ("al", "el") __step2_suffixes = ( - "k\xE9ppen", - "onk\xE9nt", - "enk\xE9nt", - "ank\xE9nt", - "k\xE9pp", - "k\xE9nt", - "ban", - "ben", - "nak", - "nek", - "val", - "vel", - "t\xF3l", - "t\xF5l", - "r\xF3l", - "r\xF5l", - "b\xF3l", - "b\xF5l", - "hoz", - "hez", - "h\xF6z", - "n\xE1l", - "n\xE9l", - "\xE9rt", - "kor", - "ba", - "be", - "ra", - "re", - "ig", - "at", - "et", - "ot", - "\xF6t", - "ul", - "\xFCl", - "v\xE1", - "v\xE9", - "en", - "on", - "an", - "\xF6n", - "n", - "t", + 'k\xE9ppen', + 'onk\xE9nt', + 'enk\xE9nt', + 'ank\xE9nt', + 'k\xE9pp', + 'k\xE9nt', + 'ban', + 'ben', + 'nak', + 'nek', + 'val', + 'vel', + 't\xF3l', + 't\xF5l', + 'r\xF3l', + 'r\xF5l', + 'b\xF3l', + 'b\xF5l', + 'hoz', + 'hez', + 'h\xF6z', + 'n\xE1l', + 'n\xE9l', + '\xE9rt', + 'kor', + 'ba', + 'be', + 'ra', + 're', + 'ig', + 'at', + 'et', + 'ot', + '\xF6t', + 'ul', + '\xFCl', + 'v\xE1', + 'v\xE9', + 'en', + 'on', + 'an', + '\xF6n', + 'n', + 't', ) __step3_suffixes = ("\xE1nk\xE9nt", "\xE1n", "\xE9n") __step4_suffixes = ( - "astul", - "est\xFCl", - "\xE1stul", - "\xE9st\xFCl", - "stul", - "st\xFCl", + 'astul', + 'est\xFCl', + '\xE1stul', + '\xE9st\xFCl', + 'stul', + 'st\xFCl', ) __step5_suffixes = ("\xE1", "\xE9") __step6_suffixes = ( - "ok\xE9", - "\xF6k\xE9", - "ak\xE9", - "ek\xE9", - "\xE1k\xE9", - "\xE1\xE9i", - "\xE9k\xE9", - "\xE9\xE9i", - "k\xE9", - "\xE9i", - "\xE9\xE9", - "\xE9", + 'ok\xE9', + '\xF6k\xE9', + 'ak\xE9', + 'ek\xE9', + '\xE1k\xE9', + '\xE1\xE9i', + '\xE9k\xE9', + '\xE9\xE9i', + 'k\xE9', + '\xE9i', + '\xE9\xE9', + '\xE9', ) __step7_suffixes = ( - "\xE1juk", - "\xE9j\xFCk", - "\xFCnk", - "unk", - "juk", - "j\xFCk", - "\xE1nk", - "\xE9nk", - "nk", - "uk", - "\xFCk", - "em", - "om", - "am", - "od", - "ed", - "ad", - "\xF6d", - "ja", - "je", - "\xE1m", - "\xE1d", - "\xE9m", - "\xE9d", - "m", - "d", - "a", - "e", - "o", - "\xE1", - "\xE9", + '\xE1juk', + '\xE9j\xFCk', + '\xFCnk', + 'unk', + 'juk', + 'j\xFCk', + '\xE1nk', + '\xE9nk', + 'nk', + 'uk', + '\xFCk', + 'em', + 'om', + 'am', + 'od', + 'ed', + 'ad', + '\xF6d', + 'ja', + 'je', + '\xE1m', + '\xE1d', + '\xE9m', + '\xE9d', + 'm', + 'd', + 'a', + 'e', + 'o', + '\xE1', + '\xE9', ) __step8_suffixes = ( - "jaitok", - "jeitek", - "jaink", - "jeink", - "aitok", - "eitek", - "\xE1itok", - "\xE9itek", - "jaim", - "jeim", - "jaid", - "jeid", - "eink", - "aink", - "itek", - "jeik", - "jaik", - "\xE1ink", - "\xE9ink", - "aim", - "eim", - "aid", - "eid", - "jai", - "jei", - "ink", - "aik", - "eik", - "\xE1im", - "\xE1id", - "\xE1ik", - "\xE9im", - "\xE9id", - "\xE9ik", - "im", - "id", - "ai", - "ei", - "ik", - "\xE1i", - "\xE9i", - "i", + 'jaitok', + 'jeitek', + 'jaink', + 'jeink', + 'aitok', + 'eitek', + '\xE1itok', + '\xE9itek', + 'jaim', + 'jeim', + 'jaid', + 'jeid', + 'eink', + 'aink', + 'itek', + 'jeik', + 'jaik', + '\xE1ink', + '\xE9ink', + 'aim', + 'eim', + 'aid', + 'eid', + 'jai', + 'jei', + 'ink', + 'aik', + 'eik', + '\xE1im', + '\xE1id', + '\xE1ik', + '\xE9im', + '\xE9id', + '\xE9ik', + 'im', + 'id', + 'ai', + 'ei', + 'ik', + '\xE1i', + '\xE9i', + 'i', ) __step9_suffixes = ("\xE1k", "\xE9k", "\xF6k", "ok", "ek", "ak", "k") @@ -3207,185 +3211,185 @@ class ItalianStemmer(_StandardStemmer): __vowels = 
"aeiou\xE0\xE8\xEC\xF2\xF9" __step0_suffixes = ( - "gliela", - "gliele", - "glieli", - "glielo", - "gliene", - "sene", - "mela", - "mele", - "meli", - "melo", - "mene", - "tela", - "tele", - "teli", - "telo", - "tene", - "cela", - "cele", - "celi", - "celo", - "cene", - "vela", - "vele", - "veli", - "velo", - "vene", - "gli", - "ci", - "la", - "le", - "li", - "lo", - "mi", - "ne", - "si", - "ti", - "vi", + 'gliela', + 'gliele', + 'glieli', + 'glielo', + 'gliene', + 'sene', + 'mela', + 'mele', + 'meli', + 'melo', + 'mene', + 'tela', + 'tele', + 'teli', + 'telo', + 'tene', + 'cela', + 'cele', + 'celi', + 'celo', + 'cene', + 'vela', + 'vele', + 'veli', + 'velo', + 'vene', + 'gli', + 'ci', + 'la', + 'le', + 'li', + 'lo', + 'mi', + 'ne', + 'si', + 'ti', + 'vi', ) __step1_suffixes = ( - "atrice", - "atrici", - "azione", - "azioni", - "uzione", - "uzioni", - "usione", - "usioni", - "amento", - "amenti", - "imento", - "imenti", - "amente", - "abile", - "abili", - "ibile", - "ibili", - "mente", - "atore", - "atori", - "logia", - "logie", - "anza", - "anze", - "iche", - "ichi", - "ismo", - "ismi", - "ista", - "iste", - "isti", - "ist\xE0", - "ist\xE8", - "ist\xEC", - "ante", - "anti", - "enza", - "enze", - "ico", - "ici", - "ica", - "ice", - "oso", - "osi", - "osa", - "ose", - "it\xE0", - "ivo", - "ivi", - "iva", - "ive", + 'atrice', + 'atrici', + 'azione', + 'azioni', + 'uzione', + 'uzioni', + 'usione', + 'usioni', + 'amento', + 'amenti', + 'imento', + 'imenti', + 'amente', + 'abile', + 'abili', + 'ibile', + 'ibili', + 'mente', + 'atore', + 'atori', + 'logia', + 'logie', + 'anza', + 'anze', + 'iche', + 'ichi', + 'ismo', + 'ismi', + 'ista', + 'iste', + 'isti', + 'ist\xE0', + 'ist\xE8', + 'ist\xEC', + 'ante', + 'anti', + 'enza', + 'enze', + 'ico', + 'ici', + 'ica', + 'ice', + 'oso', + 'osi', + 'osa', + 'ose', + 'it\xE0', + 'ivo', + 'ivi', + 'iva', + 'ive', ) __step2_suffixes = ( - "erebbero", - "irebbero", - "assero", - "assimo", - "eranno", - "erebbe", - "eremmo", - "ereste", - "eresti", - "essero", - "iranno", - "irebbe", - "iremmo", - "ireste", - "iresti", - "iscano", - "iscono", - "issero", - "arono", - "avamo", - "avano", - "avate", - "eremo", - "erete", - "erono", - "evamo", - "evano", - "evate", - "iremo", - "irete", - "irono", - "ivamo", - "ivano", - "ivate", - "ammo", - "ando", - "asse", - "assi", - "emmo", - "enda", - "ende", - "endi", - "endo", - "erai", - "erei", - "Yamo", - "iamo", - "immo", - "irai", - "irei", - "isca", - "isce", - "isci", - "isco", - "ano", - "are", - "ata", - "ate", - "ati", - "ato", - "ava", - "avi", - "avo", - "er\xE0", - "ere", - "er\xF2", - "ete", - "eva", - "evi", - "evo", - "ir\xE0", - "ire", - "ir\xF2", - "ita", - "ite", - "iti", - "ito", - "iva", - "ivi", - "ivo", - "ono", - "uta", - "ute", - "uti", - "uto", - "ar", - "ir", + 'erebbero', + 'irebbero', + 'assero', + 'assimo', + 'eranno', + 'erebbe', + 'eremmo', + 'ereste', + 'eresti', + 'essero', + 'iranno', + 'irebbe', + 'iremmo', + 'ireste', + 'iresti', + 'iscano', + 'iscono', + 'issero', + 'arono', + 'avamo', + 'avano', + 'avate', + 'eremo', + 'erete', + 'erono', + 'evamo', + 'evano', + 'evate', + 'iremo', + 'irete', + 'irono', + 'ivamo', + 'ivano', + 'ivate', + 'ammo', + 'ando', + 'asse', + 'assi', + 'emmo', + 'enda', + 'ende', + 'endi', + 'endo', + 'erai', + 'erei', + 'Yamo', + 'iamo', + 'immo', + 'irai', + 'irei', + 'isca', + 'isce', + 'isci', + 'isco', + 'ano', + 'are', + 'ata', + 'ate', + 'ati', + 'ato', + 'ava', + 'avi', + 'avo', + 'er\xE0', + 'ere', + 'er\xF2', + 'ete', + 'eva', + 'evi', + 'evo', + 
'ir\xE0', + 'ire', + 'ir\xF2', + 'ita', + 'ite', + 'iti', + 'ito', + 'iva', + 'ivi', + 'ivo', + 'ono', + 'uta', + 'ute', + 'uti', + 'uto', + 'ar', + 'ir', ) def stem(self, word): @@ -3705,175 +3709,175 @@ class PortugueseStemmer(_StandardStemmer): __vowels = "aeiou\xE1\xE9\xED\xF3\xFA\xE2\xEA\xF4" __step1_suffixes = ( - "amentos", - "imentos", - "uço~es", - "amento", - "imento", - "adoras", - "adores", - "a\xE7o~es", - "logias", - "\xEAncias", - "amente", - "idades", - "an\xE7as", - "ismos", - "istas", - "adora", - "a\xE7a~o", - "antes", - "\xE2ncia", - "logia", - "uça~o", - "\xEAncia", - "mente", - "idade", - "an\xE7a", - "ezas", - "icos", - "icas", - "ismo", - "\xE1vel", - "\xEDvel", - "ista", - "osos", - "osas", - "ador", - "ante", - "ivas", - "ivos", - "iras", - "eza", - "ico", - "ica", - "oso", - "osa", - "iva", - "ivo", - "ira", + 'amentos', + 'imentos', + 'uço~es', + 'amento', + 'imento', + 'adoras', + 'adores', + 'a\xE7o~es', + 'logias', + '\xEAncias', + 'amente', + 'idades', + 'an\xE7as', + 'ismos', + 'istas', + 'adora', + 'a\xE7a~o', + 'antes', + '\xE2ncia', + 'logia', + 'uça~o', + '\xEAncia', + 'mente', + 'idade', + 'an\xE7a', + 'ezas', + 'icos', + 'icas', + 'ismo', + '\xE1vel', + '\xEDvel', + 'ista', + 'osos', + 'osas', + 'ador', + 'ante', + 'ivas', + 'ivos', + 'iras', + 'eza', + 'ico', + 'ica', + 'oso', + 'osa', + 'iva', + 'ivo', + 'ira', ) __step2_suffixes = ( - "ar\xEDamos", - "er\xEDamos", - "ir\xEDamos", - "\xE1ssemos", - "\xEAssemos", - "\xEDssemos", - "ar\xEDeis", - "er\xEDeis", - "ir\xEDeis", - "\xE1sseis", - "\xE9sseis", - "\xEDsseis", - "\xE1ramos", - "\xE9ramos", - "\xEDramos", - "\xE1vamos", - "aremos", - "eremos", - "iremos", - "ariam", - "eriam", - "iriam", - "assem", - "essem", - "issem", - "ara~o", - "era~o", - "ira~o", - "arias", - "erias", - "irias", - "ardes", - "erdes", - "irdes", - "asses", - "esses", - "isses", - "astes", - "estes", - "istes", - "\xE1reis", - "areis", - "\xE9reis", - "ereis", - "\xEDreis", - "ireis", - "\xE1veis", - "\xEDamos", - "armos", - "ermos", - "irmos", - "aria", - "eria", - "iria", - "asse", - "esse", - "isse", - "aste", - "este", - "iste", - "arei", - "erei", - "irei", - "aram", - "eram", - "iram", - "avam", - "arem", - "erem", - "irem", - "ando", - "endo", - "indo", - "adas", - "idas", - "ar\xE1s", - "aras", - "er\xE1s", - "eras", - "ir\xE1s", - "avas", - "ares", - "eres", - "ires", - "\xEDeis", - "ados", - "idos", - "\xE1mos", - "amos", - "emos", - "imos", - "iras", - "ada", - "ida", - "ar\xE1", - "ara", - "er\xE1", - "era", - "ir\xE1", - "ava", - "iam", - "ado", - "ido", - "ias", - "ais", - "eis", - "ira", - "ia", - "ei", - "am", - "em", - "ar", - "er", - "ir", - "as", - "es", - "is", - "eu", - "iu", - "ou", + 'ar\xEDamos', + 'er\xEDamos', + 'ir\xEDamos', + '\xE1ssemos', + '\xEAssemos', + '\xEDssemos', + 'ar\xEDeis', + 'er\xEDeis', + 'ir\xEDeis', + '\xE1sseis', + '\xE9sseis', + '\xEDsseis', + '\xE1ramos', + '\xE9ramos', + '\xEDramos', + '\xE1vamos', + 'aremos', + 'eremos', + 'iremos', + 'ariam', + 'eriam', + 'iriam', + 'assem', + 'essem', + 'issem', + 'ara~o', + 'era~o', + 'ira~o', + 'arias', + 'erias', + 'irias', + 'ardes', + 'erdes', + 'irdes', + 'asses', + 'esses', + 'isses', + 'astes', + 'estes', + 'istes', + '\xE1reis', + 'areis', + '\xE9reis', + 'ereis', + '\xEDreis', + 'ireis', + '\xE1veis', + '\xEDamos', + 'armos', + 'ermos', + 'irmos', + 'aria', + 'eria', + 'iria', + 'asse', + 'esse', + 'isse', + 'aste', + 'este', + 'iste', + 'arei', + 'erei', + 'irei', + 'aram', + 'eram', + 'iram', + 'avam', + 'arem', + 'erem', + 
'irem', + 'ando', + 'endo', + 'indo', + 'adas', + 'idas', + 'ar\xE1s', + 'aras', + 'er\xE1s', + 'eras', + 'ir\xE1s', + 'avas', + 'ares', + 'eres', + 'ires', + '\xEDeis', + 'ados', + 'idos', + '\xE1mos', + 'amos', + 'emos', + 'imos', + 'iras', + 'ada', + 'ida', + 'ar\xE1', + 'ara', + 'er\xE1', + 'era', + 'ir\xE1', + 'ava', + 'iam', + 'ado', + 'ido', + 'ias', + 'ais', + 'eis', + 'ira', + 'ia', + 'ei', + 'am', + 'em', + 'ar', + 'er', + 'ir', + 'as', + 'es', + 'is', + 'eu', + 'iu', + 'ou', ) __step4_suffixes = ("os", "a", "i", "o", "\xE1", "\xED", "\xF3") @@ -4053,230 +4057,230 @@ class RomanianStemmer(_StandardStemmer): __vowels = "aeiou\u0103\xE2\xEE" __step0_suffixes = ( - "iilor", - "ului", - "elor", - "iile", - "ilor", - "atei", - "a\u0163ie", - "a\u0163ia", - "aua", - "ele", - "iua", - "iei", - "ile", - "ul", - "ea", - "ii", + 'iilor', + 'ului', + 'elor', + 'iile', + 'ilor', + 'atei', + 'a\u0163ie', + 'a\u0163ia', + 'aua', + 'ele', + 'iua', + 'iei', + 'ile', + 'ul', + 'ea', + 'ii', ) __step1_suffixes = ( - "abilitate", - "abilitati", - "abilit\u0103\u0163i", - "ibilitate", - "abilit\u0103i", - "ivitate", - "ivitati", - "ivit\u0103\u0163i", - "icitate", - "icitati", - "icit\u0103\u0163i", - "icatori", - "ivit\u0103i", - "icit\u0103i", - "icator", - "a\u0163iune", - "atoare", - "\u0103toare", - "i\u0163iune", - "itoare", - "iciva", - "icive", - "icivi", - "iciv\u0103", - "icala", - "icale", - "icali", - "ical\u0103", - "ativa", - "ative", - "ativi", - "ativ\u0103", - "atori", - "\u0103tori", - "itiva", - "itive", - "itivi", - "itiv\u0103", - "itori", - "iciv", - "ical", - "ativ", - "ator", - "\u0103tor", - "itiv", - "itor", + 'abilitate', + 'abilitati', + 'abilit\u0103\u0163i', + 'ibilitate', + 'abilit\u0103i', + 'ivitate', + 'ivitati', + 'ivit\u0103\u0163i', + 'icitate', + 'icitati', + 'icit\u0103\u0163i', + 'icatori', + 'ivit\u0103i', + 'icit\u0103i', + 'icator', + 'a\u0163iune', + 'atoare', + '\u0103toare', + 'i\u0163iune', + 'itoare', + 'iciva', + 'icive', + 'icivi', + 'iciv\u0103', + 'icala', + 'icale', + 'icali', + 'ical\u0103', + 'ativa', + 'ative', + 'ativi', + 'ativ\u0103', + 'atori', + '\u0103tori', + 'itiva', + 'itive', + 'itivi', + 'itiv\u0103', + 'itori', + 'iciv', + 'ical', + 'ativ', + 'ator', + '\u0103tor', + 'itiv', + 'itor', ) __step2_suffixes = ( - "abila", - "abile", - "abili", - "abil\u0103", - "ibila", - "ibile", - "ibili", - "ibil\u0103", - "atori", - "itate", - "itati", - "it\u0103\u0163i", - "abil", - "ibil", - "oasa", - "oas\u0103", - "oase", - "anta", - "ante", - "anti", - "ant\u0103", - "ator", - "it\u0103i", - "iune", - "iuni", - "isme", - "ista", - "iste", - "isti", - "ist\u0103", - "i\u015Fti", - "ata", - "at\u0103", - "ati", - "ate", - "uta", - "ut\u0103", - "uti", - "ute", - "ita", - "it\u0103", - "iti", - "ite", - "ica", - "ice", - "ici", - "ic\u0103", - "osi", - "o\u015Fi", - "ant", - "iva", - "ive", - "ivi", - "iv\u0103", - "ism", - "ist", - "at", - "ut", - "it", - "ic", - "os", - "iv", + 'abila', + 'abile', + 'abili', + 'abil\u0103', + 'ibila', + 'ibile', + 'ibili', + 'ibil\u0103', + 'atori', + 'itate', + 'itati', + 'it\u0103\u0163i', + 'abil', + 'ibil', + 'oasa', + 'oas\u0103', + 'oase', + 'anta', + 'ante', + 'anti', + 'ant\u0103', + 'ator', + 'it\u0103i', + 'iune', + 'iuni', + 'isme', + 'ista', + 'iste', + 'isti', + 'ist\u0103', + 'i\u015Fti', + 'ata', + 'at\u0103', + 'ati', + 'ate', + 'uta', + 'ut\u0103', + 'uti', + 'ute', + 'ita', + 'it\u0103', + 'iti', + 'ite', + 'ica', + 'ice', + 'ici', + 'ic\u0103', + 'osi', + 'o\u015Fi', + 'ant', + 'iva', + 
'ive', + 'ivi', + 'iv\u0103', + 'ism', + 'ist', + 'at', + 'ut', + 'it', + 'ic', + 'os', + 'iv', ) __step3_suffixes = ( - "seser\u0103\u0163i", - "aser\u0103\u0163i", - "iser\u0103\u0163i", - "\xE2ser\u0103\u0163i", - "user\u0103\u0163i", - "seser\u0103m", - "aser\u0103m", - "iser\u0103m", - "\xE2ser\u0103m", - "user\u0103m", - "ser\u0103\u0163i", - "sese\u015Fi", - "seser\u0103", - "easc\u0103", - "ar\u0103\u0163i", - "ur\u0103\u0163i", - "ir\u0103\u0163i", - "\xE2r\u0103\u0163i", - "ase\u015Fi", - "aser\u0103", - "ise\u015Fi", - "iser\u0103", - "\xe2se\u015Fi", - "\xE2ser\u0103", - "use\u015Fi", - "user\u0103", - "ser\u0103m", - "sesem", - "indu", - "\xE2ndu", - "eaz\u0103", - "e\u015Fti", - "e\u015Fte", - "\u0103\u015Fti", - "\u0103\u015Fte", - "ea\u0163i", - "ia\u0163i", - "ar\u0103m", - "ur\u0103m", - "ir\u0103m", - "\xE2r\u0103m", - "asem", - "isem", - "\xE2sem", - "usem", - "se\u015Fi", - "ser\u0103", - "sese", - "are", - "ere", - "ire", - "\xE2re", - "ind", - "\xE2nd", - "eze", - "ezi", - "esc", - "\u0103sc", - "eam", - "eai", - "eau", - "iam", - "iai", - "iau", - "a\u015Fi", - "ar\u0103", - "u\u015Fi", - "ur\u0103", - "i\u015Fi", - "ir\u0103", - "\xE2\u015Fi", - "\xe2r\u0103", - "ase", - "ise", - "\xE2se", - "use", - "a\u0163i", - "e\u0163i", - "i\u0163i", - "\xe2\u0163i", - "sei", - "ez", - "am", - "ai", - "au", - "ea", - "ia", - "ui", - "\xE2i", - "\u0103m", - "em", - "im", - "\xE2m", - "se", + 'seser\u0103\u0163i', + 'aser\u0103\u0163i', + 'iser\u0103\u0163i', + '\xE2ser\u0103\u0163i', + 'user\u0103\u0163i', + 'seser\u0103m', + 'aser\u0103m', + 'iser\u0103m', + '\xE2ser\u0103m', + 'user\u0103m', + 'ser\u0103\u0163i', + 'sese\u015Fi', + 'seser\u0103', + 'easc\u0103', + 'ar\u0103\u0163i', + 'ur\u0103\u0163i', + 'ir\u0103\u0163i', + '\xE2r\u0103\u0163i', + 'ase\u015Fi', + 'aser\u0103', + 'ise\u015Fi', + 'iser\u0103', + '\xe2se\u015Fi', + '\xE2ser\u0103', + 'use\u015Fi', + 'user\u0103', + 'ser\u0103m', + 'sesem', + 'indu', + '\xE2ndu', + 'eaz\u0103', + 'e\u015Fti', + 'e\u015Fte', + '\u0103\u015Fti', + '\u0103\u015Fte', + 'ea\u0163i', + 'ia\u0163i', + 'ar\u0103m', + 'ur\u0103m', + 'ir\u0103m', + '\xE2r\u0103m', + 'asem', + 'isem', + '\xE2sem', + 'usem', + 'se\u015Fi', + 'ser\u0103', + 'sese', + 'are', + 'ere', + 'ire', + '\xE2re', + 'ind', + '\xE2nd', + 'eze', + 'ezi', + 'esc', + '\u0103sc', + 'eam', + 'eai', + 'eau', + 'iam', + 'iai', + 'iau', + 'a\u015Fi', + 'ar\u0103', + 'u\u015Fi', + 'ur\u0103', + 'i\u015Fi', + 'ir\u0103', + '\xE2\u015Fi', + '\xe2r\u0103', + 'ase', + 'ise', + '\xE2se', + 'use', + 'a\u0163i', + 'e\u0163i', + 'i\u0163i', + '\xe2\u0163i', + 'sei', + 'ez', + 'am', + 'ai', + 'au', + 'ea', + 'ia', + 'ui', + '\xE2i', + '\u0103m', + 'em', + 'im', + '\xE2m', + 'se', ) def stem(self, word): @@ -4470,26 +4474,26 @@ class RomanianStemmer(_StandardStemmer): if word.endswith(suffix): if suffix in rv: if suffix in ( - "seser\u0103\u0163i", - "seser\u0103m", - "ser\u0103\u0163i", - "sese\u015Fi", - "seser\u0103", - "ser\u0103m", - "sesem", - "se\u015Fi", - "ser\u0103", - "sese", - "a\u0163i", - "e\u0163i", - "i\u0163i", - "\xE2\u0163i", - "sei", - "\u0103m", - "em", - "im", - "\xE2m", - "se", + 'seser\u0103\u0163i', + 'seser\u0103m', + 'ser\u0103\u0163i', + 'sese\u015Fi', + 'seser\u0103', + 'ser\u0103m', + 'sesem', + 'se\u015Fi', + 'ser\u0103', + 'sese', + 'a\u0163i', + 'e\u0163i', + 'i\u0163i', + '\xE2\u0163i', + 'sei', + '\u0103m', + 'em', + 'im', + '\xE2m', + 'se', ): word = word[: -len(suffix)] rv = rv[: -len(suffix)] @@ -4550,326 +4554,326 @@ class 
RussianStemmer(_LanguageSpecificStemmer): "v", ) __adjectival_suffixes = ( - "ui^ushchi^ui^u", - "ui^ushchi^ai^a", - "ui^ushchimi", - "ui^ushchymi", - "ui^ushchego", - "ui^ushchogo", - "ui^ushchemu", - "ui^ushchomu", - "ui^ushchikh", - "ui^ushchykh", - "ui^ushchui^u", - "ui^ushchaia", - "ui^ushchoi^u", - "ui^ushchei^u", - "i^ushchi^ui^u", - "i^ushchi^ai^a", - "ui^ushchee", - "ui^ushchie", - "ui^ushchye", - "ui^ushchoe", - "ui^ushchei`", - "ui^ushchii`", - "ui^ushchyi`", - "ui^ushchoi`", - "ui^ushchem", - "ui^ushchim", - "ui^ushchym", - "ui^ushchom", - "i^ushchimi", - "i^ushchymi", - "i^ushchego", - "i^ushchogo", - "i^ushchemu", - "i^ushchomu", - "i^ushchikh", - "i^ushchykh", - "i^ushchui^u", - "i^ushchai^a", - "i^ushchoi^u", - "i^ushchei^u", - "i^ushchee", - "i^ushchie", - "i^ushchye", - "i^ushchoe", - "i^ushchei`", - "i^ushchii`", - "i^ushchyi`", - "i^ushchoi`", - "i^ushchem", - "i^ushchim", - "i^ushchym", - "i^ushchom", - "shchi^ui^u", - "shchi^ai^a", - "ivshi^ui^u", - "ivshi^ai^a", - "yvshi^ui^u", - "yvshi^ai^a", - "shchimi", - "shchymi", - "shchego", - "shchogo", - "shchemu", - "shchomu", - "shchikh", - "shchykh", - "shchui^u", - "shchai^a", - "shchoi^u", - "shchei^u", - "ivshimi", - "ivshymi", - "ivshego", - "ivshogo", - "ivshemu", - "ivshomu", - "ivshikh", - "ivshykh", - "ivshui^u", - "ivshai^a", - "ivshoi^u", - "ivshei^u", - "yvshimi", - "yvshymi", - "yvshego", - "yvshogo", - "yvshemu", - "yvshomu", - "yvshikh", - "yvshykh", - "yvshui^u", - "yvshai^a", - "yvshoi^u", - "yvshei^u", - "vshi^ui^u", - "vshi^ai^a", - "shchee", - "shchie", - "shchye", - "shchoe", - "shchei`", - "shchii`", - "shchyi`", - "shchoi`", - "shchem", - "shchim", - "shchym", - "shchom", - "ivshee", - "ivshie", - "ivshye", - "ivshoe", - "ivshei`", - "ivshii`", - "ivshyi`", - "ivshoi`", - "ivshem", - "ivshim", - "ivshym", - "ivshom", - "yvshee", - "yvshie", - "yvshye", - "yvshoe", - "yvshei`", - "yvshii`", - "yvshyi`", - "yvshoi`", - "yvshem", - "yvshim", - "yvshym", - "yvshom", - "vshimi", - "vshymi", - "vshego", - "vshogo", - "vshemu", - "vshomu", - "vshikh", - "vshykh", - "vshui^u", - "vshai^a", - "vshoi^u", - "vshei^u", - "emi^ui^u", - "emi^ai^a", - "nni^ui^u", - "nni^ai^a", - "vshee", - "vshie", - "vshye", - "vshoe", - "vshei`", - "vshii`", - "vshyi`", - "vshoi`", - "vshem", - "vshim", - "vshym", - "vshom", - "emimi", - "emymi", - "emego", - "emogo", - "ememu", - "emomu", - "emikh", - "emykh", - "emui^u", - "emai^a", - "emoi^u", - "emei^u", - "nnimi", - "nnymi", - "nnego", - "nnogo", - "nnemu", - "nnomu", - "nnikh", - "nnykh", - "nnui^u", - "nnai^a", - "nnoi^u", - "nnei^u", - "emee", - "emie", - "emye", - "emoe", - "emei`", - "emii`", - "emyi`", - "emoi`", - "emem", - "emim", - "emym", - "emom", - "nnee", - "nnie", - "nnye", - "nnoe", - "nnei`", - "nnii`", - "nnyi`", - "nnoi`", - "nnem", - "nnim", - "nnym", - "nnom", - "i^ui^u", - "i^ai^a", - "imi", - "ymi", - "ego", - "ogo", - "emu", - "omu", - "ikh", - "ykh", - "ui^u", - "ai^a", - "oi^u", - "ei^u", - "ee", - "ie", - "ye", - "oe", - "ei`", - "ii`", - "yi`", - "oi`", - "em", - "im", - "ym", - "om", + 'ui^ushchi^ui^u', + 'ui^ushchi^ai^a', + 'ui^ushchimi', + 'ui^ushchymi', + 'ui^ushchego', + 'ui^ushchogo', + 'ui^ushchemu', + 'ui^ushchomu', + 'ui^ushchikh', + 'ui^ushchykh', + 'ui^ushchui^u', + 'ui^ushchaia', + 'ui^ushchoi^u', + 'ui^ushchei^u', + 'i^ushchi^ui^u', + 'i^ushchi^ai^a', + 'ui^ushchee', + 'ui^ushchie', + 'ui^ushchye', + 'ui^ushchoe', + 'ui^ushchei`', + 'ui^ushchii`', + 'ui^ushchyi`', + 'ui^ushchoi`', + 'ui^ushchem', + 'ui^ushchim', + 'ui^ushchym', + 
'ui^ushchom', + 'i^ushchimi', + 'i^ushchymi', + 'i^ushchego', + 'i^ushchogo', + 'i^ushchemu', + 'i^ushchomu', + 'i^ushchikh', + 'i^ushchykh', + 'i^ushchui^u', + 'i^ushchai^a', + 'i^ushchoi^u', + 'i^ushchei^u', + 'i^ushchee', + 'i^ushchie', + 'i^ushchye', + 'i^ushchoe', + 'i^ushchei`', + 'i^ushchii`', + 'i^ushchyi`', + 'i^ushchoi`', + 'i^ushchem', + 'i^ushchim', + 'i^ushchym', + 'i^ushchom', + 'shchi^ui^u', + 'shchi^ai^a', + 'ivshi^ui^u', + 'ivshi^ai^a', + 'yvshi^ui^u', + 'yvshi^ai^a', + 'shchimi', + 'shchymi', + 'shchego', + 'shchogo', + 'shchemu', + 'shchomu', + 'shchikh', + 'shchykh', + 'shchui^u', + 'shchai^a', + 'shchoi^u', + 'shchei^u', + 'ivshimi', + 'ivshymi', + 'ivshego', + 'ivshogo', + 'ivshemu', + 'ivshomu', + 'ivshikh', + 'ivshykh', + 'ivshui^u', + 'ivshai^a', + 'ivshoi^u', + 'ivshei^u', + 'yvshimi', + 'yvshymi', + 'yvshego', + 'yvshogo', + 'yvshemu', + 'yvshomu', + 'yvshikh', + 'yvshykh', + 'yvshui^u', + 'yvshai^a', + 'yvshoi^u', + 'yvshei^u', + 'vshi^ui^u', + 'vshi^ai^a', + 'shchee', + 'shchie', + 'shchye', + 'shchoe', + 'shchei`', + 'shchii`', + 'shchyi`', + 'shchoi`', + 'shchem', + 'shchim', + 'shchym', + 'shchom', + 'ivshee', + 'ivshie', + 'ivshye', + 'ivshoe', + 'ivshei`', + 'ivshii`', + 'ivshyi`', + 'ivshoi`', + 'ivshem', + 'ivshim', + 'ivshym', + 'ivshom', + 'yvshee', + 'yvshie', + 'yvshye', + 'yvshoe', + 'yvshei`', + 'yvshii`', + 'yvshyi`', + 'yvshoi`', + 'yvshem', + 'yvshim', + 'yvshym', + 'yvshom', + 'vshimi', + 'vshymi', + 'vshego', + 'vshogo', + 'vshemu', + 'vshomu', + 'vshikh', + 'vshykh', + 'vshui^u', + 'vshai^a', + 'vshoi^u', + 'vshei^u', + 'emi^ui^u', + 'emi^ai^a', + 'nni^ui^u', + 'nni^ai^a', + 'vshee', + 'vshie', + 'vshye', + 'vshoe', + 'vshei`', + 'vshii`', + 'vshyi`', + 'vshoi`', + 'vshem', + 'vshim', + 'vshym', + 'vshom', + 'emimi', + 'emymi', + 'emego', + 'emogo', + 'ememu', + 'emomu', + 'emikh', + 'emykh', + 'emui^u', + 'emai^a', + 'emoi^u', + 'emei^u', + 'nnimi', + 'nnymi', + 'nnego', + 'nnogo', + 'nnemu', + 'nnomu', + 'nnikh', + 'nnykh', + 'nnui^u', + 'nnai^a', + 'nnoi^u', + 'nnei^u', + 'emee', + 'emie', + 'emye', + 'emoe', + 'emei`', + 'emii`', + 'emyi`', + 'emoi`', + 'emem', + 'emim', + 'emym', + 'emom', + 'nnee', + 'nnie', + 'nnye', + 'nnoe', + 'nnei`', + 'nnii`', + 'nnyi`', + 'nnoi`', + 'nnem', + 'nnim', + 'nnym', + 'nnom', + 'i^ui^u', + 'i^ai^a', + 'imi', + 'ymi', + 'ego', + 'ogo', + 'emu', + 'omu', + 'ikh', + 'ykh', + 'ui^u', + 'ai^a', + 'oi^u', + 'ei^u', + 'ee', + 'ie', + 'ye', + 'oe', + 'ei`', + 'ii`', + 'yi`', + 'oi`', + 'em', + 'im', + 'ym', + 'om', ) __reflexive_suffixes = ("si^a", "s'") __verb_suffixes = ( "esh'", - "ei`te", - "ui`te", - "ui^ut", + 'ei`te', + 'ui`te', + 'ui^ut', "ish'", - "ete", - "i`te", - "i^ut", - "nno", - "ila", - "yla", - "ena", - "ite", - "ili", - "yli", - "ilo", - "ylo", - "eno", - "i^at", - "uet", - "eny", + 'ete', + 'i`te', + 'i^ut', + 'nno', + 'ila', + 'yla', + 'ena', + 'ite', + 'ili', + 'yli', + 'ilo', + 'ylo', + 'eno', + 'i^at', + 'uet', + 'eny', "it'", "yt'", - "ui^u", - "la", - "na", - "li", - "em", - "lo", - "no", - "et", - "ny", + 'ui^u', + 'la', + 'na', + 'li', + 'em', + 'lo', + 'no', + 'et', + 'ny', "t'", - "ei`", - "ui`", - "il", - "yl", - "im", - "ym", - "en", - "it", - "yt", - "i^u", - "i`", - "l", - "n", + 'ei`', + 'ui`', + 'il', + 'yl', + 'im', + 'ym', + 'en', + 'it', + 'yt', + 'i^u', + 'i`', + 'l', + 'n', ) __noun_suffixes = ( - "ii^ami", - "ii^akh", - "i^ami", - "ii^am", - "i^akh", - "ami", - "iei`", - "i^am", - "iem", - "akh", - "ii^u", + 'ii^ami', + 'ii^akh', + 'i^ami', + 'ii^am', + 'i^akh', + 
'ami', + 'iei`', + 'i^am', + 'iem', + 'akh', + 'ii^u', "'i^u", - "ii^a", + 'ii^a', "'i^a", - "ev", - "ov", - "ie", + 'ev', + 'ov', + 'ie', "'e", - "ei", - "ii", - "ei`", - "oi`", - "ii`", - "em", - "am", - "om", - "i^u", - "i^a", - "a", - "e", - "i", - "i`", - "o", - "u", - "y", + 'ei', + 'ii', + 'ei`', + 'oi`', + 'ii`', + 'em', + 'am', + 'om', + 'i^u', + 'i^a', + 'a', + 'e', + 'i', + 'i`', + 'o', + 'u', + 'y', "'", ) __superlative_suffixes = ("ei`she", "ei`sh") @@ -4894,10 +4898,8 @@ class RussianStemmer(_LanguageSpecificStemmer): chr_exceeded = True break - if not chr_exceeded: - return word - - word = self.__cyrillic_to_roman(word) + if chr_exceeded: + word = self.__cyrillic_to_roman(word) step1_success = False adjectival_removed = False @@ -4938,136 +4940,136 @@ class RussianStemmer(_LanguageSpecificStemmer): for suffix in self.__adjectival_suffixes: if rv.endswith(suffix): if suffix in ( - "i^ushchi^ui^u", - "i^ushchi^ai^a", - "i^ushchui^u", - "i^ushchai^a", - "i^ushchoi^u", - "i^ushchei^u", - "i^ushchimi", - "i^ushchymi", - "i^ushchego", - "i^ushchogo", - "i^ushchemu", - "i^ushchomu", - "i^ushchikh", - "i^ushchykh", - "shchi^ui^u", - "shchi^ai^a", - "i^ushchee", - "i^ushchie", - "i^ushchye", - "i^ushchoe", - "i^ushchei`", - "i^ushchii`", - "i^ushchyi`", - "i^ushchoi`", - "i^ushchem", - "i^ushchim", - "i^ushchym", - "i^ushchom", - "vshi^ui^u", - "vshi^ai^a", - "shchui^u", - "shchai^a", - "shchoi^u", - "shchei^u", - "emi^ui^u", - "emi^ai^a", - "nni^ui^u", - "nni^ai^a", - "shchimi", - "shchymi", - "shchego", - "shchogo", - "shchemu", - "shchomu", - "shchikh", - "shchykh", - "vshui^u", - "vshai^a", - "vshoi^u", - "vshei^u", - "shchee", - "shchie", - "shchye", - "shchoe", - "shchei`", - "shchii`", - "shchyi`", - "shchoi`", - "shchem", - "shchim", - "shchym", - "shchom", - "vshimi", - "vshymi", - "vshego", - "vshogo", - "vshemu", - "vshomu", - "vshikh", - "vshykh", - "emui^u", - "emai^a", - "emoi^u", - "emei^u", - "nnui^u", - "nnai^a", - "nnoi^u", - "nnei^u", - "vshee", - "vshie", - "vshye", - "vshoe", - "vshei`", - "vshii`", - "vshyi`", - "vshoi`", - "vshem", - "vshim", - "vshym", - "vshom", - "emimi", - "emymi", - "emego", - "emogo", - "ememu", - "emomu", - "emikh", - "emykh", - "nnimi", - "nnymi", - "nnego", - "nnogo", - "nnemu", - "nnomu", - "nnikh", - "nnykh", - "emee", - "emie", - "emye", - "emoe", - "emei`", - "emii`", - "emyi`", - "emoi`", - "emem", - "emim", - "emym", - "emom", - "nnee", - "nnie", - "nnye", - "nnoe", - "nnei`", - "nnii`", - "nnyi`", - "nnoi`", - "nnem", - "nnim", - "nnym", - "nnom", + 'i^ushchi^ui^u', + 'i^ushchi^ai^a', + 'i^ushchui^u', + 'i^ushchai^a', + 'i^ushchoi^u', + 'i^ushchei^u', + 'i^ushchimi', + 'i^ushchymi', + 'i^ushchego', + 'i^ushchogo', + 'i^ushchemu', + 'i^ushchomu', + 'i^ushchikh', + 'i^ushchykh', + 'shchi^ui^u', + 'shchi^ai^a', + 'i^ushchee', + 'i^ushchie', + 'i^ushchye', + 'i^ushchoe', + 'i^ushchei`', + 'i^ushchii`', + 'i^ushchyi`', + 'i^ushchoi`', + 'i^ushchem', + 'i^ushchim', + 'i^ushchym', + 'i^ushchom', + 'vshi^ui^u', + 'vshi^ai^a', + 'shchui^u', + 'shchai^a', + 'shchoi^u', + 'shchei^u', + 'emi^ui^u', + 'emi^ai^a', + 'nni^ui^u', + 'nni^ai^a', + 'shchimi', + 'shchymi', + 'shchego', + 'shchogo', + 'shchemu', + 'shchomu', + 'shchikh', + 'shchykh', + 'vshui^u', + 'vshai^a', + 'vshoi^u', + 'vshei^u', + 'shchee', + 'shchie', + 'shchye', + 'shchoe', + 'shchei`', + 'shchii`', + 'shchyi`', + 'shchoi`', + 'shchem', + 'shchim', + 'shchym', + 'shchom', + 'vshimi', + 'vshymi', + 'vshego', + 'vshogo', + 'vshemu', + 'vshomu', + 'vshikh', + 'vshykh', + 
'emui^u', + 'emai^a', + 'emoi^u', + 'emei^u', + 'nnui^u', + 'nnai^a', + 'nnoi^u', + 'nnei^u', + 'vshee', + 'vshie', + 'vshye', + 'vshoe', + 'vshei`', + 'vshii`', + 'vshyi`', + 'vshoi`', + 'vshem', + 'vshim', + 'vshym', + 'vshom', + 'emimi', + 'emymi', + 'emego', + 'emogo', + 'ememu', + 'emomu', + 'emikh', + 'emykh', + 'nnimi', + 'nnymi', + 'nnego', + 'nnogo', + 'nnemu', + 'nnomu', + 'nnikh', + 'nnykh', + 'emee', + 'emie', + 'emye', + 'emoe', + 'emei`', + 'emii`', + 'emyi`', + 'emoi`', + 'emem', + 'emim', + 'emym', + 'emom', + 'nnee', + 'nnie', + 'nnye', + 'nnoe', + 'nnei`', + 'nnii`', + 'nnyi`', + 'nnoi`', + 'nnem', + 'nnim', + 'nnym', + 'nnom', ): if ( rv[-len(suffix) - 3 : -len(suffix)] == "i^a" @@ -5160,7 +5162,8 @@ class RussianStemmer(_LanguageSpecificStemmer): if word.endswith("'"): word = word[:-1] - word = self.__roman_to_cyrillic(word) + if chr_exceeded: + word = self.__roman_to_cyrillic(word) return word @@ -5394,164 +5397,164 @@ class SpanishStemmer(_StandardStemmer): "lo", ) __step1_suffixes = ( - "amientos", - "imientos", - "amiento", - "imiento", - "aciones", - "uciones", - "adoras", - "adores", - "ancias", - "log\xEDas", - "encias", - "amente", - "idades", - "anzas", - "ismos", - "ables", - "ibles", - "istas", - "adora", - "aci\xF3n", - "antes", - "ancia", - "log\xEDa", - "uci\xf3n", - "encia", - "mente", - "anza", - "icos", - "icas", - "ismo", - "able", - "ible", - "ista", - "osos", - "osas", - "ador", - "ante", - "idad", - "ivas", - "ivos", - "ico", - "ica", - "oso", - "osa", - "iva", - "ivo", + 'amientos', + 'imientos', + 'amiento', + 'imiento', + 'aciones', + 'uciones', + 'adoras', + 'adores', + 'ancias', + 'log\xEDas', + 'encias', + 'amente', + 'idades', + 'anzas', + 'ismos', + 'ables', + 'ibles', + 'istas', + 'adora', + 'aci\xF3n', + 'antes', + 'ancia', + 'log\xEDa', + 'uci\xf3n', + 'encia', + 'mente', + 'anza', + 'icos', + 'icas', + 'ismo', + 'able', + 'ible', + 'ista', + 'osos', + 'osas', + 'ador', + 'ante', + 'idad', + 'ivas', + 'ivos', + 'ico', + 'ica', + 'oso', + 'osa', + 'iva', + 'ivo', ) __step2a_suffixes = ( - "yeron", - "yendo", - "yamos", - "yais", - "yan", - "yen", - "yas", - "yes", - "ya", - "ye", - "yo", - "y\xF3", + 'yeron', + 'yendo', + 'yamos', + 'yais', + 'yan', + 'yen', + 'yas', + 'yes', + 'ya', + 'ye', + 'yo', + 'y\xF3', ) __step2b_suffixes = ( - "ar\xEDamos", - "er\xEDamos", - "ir\xEDamos", - "i\xE9ramos", - "i\xE9semos", - "ar\xEDais", - "aremos", - "er\xEDais", - "eremos", - "ir\xEDais", - "iremos", - "ierais", - "ieseis", - "asteis", - "isteis", - "\xE1bamos", - "\xE1ramos", - "\xE1semos", - "ar\xEDan", - "ar\xEDas", - "ar\xE9is", - "er\xEDan", - "er\xEDas", - "er\xE9is", - "ir\xEDan", - "ir\xEDas", - "ir\xE9is", - "ieran", - "iesen", - "ieron", - "iendo", - "ieras", - "ieses", - "abais", - "arais", - "aseis", - "\xE9amos", - "ar\xE1n", - "ar\xE1s", - "ar\xEDa", - "er\xE1n", - "er\xE1s", - "er\xEDa", - "ir\xE1n", - "ir\xE1s", - "ir\xEDa", - "iera", - "iese", - "aste", - "iste", - "aban", - "aran", - "asen", - "aron", - "ando", - "abas", - "adas", - "idas", - "aras", - "ases", - "\xEDais", - "ados", - "idos", - "amos", - "imos", - "emos", - "ar\xE1", - "ar\xE9", - "er\xE1", - "er\xE9", - "ir\xE1", - "ir\xE9", - "aba", - "ada", - "ida", - "ara", - "ase", - "\xEDan", - "ado", - "ido", - "\xEDas", - "\xE1is", - "\xE9is", - "\xEDa", - "ad", - "ed", - "id", - "an", - "i\xF3", - "ar", - "er", - "ir", - "as", - "\xEDs", - "en", - "es", + 'ar\xEDamos', + 'er\xEDamos', + 'ir\xEDamos', + 'i\xE9ramos', + 'i\xE9semos', + 'ar\xEDais', + 'aremos', + 
'er\xEDais', + 'eremos', + 'ir\xEDais', + 'iremos', + 'ierais', + 'ieseis', + 'asteis', + 'isteis', + '\xE1bamos', + '\xE1ramos', + '\xE1semos', + 'ar\xEDan', + 'ar\xEDas', + 'ar\xE9is', + 'er\xEDan', + 'er\xEDas', + 'er\xE9is', + 'ir\xEDan', + 'ir\xEDas', + 'ir\xE9is', + 'ieran', + 'iesen', + 'ieron', + 'iendo', + 'ieras', + 'ieses', + 'abais', + 'arais', + 'aseis', + '\xE9amos', + 'ar\xE1n', + 'ar\xE1s', + 'ar\xEDa', + 'er\xE1n', + 'er\xE1s', + 'er\xEDa', + 'ir\xE1n', + 'ir\xE1s', + 'ir\xEDa', + 'iera', + 'iese', + 'aste', + 'iste', + 'aban', + 'aran', + 'asen', + 'aron', + 'ando', + 'abas', + 'adas', + 'idas', + 'aras', + 'ases', + '\xEDais', + 'ados', + 'idos', + 'amos', + 'imos', + 'emos', + 'ar\xE1', + 'ar\xE9', + 'er\xE1', + 'er\xE9', + 'ir\xE1', + 'ir\xE9', + 'aba', + 'ada', + 'ida', + 'ara', + 'ase', + '\xEDan', + 'ado', + 'ido', + '\xEDas', + '\xE1is', + '\xE9is', + '\xEDa', + 'ad', + 'ed', + 'id', + 'an', + 'i\xF3', + 'ar', + 'er', + 'ir', + 'as', + '\xEDs', + 'en', + 'es', ) __step3_suffixes = ("os", "a", "e", "o", "\xE1", "\xE9", "\xED", "\xF3") @@ -5875,6 +5878,7 @@ def demo(): """ + import re from nltk.corpus import udhr udhr_corpus = { @@ -5927,16 +5931,16 @@ def demo(): excerpt = udhr.words(udhr_corpus[language])[:300] stemmed = " ".join(stemmer.stem(word) for word in excerpt) - stemmed = re.sub(r"(.{,70})\s", r"\1\n", stemmed + " ").rstrip() + stemmed = re.sub(r"(.{,70})\s", r'\1\n', stemmed + ' ').rstrip() excerpt = " ".join(excerpt) - excerpt = re.sub(r"(.{,70})\s", r"\1\n", excerpt + " ").rstrip() + excerpt = re.sub(r"(.{,70})\s", r'\1\n', excerpt + ' ').rstrip() print("\n") - print("-" * 70) - print("ORIGINAL".center(70)) + print('-' * 70) + print('ORIGINAL'.center(70)) print(excerpt) print("\n\n") - print("STEMMED RESULTS".center(70)) + print('STEMMED RESULTS'.center(70)) print(stemmed) - print("-" * 70) + print('-' * 70) print("\n") diff --git a/nlp_resource_data/nltk/stem/util.py b/nlp_resource_data/nltk/stem/util.py index eec97bd..0daad9d 100644 --- a/nlp_resource_data/nltk/stem/util.py +++ b/nlp_resource_data/nltk/stem/util.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Stemmer Utilities # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Helder # URL: # For license information, see LICENSE.TXT diff --git a/nlp_resource_data/nltk/stem/wordnet.py b/nlp_resource_data/nltk/stem/wordnet.py index 33fe049..da521a3 100644 --- a/nlp_resource_data/nltk/stem/wordnet.py +++ b/nlp_resource_data/nltk/stem/wordnet.py @@ -1,15 +1,18 @@ # Natural Language Toolkit: WordNet stemmer interface # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Steven Bird # Edward Loper # URL: # For license information, see LICENSE.TXT +from __future__ import unicode_literals from nltk.corpus.reader.wordnet import NOUN from nltk.corpus import wordnet +from nltk.compat import python_2_unicode_compatible +@python_2_unicode_compatible class WordNetLemmatizer(object): """ WordNet Lemmatizer @@ -39,7 +42,7 @@ class WordNetLemmatizer(object): return min(lemmas, key=len) if lemmas else word def __repr__(self): - return "" + return '' # unload wordnet diff --git a/nlp_resource_data/nltk/tag/__init__.py b/nlp_resource_data/nltk/tag/__init__.py index 7e6d272..9381205 100644 --- a/nlp_resource_data/nltk/tag/__init__.py +++ b/nlp_resource_data/nltk/tag/__init__.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: Taggers # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: 
Edward Loper # Steven Bird (minor additions) # URL: @@ -63,6 +63,7 @@ We evaluate a tagger on data that was not seen during training: For more information, please consult chapter 5 of the NLTK Book. """ +from __future__ import print_function from nltk.tag.api import TaggerI from nltk.tag.util import str2tuple, tuple2str, untag @@ -93,14 +94,14 @@ from nltk.tag.perceptron import PerceptronTagger from nltk.data import load, find RUS_PICKLE = ( - "taggers/averaged_perceptron_tagger_ru/averaged_perceptron_tagger_ru.pickle" + 'taggers/averaged_perceptron_tagger_ru/averaged_perceptron_tagger_ru.pickle' ) def _get_tagger(lang=None): - if lang == "rus": + if lang == 'rus': tagger = PerceptronTagger(False) - ap_russian_model_loc = "file:" + str(find(RUS_PICKLE)) + ap_russian_model_loc = 'file:' + str(find(RUS_PICKLE)) tagger.load(ap_russian_model_loc) else: tagger = PerceptronTagger() @@ -109,7 +110,7 @@ def _get_tagger(lang=None): def _pos_tag(tokens, tagset=None, tagger=None, lang=None): # Currently only supoorts English and Russian. - if lang not in ["eng", "rus"]: + if lang not in ['eng', 'rus']: raise NotImplementedError( "Currently, NLTK pos_tag only supports English and Russian " "(i.e. lang='eng' or lang='rus')" @@ -117,22 +118,22 @@ def _pos_tag(tokens, tagset=None, tagger=None, lang=None): else: tagged_tokens = tagger.tag(tokens) if tagset: # Maps to the specified tagset. - if lang == "eng": + if lang == 'eng': tagged_tokens = [ - (token, map_tag("en-ptb", tagset, tag)) + (token, map_tag('en-ptb', tagset, tag)) for (token, tag) in tagged_tokens ] - elif lang == "rus": + elif lang == 'rus': # Note that the new Russion pos tags from the model contains suffixes, # see https://github.com/nltk/nltk/issues/2151#issuecomment-430709018 tagged_tokens = [ - (token, map_tag("ru-rnc-new", tagset, tag.partition("=")[0])) + (token, map_tag('ru-rnc-new', tagset, tag.partition('=')[0])) for (token, tag) in tagged_tokens ] return tagged_tokens -def pos_tag(tokens, tagset=None, lang="eng"): +def pos_tag(tokens, tagset=None, lang='eng'): """ Use NLTK's currently recommended part of speech tagger to tag the given list of tokens. @@ -161,13 +162,13 @@ def pos_tag(tokens, tagset=None, lang="eng"): return _pos_tag(tokens, tagset, tagger, lang) -def pos_tag_sents(sentences, tagset=None, lang="eng"): +def pos_tag_sents(sentences, tagset=None, lang='eng'): """ Use NLTK's currently recommended part of speech tagger to tag the given list of sentences, each consisting of a list of tokens. - :param sentences: List of sentences to be tagged - :type sentences: list(list(str)) + :param tokens: List of sentences to be tagged + :type tokens: list(list(str)) :param tagset: the tagset to be used, e.g. universal, wsj, brown :type tagset: str :param lang: the ISO 639 code of the language, e.g. 
'eng' for English, 'rus' for Russian @@ -176,4 +177,4 @@ def pos_tag_sents(sentences, tagset=None, lang="eng"): :rtype: list(list(tuple(str, str))) """ tagger = _get_tagger(lang) - return [_pos_tag(sent, tagset, tagger, lang) for sent in sentences] + return [_pos_tag(sent, tagset, tagger) for sent in sentences] diff --git a/nlp_resource_data/nltk/tag/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/tag/__pycache__/__init__.cpython-37.pyc index 45da18c38e31fa72f05b027be93bc3081e0b9be2..bfd2eeafeef2767f0562dab6ce69599cdae9cd8c 100644 GIT binary patch delta 1287 zcmZuw&u<%55Z<@`wb^=Yr)iw5W5=!&$6XTggVv1%RRv9{)B+X|5YmHcF@8>Jb>G^| zu1^F-kcy;7YwvJ^I|@f64*Uz8R^ra75=SZ|n0W~%LaeppneTn?+nIUues2BJvhG-x zso^uqjNMQW_U&#+)!A`8-LhH$QeF9ZtW<<^eS?ZFUl&f z${MdJ?2>m?)_Gkn@g)^!y=B?p4Y|TsWRo`)W_j17!^eMB?y9ihZOBc&DYy6*#?$;I zQ5G}e$`i(~i;9>PMf7fnMKLdI^je}SN@4-Mo8t1W=G0E#V}BU=+z|SH=F-j;l+~FW_ku<<4m`9`O2Gg05G+B}vy2(tPd_#wuxCU!zx(TA; zlf?J*Y2lmXy;bxS!AU}ke3dR`04f)i_$2!{bN{BJjg^x)1&VTuSU;iKS-&%qPL1`XWr=-#XF3rQDO#PRnNY5 zpgk;n&QOz`uoLYlG1fm#?w&OhdqHJ6x@vfw40?Wufy()RI{rhnJimEJ#X5<2ipV{` z+dFT%s#Z1lqJZkMbzZMaTY_&XHC1&EtA3f(Sldao+igd0s}4{8x8scKib2(}I6SgU&j-Oqn|BR(^OAf@y>)n10837GJl4YZ>OO3w)(v2dv delta 1271 zcmZuw-*4Mg6!vv~xmn$&?b_5`lQ!+r)N4t~SjLd1307d^hoXrJ30(h{L#tj$pqx7#w0C8NS7en}6R7B} z$W^{7YrK}k*W5L^&e!ElepA+YJ;5g34cXugxyd(^Sa*kiSMJFcZ(&&CJK~0z6?4xS ze_bq#>tY_g+oB@MViCPJL{%7K3B5aFW=}C!Mo-ya+UThGo4UM;Cx|ma2(U(8@sm&Y ztS2Are)REsP>bHyx67LdpFe6H`qF8AVSjz-_iZ?8HKVWe-Lk-3Fo{NC8vaUbQq42C z75$>9xm6K8JhJ z&cQ`pgNirMa2;DXaD;W4C4wt5YADvMz8_dv}3*#%^o!#7w$bD*3kpU-~FLgj>=sb>tyvqSaE z?B0b+c*vx6HBmCo5-rL)?*4!D97T=Aol`otnc)*xja*8atW4zl4@)AXetCS0)&;*% zn~4ZhD8fCWWF0fp>2yrB16vedecN%tvV`BYEI#SEp?TEA+i2ST9;jHxb;K^9%2Y9?I+PL?2{#BI?~dp9Y@jQG8_Fr#Wy+UQW@-2PJ?{W2R6&FB zFU~IP!wNxjX?fp)uG0^}_h6MW6G=wcp$#>Rlm9i;$xG)I?ovpiLt{Lo<4M)-OJ5wj R&VzTbB%q7UMlYA@{{ZO5Doy|Z diff --git a/nlp_resource_data/nltk/tag/__pycache__/api.cpython-37.pyc b/nlp_resource_data/nltk/tag/__pycache__/api.cpython-37.pyc index c683081a1746749935641334c066c9330ff98004..f3a62a1160150f7e78f28a906e6684e8cd752b9e 100644 GIT binary patch delta 691 zcmZuuO>5LZ7~aWuHXobzpl&~edMQD=ZC5Rd%Yq{IpooWF49GGjLyb+c%S<9Y2p+aK zFD?Pkia%lhffxV7+yu}5fS!Ec0WC^1%#+FU&NI)GcYdy&tp^W-pxc3`u@{r0MbIbj z7TbrnGm_bbUj|7)I?QI;oE&yCCv(p_najetJ|{^Jm~Q3)X*nC;Yrk2;9i`ONHtC{OS>z1wEbYK;0JVJH_L7jeK z;f{`3#ZG5@{J4GU4zHn78feQ6+-?KJCimJ;?y&!_Ryyo0EgD+0`|S_+%RYur%$n1@ zl4d#0Sn;2k3?}JomYz&zl+#i?X_ws>kFX0WFTaATKEj31-=N%u!c~|86FQ-x`M%>g zk>Zyxy_h^$L+Lug2Er!7WhY&S!gH8H18^*k-_PO|zS(~FZ@ibf$WOTmToP7AeKN?Q z%ebEUi^M<#1UP>5QOHO~J-KXv8YO QszD;+YcL25JtEHC-`|UjdjJ3c delta 660 zcmZutL5tHs6z)utc9KqN5!u~tQE)*dEVgbz-Gx$d5f(3^cruqVCPSNTl9riN6p!w8 z4<&!VUC*BSFT8m)XK(%i*^}=L=%Q@Ge3^XjeeZjGC*2?0{-N)CHF$2lc`*)Feuo^d z?jG$N)#66XPHHieGb^dAniu|;QQ$BWXZ*mxMaoC^EGub~D z@2B&HP_urooH-B4o%Ojx$pdT&h-%@7R2_iPWB51m4ZB(I!o9?SGUv&tVwQyI<1 z5Q)h2v+}F=`X#1d_Ek6Vwux}*^LJ6jps)c)qYd+I$~j}KZTNsxZYB#6r9$iVq#2MW zTR5kKu#M0~xN7W8C_IOwDFCZ0^B=TSxBSL;K7_42EoQw0f>yamwQ`^<pW-~MID&vX`A);j}N4S;76l~Ha+~NdY>-zq0{!)f- diff --git a/nlp_resource_data/nltk/tag/__pycache__/brill.cpython-37.pyc b/nlp_resource_data/nltk/tag/__pycache__/brill.cpython-37.pyc index 1ccd1f895ef6523816e611ba68527f26f2d316b1..10bc51338445c53f73df7842211d0a53d9b8876e 100644 GIT binary patch delta 2634 zcma)8OKclO820$Nemf64O`Mn8wDBfw-Nbp85T%dw0d3lnlopq=IG(AyVg2arxaguo|u0&v6B!GmiC+R{PTbF z|KH5~yC0YSC4qr@Xfqc}EelUsg)h-JGODpR#+C>JWloS~MA zri9hYs?K{Ke6t@#{CpAxcrcc#=rqNK?b9g1uG$v{!mB1>w;%*#phYWxbT

    )Rm`(DWxVUYbwTMp)0*+dq2INULrlQ@)I(ssKb%3CSgXDkEAQYmKyz~ zuDm4q11HFFO$l`s1;Pj-eXfl&NltJ>kn}|FIQwbTOuTp`ZxBz83zBs5DSC$B*(riY z)5eh$O2Q%JaET#FAk8C{PP|cgDb}b2VPa>NL{G9$3E^|}6YOW%;2M3RBHzXzd6|YQ zG`vHD61~#9QhqLa`7S*r2-T2rSJ9Se{{i|`>IDcLVpoE2@g#5b0csI>K3hGg+CdZg zVF|SZst42v1yv&yks^ypg^I@SdJ>gn{{qd|)5TQM`9(mKATz2mI8n&@Y-Mnwkl)Aq zDHL>*q@qc0RW(nAe7c4=@zSh@&riL7S;Ta6iczCr#epugn{24>U3`f806W-@F-ZY>Rwv*XZ{p*~M|7mb*!2z~l@d@9 zb^UgHBwNwtY*na!NCjwB)7q0RAp5#i^`3b0U~^Jf5ZE&p5|QUCbj1b+Z5n)$FYzia z@fjG7B$L|QE4cU{1bm7*WE5V;IYr&o9Kk%GMtLTOK=HBzVXTI~zi`uVB>1wf|o?``A z(RuwjKR+!^X}u(_DQ%g9xAlq$;@VhHe`O20T$o&$gy@*;ku93ggtb`0-%v^j4JkcqDglv(7O_6;l z|BmkPgzU3~!A?k;h$>0=I7vi{!F6nP0}^2UyzKDQZ1%RpI7&rJ1&>nG+wi+x-!aq( z1f=3gPs!A3_QE(A#HrzT`kQ`gY@Eg*g$)eB#Na~a9Tu1dLxffWggp=o`n7R;VjQD* zCkRvcVxHZgs``gfn?-W!@I(TD&=P%CzSoZ9AiNrMIt~3uoSnvG*WM1>8_MrSY66Z9 z%WCK|Vq&Ct;PIknPX+V(n^<5@PMcKtt_Nm)ZVb2BpgID8#UoUOO$9r` zZNnD&lUX~z0;YhtR1%j->T;QTMOsvHDPHDWAq{E5v<+!V8{TF%bt{|tG3(0i3?%MYRfh>55qWMUyY)xjIe}I0dT0wI?t%j+Q(EBzyWm?`mAy@ z45C2kpVqa?G4Qy?<6hHW|I606sgUf3Ub9c{TCn@0r0Z}vR1#>2@6&f=N5ml3gifJ{ zSBa#1ED<%bW8gU!3E!irEwGr0k(mE#3|cl6(yL|xQr`6k!>=j8(Xmt$b*50=stVv1 z`zmO)2i{PX?%2UG?!|t`3x~6x!e~_3e+tZod5<&gU107cxExy@ z$ILl|6A1GNnMRP5p->iZ_eb7`q-T6Df(yqH*#Ib)lCZf#>#AdB#oR*Ji61sP-G<+9 cEBt3wf#^#`n=a~G#kn$md6u0?O^lBH2Cy3AKL7v# diff --git a/nlp_resource_data/nltk/lm/__pycache__/util.cpython-37.pyc b/nlp_resource_data/nltk/lm/__pycache__/util.cpython-37.pyc index a4ab7157f13e8525f6ab1ab7728a59a29f531a47..bdbe66086b5855ffdcf235947dbbf8cf729ca39e 100644 GIT binary patch delta 57 zcmaFCe21CWiIxLsrL!~3nHjOB zW@ErVpqYOFo=|)-l*AVw{0mGpCMFYONHoUVix0l=qThFh7F*nAzH|9*=lgw^-Z}f# z`O(YevPIw*7uLnS8A5)-!{(2}<~)4ik3d}F5SO~lqXXtJihIK|90OLn zo8$&{Dzb8`D)YCgYq=$TQ(NV-t_?N8Py_6Ip4pMRocbeu)Tx7x?UuF9n6$3$ewDZt zw|bMfRk_E$VSw>**Lah(YwLGgXWKNb#eTmVxKeoiNUDw(F0@%%xDdoX98-P~FBd%t zlDY13r|-ii-wg)CPPG2Vol+T~dU)nY9I7M9r}VYzB;scRA&gN~O$_$!d=*Y$lO29O zrPte3*>F~$k~$Flt`XFazJ5$ru2$Xg3F0Rkh?&MA4)Z8g|8rHhY0J*19>r&9|EXH0(t#vUzS7kFN9 zedC+T*Njdix9TsA3~-0Q0j!}yfYN+CbUUE18xPi{ju*>UlnRt8;zdDp<0$C$+Gc8o z((9>dxOJ@9UwOpPdRfd{Sf*TvzTc08NI*Iq142p`<%}oqHtx+n4M)JU1P$)+C! 
zeaF+ZmtPVw_2NwHNiL*Xc+wZ*=j4m2#So zF!ba=LP@(=25_zBk(ZO#;*R&^OJ;>Hh4>3Zvi_$Q{hx zbDVRN7b=XK3I*Fu^#UxZB}6ZdVg;xuOo%##x1}zO2HB0kbEq&k*)S+#QAI)Eg$fJN zF?}yuQF+uT_F})Au?pG(WF}A%%!&~V3+%s?E4kDU!Ggl-ORdFDD69N5ZA}<>SMpc+KJbWyFs)Jt+W|Q zq^3{@;vU<}yYyI}l0t9(2NDRB9vWyuFQW9)YmYtU)R`xCoh+5mym|BH^EdPK@ap}y z58kcUZ3UlB?-O?CtfKr)o5`aia}~ej8zh0^DuEhkq3Wu*>Y+B$U0o$vDJ+f3Zdvl> zure}SL-Li-99gcVD%=W;z&up$sjkiKAFI54qy|=CA8wGSCPj4=RfC#TYJ8{!^`LR6 z1dV%|+YGc%6{jg|wYz%5QFG(HIEzva$eP!2^?D&kzU=RM!$=I&`wLaf_xZK$-FU>e zzx4KZ1(}3MwH{G);xNNVKQxb=)~xu5De;)jN&~1PPE6s3Cc7T^aC* ze>H}kj)?WmOBS6>btd|xnyYL9mqbIN7|=p0p0v-1zx2;OpBkKa&KoRAVHgcq7{`0r zcy(&4!)fGw{HD*e8+{aU1-qcQTA;dmuqeJOeY_kj1non`ZAi41a{FfNd%Z05VBh=Y zxLUqwuU(J2R0w$CW;fmC-65ga_k83MDM-8#?|R4YH=k(gf|zT)vq^>I6`uf4fHKG{ z**NgfZjLtgO)t#&2LLgEA?~(LZ!e+PwX1tQ1~;5D(@a)X6vCkSdN29r4Bdxq0bw>Q|l}9gF#%{%XR1M(!TX3)dG|SPzXnpW_f9t^3iNURmKK9r4z;;iJ#j?Tip{AI7dEaBGnl4<0!@1 zOc;ACzFz!ijWjSygS5%yrQ&IhfTr{}@%_>puhFQ=v?P)*YEF1LcmyeSg|T17ze~p( zG=(>0CX+Zci!-UoD9^;xvu~~ZANB@gA&(gQU92znUY8hkC&^4OD#tiTFyl9`Fvf2) z_J?@5yeht3xqZz*yWBz`LN7rSXaFLdHO4@Cl(VE`ut`ZX5Mm~HPZ;}C>~t>FsWxaM zahk9p;?CN^E9BlBLpwZQh@6CyoJ9W&9*|l>jCrf1 z+3lKTnr7S7%`&bf(*QbhPQhhd4st!&Px7iChavaVVH_nOFHZ9X$(GmiiyLH7qtZAI z6QJhiRzHjUVqeBR(CFqS@WC)aMfnrK04c~{f{Z(p^t${_<>r-99AqKiA+!_fcS$qV NTJh>Nqh+@${{jB6LDv8P diff --git a/nlp_resource_data/nltk/lm/api.py b/nlp_resource_data/nltk/lm/api.py index 60e9f87..e2ea244 100644 --- a/nlp_resource_data/nltk/lm/api.py +++ b/nlp_resource_data/nltk/lm/api.py @@ -1,38 +1,55 @@ +# -*- coding: utf-8 -*- # Natural Language Toolkit: Language Models # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Authors: Ilia Kurenkov # URL: # For license information, see LICENSE.TXT """Language Model Interface.""" +from __future__ import division, unicode_literals import random from abc import ABCMeta, abstractmethod from bisect import bisect +from six import add_metaclass from nltk.lm.counter import NgramCounter from nltk.lm.util import log_base2 from nltk.lm.vocabulary import Vocabulary -from itertools import accumulate - - -class Smoothing(metaclass=ABCMeta): +try: + from itertools import accumulate +except ImportError: + import operator + + def accumulate(iterable, func=operator.add): + """Return running totals""" + # accumulate([1,2,3,4,5]) --> 1 3 6 10 15 + # accumulate([1,2,3,4,5], operator.mul) --> 1 2 6 24 120 + it = iter(iterable) + try: + total = next(it) + except StopIteration: + return + yield total + for element in it: + total = func(total, element) + yield total + + +@add_metaclass(ABCMeta) +class Smoothing(object): """Ngram Smoothing Interface Implements Chen & Goodman 1995's idea that all smoothing algorithms have - certain features in common. This should ideally allow smoothing algorithms to + certain features in common. This should ideally allow smoothing algoritms to work both with Backoff and Interpolation. + + counter represents the number of counts for ngrams """ def __init__(self, vocabulary, counter): - """ - :param vocabulary: The Ngram vocabulary object. - :type vocabulary: nltk.lm.vocab.Vocabulary - :param counter: The counts of the vocabulary items. - :type counter: nltk.lm.counter.NgramCounter - """ self.vocab = vocabulary self.counts = counter @@ -56,7 +73,7 @@ def _random_generator(seed_or_generator): return random.Random(seed_or_generator) -def _weighted_choice(population, weights, random_generator=None): +def _weighted_choice(population, weights, random_seed=None): """Like random.choice, but with weights. 
Heavily inspired by python 3.6 `random.choices`. @@ -67,11 +84,12 @@ def _weighted_choice(population, weights, random_generator=None): raise ValueError("The number of weights does not match the population") cum_weights = list(accumulate(weights)) total = cum_weights[-1] - threshold = random_generator.random() + threshold = _random_generator(random_seed).random() return population[bisect(cum_weights, total * threshold)] -class LanguageModel(metaclass=ABCMeta): +@add_metaclass(ABCMeta) +class LanguageModel(object): """ABC for Language Models. Cannot be directly instantiated itself. @@ -106,7 +124,7 @@ class LanguageModel(metaclass=ABCMeta): if not self.vocab: if vocabulary_text is None: raise ValueError( - "Cannot fit without a vocabulary or text to create it from." + "Cannot fit without a vocabulary or text to " "create it from." ) self.vocab.update(vocabulary_text) self.counts.update(self.vocab.lookup(sent) for sent in text) @@ -181,8 +199,8 @@ class LanguageModel(metaclass=ABCMeta): :param int num_words: How many words to generate. By default 1. :param text_seed: Generation can be conditioned on preceding context. - :param random_seed: A random seed or an instance of `random.Random`. If provided, - makes the random sampling part of generation reproducible. + :param random_seed: If provided, makes the random sampling part of + generation reproducible. :return: One (str) word or a list of words generated from model. Examples: @@ -198,8 +216,7 @@ class LanguageModel(metaclass=ABCMeta): """ text_seed = [] if text_seed is None else list(text_seed) - random_generator = _random_generator(random_seed) - # This is the base recursion case. + # base recursion case if num_words == 1: context = ( text_seed[-self.order + 1 :] @@ -210,23 +227,21 @@ class LanguageModel(metaclass=ABCMeta): while context and not samples: context = context[1:] if len(context) > 1 else [] samples = self.context_counts(self.vocab.lookup(context)) - # Sorting samples achieves two things: + # sorting achieves two things: # - reproducible randomness when sampling - # - turns Mapping into Sequence which `_weighted_choice` expects + # - turning Mapping into Sequence which _weighted_choice expects samples = sorted(samples) return _weighted_choice( - samples, - tuple(self.score(w, context) for w in samples), - random_generator, + samples, tuple(self.score(w, context) for w in samples), random_seed ) - # We build up text one word at a time using the preceding context. 
+ # build up text one word at a time generated = [] for _ in range(num_words): generated.append( self.generate( num_words=1, text_seed=text_seed + generated, - random_seed=random_generator, + random_seed=random_seed, ) ) return generated diff --git a/nlp_resource_data/nltk/lm/counter.py b/nlp_resource_data/nltk/lm/counter.py index 09be9b4..1ceaa42 100644 --- a/nlp_resource_data/nltk/lm/counter.py +++ b/nlp_resource_data/nltk/lm/counter.py @@ -1,6 +1,7 @@ +# -*- coding: utf-8 -*- # Natural Language Toolkit # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Ilia Kurenkov # URL: # For license information, see LICENSE.TXT @@ -9,13 +10,17 @@ Language Model Counter ---------------------- """ -from collections import defaultdict -from collections.abc import Sequence +from __future__ import unicode_literals +from collections import Sequence, defaultdict + +from six import string_types +from nltk import compat from nltk.probability import ConditionalFreqDist, FreqDist -class NgramCounter: +@compat.python_2_unicode_compatible +class NgramCounter(object): """Class for counting ngrams. Will count any ngram sequence you give it ;) @@ -146,7 +151,7 @@ class NgramCounter: """User-friendly access to ngram counts.""" if isinstance(item, int): return self._counts[item] - elif isinstance(item, str): + elif isinstance(item, string_types): return self._counts.__getitem__(1)[item] elif isinstance(item, Sequence): return self._counts.__getitem__(len(item) + 1)[tuple(item)] diff --git a/nlp_resource_data/nltk/lm/models.py b/nlp_resource_data/nltk/lm/models.py index f459af3..639a1ce 100644 --- a/nlp_resource_data/nltk/lm/models.py +++ b/nlp_resource_data/nltk/lm/models.py @@ -1,15 +1,19 @@ +# -*- coding: utf-8 -*- # Natural Language Toolkit: Language Models # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Ilia Kurenkov # URL: # For license information, see LICENSE.TXT """Language Models""" +from __future__ import division, unicode_literals +from nltk import compat from nltk.lm.api import LanguageModel, Smoothing from nltk.lm.smoothing import KneserNey, WittenBell +@compat.python_2_unicode_compatible class MLE(LanguageModel): """Class for providing MLE ngram model scores. @@ -26,6 +30,7 @@ class MLE(LanguageModel): return self.context_counts(context).freq(word) +@compat.python_2_unicode_compatible class Lidstone(LanguageModel): """Provides Lidstone-smoothed scores. @@ -34,7 +39,7 @@ class Lidstone(LanguageModel): """ def __init__(self, gamma, *args, **kwargs): - super().__init__(*args, **kwargs) + super(Lidstone, self).__init__(*args, **kwargs) self.gamma = gamma def unmasked_score(self, word, context=None): @@ -49,6 +54,7 @@ class Lidstone(LanguageModel): return (word_count + self.gamma) / (norm_count + len(self.vocab) * self.gamma) +@compat.python_2_unicode_compatible class Laplace(Lidstone): """Implements Laplace (add one) smoothing. @@ -56,31 +62,24 @@ class Laplace(Lidstone): """ def __init__(self, *args, **kwargs): - super().__init__(1, *args, **kwargs) + super(Laplace, self).__init__(1, *args, **kwargs) class InterpolatedLanguageModel(LanguageModel): """Logic common to all interpolated language models. The idea to abstract this comes from Chen & Goodman 1995. - Do not instantiate this class directly! 
""" def __init__(self, smoothing_cls, order, **kwargs): assert issubclass(smoothing_cls, Smoothing) params = kwargs.pop("params", {}) - super().__init__(order, **kwargs) + super(InterpolatedLanguageModel, self).__init__(order, **kwargs) self.estimator = smoothing_cls(self.vocab, self.counts, **params) def unmasked_score(self, word, context=None): if not context: - # The base recursion case: no context, we only have a unigram. return self.estimator.unigram_score(word) - if not self.counts[context]: - # It can also happen that we have no data for this context. - # In that case we defer to the lower-order ngram. - # This is the same as setting alpha to 0 and gamma to 1. - return self.unmasked_score(word, context[1:]) alpha, gamma = self.estimator.alpha_gamma(word, context) return alpha + gamma * self.unmasked_score(word, context[1:]) @@ -89,11 +88,13 @@ class WittenBellInterpolated(InterpolatedLanguageModel): """Interpolated version of Witten-Bell smoothing.""" def __init__(self, order, **kwargs): - super().__init__(WittenBell, order, **kwargs) + super(WittenBellInterpolated, self).__init__(WittenBell, order, **kwargs) class KneserNeyInterpolated(InterpolatedLanguageModel): """Interpolated version of Kneser-Ney smoothing.""" def __init__(self, order, discount=0.1, **kwargs): - super().__init__(KneserNey, order, params={"discount": discount}, **kwargs) + super(KneserNeyInterpolated, self).__init__( + KneserNey, order, params={"discount": discount}, **kwargs + ) diff --git a/nlp_resource_data/nltk/lm/preprocessing.py b/nlp_resource_data/nltk/lm/preprocessing.py index 8279cd4..c9d695a 100644 --- a/nlp_resource_data/nltk/lm/preprocessing.py +++ b/nlp_resource_data/nltk/lm/preprocessing.py @@ -1,6 +1,7 @@ +# -*- coding: utf-8 -*- # Natural Language Toolkit: Language Model Unit Tests # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Ilia Kurenkov # URL: # For license information, see LICENSE.TXT diff --git a/nlp_resource_data/nltk/lm/smoothing.py b/nlp_resource_data/nltk/lm/smoothing.py index 02b8df2..693e2da 100644 --- a/nlp_resource_data/nltk/lm/smoothing.py +++ b/nlp_resource_data/nltk/lm/smoothing.py @@ -1,6 +1,7 @@ +# -*- coding: utf-8 -*- # Natural Language Toolkit: Language Model Unit Tests # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Ilia Kurenkov # URL: # For license information, see LICENSE.TXT @@ -20,37 +21,58 @@ def _count_non_zero_vals(dictionary): class WittenBell(Smoothing): """Witten-Bell smoothing.""" - def __init__(self, vocabulary, counter, **kwargs): - super().__init__(vocabulary, counter, **kwargs) + def __init__(self, vocabulary, counter, discount=0.1, **kwargs): + super(WittenBell, self).__init__(vocabulary, counter, *kwargs) + self.counts = counter def alpha_gamma(self, word, context): - alpha = self.counts[context].freq(word) - gamma = self._gamma(context) - return (1.0 - gamma) * alpha, gamma - - def _gamma(self, context): - n_plus = _count_non_zero_vals(self.counts[context]) - return n_plus / (n_plus + self.counts[len(context) + 1].N()) + gamma = self.gamma(context) + return (1.0 - gamma) * self.alpha(word, context), gamma def unigram_score(self, word): return self.counts.unigrams.freq(word) + def alpha(self, word, context): + return self.counts[context].freq(word) + + def gamma(self, context): + n_plus = _count_non_zero_vals(self.counts[context]) + return n_plus / (n_plus + self.counts[len(context) + 1].N()) + class KneserNey(Smoothing): """Kneser-Ney Smoothing.""" def __init__(self, 
vocabulary, counter, discount=0.1, **kwargs): - super().__init__(vocabulary, counter, **kwargs) + super(KneserNey, self).__init__(vocabulary, counter, *kwargs) self.discount = discount + self.vocabulary = vocabulary def unigram_score(self, word): - return 1.0 / len(self.vocab) + return 1.0 / len(self.vocabulary) def alpha_gamma(self, word, context): prefix_counts = self.counts[context] - prefix_total_ngrams = prefix_counts.N() - alpha = max(prefix_counts[word] - self.discount, 0.0) / prefix_total_ngrams - gamma = ( - self.discount * _count_non_zero_vals(prefix_counts) / prefix_total_ngrams - ) - return alpha, gamma + return self.alpha(word, prefix_counts), self.gamma(prefix_counts) + + def alpha(self, word, prefix_counts): + return max(prefix_counts[word] - self.discount, 0.0) / prefix_counts.N() + + def gamma(self, prefix_counts): + return self.discount * _count_non_zero_vals(prefix_counts) / prefix_counts.N() + + +class GoodTuring(Smoothing): + """Good-Turing Smoothing""" + def __init__(self, vocabulary, counter, **kwargs): + super(GoodTuring, self).__init__(vocabulary, counter, *kwargs) + self.counts = counter + self.vocabulary = vocabulary + + def unigram_score(self, word): + word_count = self.counts[word] + count_plus_1 = 0. + for everyContext in self.counts.keys(): + if len(everyContext.split()) == word_count+1: + count_plus_1 += 1 + return count_plus_1 / len(self.vocabulary) diff --git a/nlp_resource_data/nltk/lm/util.py b/nlp_resource_data/nltk/lm/util.py index 0da2eb2..62457e3 100644 --- a/nlp_resource_data/nltk/lm/util.py +++ b/nlp_resource_data/nltk/lm/util.py @@ -1,6 +1,7 @@ +# -*- coding: utf-8 -*- # Natural Language Toolkit # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Ilia Kurenkov # URL: # For license information, see LICENSE.TXT diff --git a/nlp_resource_data/nltk/lm/vocabulary.py b/nlp_resource_data/nltk/lm/vocabulary.py index 39ba6b2..3c7439d 100644 --- a/nlp_resource_data/nltk/lm/vocabulary.py +++ b/nlp_resource_data/nltk/lm/vocabulary.py @@ -1,16 +1,26 @@ +# -*- coding: utf-8 -*- # Natural Language Toolkit # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Ilia Kurenkov # URL: # For license information, see LICENSE.TXT """Language Model Vocabulary""" +from __future__ import unicode_literals + import sys -from collections import Counter -from collections.abc import Iterable +from collections import Counter, Iterable from itertools import chain -from functools import singledispatch + +from nltk import compat + +try: + # Python >= 3.4 + from functools import singledispatch +except ImportError: + # Python < 3.4 + from singledispatch import singledispatch @singledispatch @@ -30,13 +40,22 @@ def _(words, vocab): return tuple(_dispatched_lookup(w, vocab) for w in words) -@_dispatched_lookup.register(str) +try: + # Python 2 unicode + str type + basestring +except NameError: + # Python 3 unicode + str type + basestring = str + + +@_dispatched_lookup.register(basestring) def _string_lookup(word, vocab): """Looks up one word in the vocabulary.""" return word if word in vocab else vocab.unk_label -class Vocabulary: +@compat.python_2_unicode_compatible +class Vocabulary(object): """Stores language model vocabulary. Satisfies two common language modeling requirements for a vocabulary: @@ -116,7 +135,7 @@ class Vocabulary: ('', 'a', '', 'd', '', 'c') It's possible to update the counts after the vocabulary has been created. - In general, the interface is the same as that of `collections.Counter`. 
+ The interface follows that of `collections.Counter`. >>> vocab['b'] 1 @@ -217,6 +236,12 @@ class Vocabulary: and self.counts == other.counts ) + if sys.version_info[0] == 2: + # see https://stackoverflow.com/a/35781654/4501212 + def __ne__(self, other): + equal = self.__eq__(other) + return equal if equal is NotImplemented else not equal + def __str__(self): return "<{0} with cutoff={1} unk_label='{2}' and {3} items>".format( self.__class__.__name__, self.cutoff, self.unk_label, len(self) diff --git a/nlp_resource_data/nltk/metrics/__init__.py b/nlp_resource_data/nltk/metrics/__init__.py index 59be7df..227fda4 100644 --- a/nlp_resource_data/nltk/metrics/__init__.py +++ b/nlp_resource_data/nltk/metrics/__init__.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Metrics # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Steven Bird # Edward Loper # URL: @@ -24,7 +24,6 @@ from nltk.metrics.scores import ( from nltk.metrics.confusionmatrix import ConfusionMatrix from nltk.metrics.distance import ( edit_distance, - edit_distance_align, binary_distance, jaccard_distance, masi_distance, @@ -40,7 +39,6 @@ from nltk.metrics.association import ( NgramAssocMeasures, BigramAssocMeasures, TrigramAssocMeasures, - QuadgramAssocMeasures, ContingencyMeasures, ) from nltk.metrics.spearman import ( diff --git a/nlp_resource_data/nltk/metrics/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/metrics/__pycache__/__init__.cpython-37.pyc index b0a9b1a9819d5fc8bd7b323279a6fcdb71b0d6dc..c0fcc7c6b8c69a67dddfd72478d12f653b68c0f9 100644 GIT binary patch delta 295 zcmW-bJ5Iw;5Jf$)^E0*-692y>AwZS%G$2-hM8z~#6dTFnjgcH@ADGuugG9puBBf#p z6s&<2P~qn(j?Ubf;y$ZiL;XhARjHX>zfPWN-GxJaexHJrLfT~$GSFzf$4s)&A{%XT z&}p+ibIC)m^#SuKz<@#wDZ+?ijN5$3PH2i#n&FJ*IBzqRozemq_3y`M@S|sGUL|S1 zt=7dR<9m1mg+Fwz`M#6D84u-GevnN#=HGIBG;WHP69MBz3Bcr`5{)de+X9Y&%WoC` z(i7?n1PyRpuslH>zfmJ(srug`|>3t1=Mi|*AN3s4{li&!Z~ zK^4?UuQ^35K?ya`PzOEgrJXWXpc3hIr;0VGVIAsdfPoEYU=x~=mvJ_+1ubkt8#~a6 zdXlq+UFe27dy&}5JswJ@$Crb~3G)}f&z;OtW|r*_E!*>{YqDh__8d~j9=X9A`5>v_ zD7l|f&+n)e+$V3zcJPyYt?-TTD#*KzKj|U!xovvA8MRHu^TErCF)xHRR|HfMEXIO# z#2G{8`n)JiEpB?=)U>H@PhDOTA7}!)Kv|#?0@T=uH)E6|b$L|;Y9aP`UBu%1g*Um3 r8=>);;7t(~_#5=cQ)K-c`Y@i(D4+Cu!D#g&x0}roh2%&zt)%_|>jr!7 diff --git a/nlp_resource_data/nltk/metrics/__pycache__/agreement.cpython-37.pyc b/nlp_resource_data/nltk/metrics/__pycache__/agreement.cpython-37.pyc index 152e55e3b492bfbecf1491fa99859e9976949eb5..b00991be00d39c0731add475c08129b86f9565de 100644 GIT binary patch delta 4653 zcmb7IYit}>6`ni$^m^?&ey+WC;@F9ujo**h@gshuNt&j`ZDQJlA<=Z=d%fB0%m2E@uNcgpj1N2k0OmKA*8Az1yw=(C@M&B&K+lW zy-A=r+HdZhd(OG%p4YvTKR!#=pCi?WtExf@{N(~K>c5(Ou{yH$fvPA}(G*IkY7$G; zRLi4zEU)GzNb{IJ%?D4f>9+z}K+=3>(5lcXAO|14Ig;vt=a|#W!NZyktS~F{;RY~=_ z%HNlUv}i5z8P%fjtTtP%HmwbmMAvt2DB3Qz>tZ{7fY#6*w3gP<`fEOzLs|oEq)jwJ zn@eadG)h}(TU=SAJL#@#{w>yax|?<&nRb>?_t3qxi+0m}*T5%IWsL5pJ+znhm8k5e z19XrM(cu!>2py#d=olR@p&g_Xbdq}Ll%%b>3a9BIIzwma9G$0!U31oVZzyL~twZZ% z9T)dpC)qdHuJyee3YgH9NAz9nrVr91Y#+?W@}ypgDD>zxLX^0o?PonGMUUmxufC@B zUj9DoJ))fZj@HNe*85>*0DL~k1}Rx9-_y;pf)Rc!iM4Pc(bxTvY|-U(G8L( z9z|8O1KOB24#EfFH-Ul4rr0FD=LscAF9hloJAm&=e_?K7YmV^u*UgVrchs|}#2z*lDikI3u$3`4Z>tLx{V|VIo#Ba;tuN^SWyP8BaBDPk#lDbE>BIUR z$)&WFG3>;7lWl$}yzXnNgTmw00Iw*#5gs4U4dk+XK%A&ON|wd-+7FtMDyMi8jGW-K zX;|l}G4r$#b>sc*Nb^`mhNGq!;SKO`*`=VA3Fvy(=DL1m^B;8?LYt9gCqfIrLd?%= z;pun`O8E{X*C3!i$8TgZEJb5Jsn_utoEulTRDiOsmhBeudYE{2^X-PG$)N({f;1_n zI%@wQ-wh1lU3F<5euo{BC~oiLis?8@vCA1g^G)%3G%D7cnn<(wdDDsUyL9n=Xn|Cw z^8GmOK`3mS_rmM9#YE)f|B#K{NA@N0W~83%6z@c|HR%m6SY|uEWixHqGH8y+OvfD` 
zW#@D8SkU#o)K)YH1vND}1ixX&!WS~4ooPPxbvglmJ8}hc0FlkXEs<_eY z52(Zs|3czt&Cd^F_-+w^JDx4^260HT%nd&f>6Vefe&BaJ*_`Dji|QIY3RKr9!sS?+ zqAny~7B^eIJ6(+P$i0_SUB4I9$UK$Ke-%PFI)$t90|@Agn8#gJ#;PC_D^=pUVj+=y zMYOiAPZrbleg-&X$+diY>|clFTSaYK?r4H&Igb>2(VQsu2#84sdEVDx0i`S7H)`RyRIBZr*I#DA7+RReGGKa~Vc;YS_k`BGCRWX%X#RhlkN)_s9#iBMcl*sL1X< zJw5EMElUd{uVF{Cx&7oW9~G5J_JWV@ zUPnelolpNpywveg8)gCU$e!h0N@uP(e$K4)3d0H%KkkeU6*U)>7VE*;p=XQ)Url70 zZmd8XkF%TNok$Ijnoq3M^QvN6w}WMV)!sWcwrAvYeff1 z*H#}%>*D$DN^(nlqx-kLp8~;BCKx}0TFYYI0knUK7xpb2lAbGCxK-g&rgYV!61l1m zR()jkBhe81-tZji-+}Njz@6k*=ULJ0k_eJTm55uhMZc{+t>%e1y#H1&HpIJgzN}_s zp4JeNX=xFcdZJ4*l>$773rn8`a5RS?y^cBNTI6?kkKT1EP2)T>^<~a3Wo4y1WSKvX ztYsyxW)dOsx1K-CZY@vEPiA4WsOXzmmVI*$)k<3`aD+x`p`~<+Ch$g*jRnfGLE54D zQ}PbN7g46r;YvIhoM(Cx&UT|w{IKuW3r`|nS?aq-eo3mx9TwtQqO4kKDpb`@@r(Xf zyJb$C2EF&WllY`iw|HUT9k=qN4Kn&$MRzZcR$4jJPPnG`i*F5nIsz#GcTu>=PM%et z@jm0bJm^w{XXs3ijIU1~K8}9{`*oJf5KG@;o!vRL7sVHcy4%X)7e126!imh7#Zvaz z=37HwCS*h`j;unT{oTk|&nM5hlcINY^s!U0U>tt%cY8J#=5sJ`vwjjsg$k0r&5gQD zP1zOX392EV4gk^(;>PIlO007JD8iyp4|Fz`ZNX=lnG43u=G1|!#JvXH+vw7IFNRju z%d+_E*fDZSjEqmO6_U_38$IZFGU-)!D>Z0FaTl_WRF)^G>;ke&$?QCfGUos^^$3Gs zx>)hsY?3WQd^jE?6Uydz-NGZdAX;Le2PvQ%}e|12lCfY>(M1xE@5RolST7-Vv zro-)wt1J<}0Ee=ha+kzY6BCUmkd^SVhqy5@vKEFZ9zocHP>+B|Dj!1_N5IPi4{YAlf)&AVC?Z~*jP>HI6V!FfNThTfc7|cSs`k;Vp}xMt!Gc5{7Vl3!=0{amJH!K? z2J!e*7wHnKQ*(6}QH$S@UrIcQ%%*s2>J2~AVfgIy1=1@1GM(_Zq4a0P?dhn<9r|aD zoR|D*m-BNoEgBZ?t$)K`>_L73#!g6w&oDd3nXYsA-wB0#5WF}QfIE0Rk&3$?Uv@fe zX5IT#Fr9(p-G=)a-l*W>o4Cva;`cL+eX^=Bm$mAYX(#*PE06IMe9+0jXqA%aeyZ`x zugyj~F|JNUoY{}UT8wiB?=sUJfBO7Iwq)}vF*Dmvs)as#j9d~g&+gmHk*5v?ySA{AW^Eg;oL2jA~SBm9Qu=s^ZmJH3+EOtl3huT2{Lnpgvkk{d6BK zqvdzN2#Kqpm9&ah)0#Y7Ee+5*S|3qp18uzH-l5!2o9F?FJ(!1WrY*FU9-?h`oJ7e; zk%wtJ?Vv&0nJ2P~cGDi(OZ)P0{WL@e=pY@+!wu6TbcCw(XdZ5qj?r;CK_}@HJ!YG; zS+}LUs%rIG1FK(cyh~EAv)ax5TNwpxZQ4LcRu9l=I>QbELGuQwQUVG+euoexqG&Cw z71`)>8|t^-(+*vGmmQi_F5lPMSli}d*lCA=cd!mh>J%1)=PaFjL#~gIC08E2th}sH^{jGExuk%UA&~cVR?b@3Fsp|5>J1g(+|UNu zP_-k8irrg?gY6Mux(|CtfeGJ7H`ERA3Zu0-wgpk6>Ukx2V!Ot1sYu-SHHeRVq3x>D z@48z3Q02S`zz-E(4v#1EeJf_1^{wjHSInf&V|~5iHx+Y47yinB)FFi=;}x*7yc32V zU!?lfx+t$2>1{xqBd#YoN(~ZTmL->rlE)Jcr_wwePH$hWN)lRyBy|X?0W1XFycC|6 zL#LGc5M6=*-qumro7%$1Lj`MbS72@ZCv-6+Nd*~Uw-v)dNrjX)xuV!0?=T#E#~b~2>!oOQ^ksj(3J2P_9?X*)CyUO*=^=g=ekHZY{ne#KertS3HF zuLe|4BdH)x5)fncxBD@dj|qU!!kiZvVv$%uMBWyUnj3__A-EwmlH?Ojt?z6nhYl)?F)J#~ew3*fo zizFKzD%T}ec{r&@`Fb?P!unOl^$2??zIN!*$aCn=1cICcJ595f7`J{k5}q;t zynV0jCG!6#;qM~QKIyjGO}x@RC}$3rF~!q^cOV!?AP08Nq@sesWBKCxwJ?pQ(t2Wv z@o8j{Va9^AyqHw17@qY3x-XXy8F2rs8TOr8&&K7iqORk&-6w%RpBC09P><8Sx zgg>}2DFc`@bf@(tPrmY;5UJb+%1^ETQ+y}*&VcmShe~Eq`A%1;Gc0F)ULwi)CyI!7 zo_F6=H`Ogg{JQgTH%^YHo1vhYWD$Fb_)BxW_;*)BM%I~!J5e!)&T?VYQhDLW60~h{ z+x|}@S&5-9G9$dq*_D(W0v1{3r;xOu`_*Jrg%O@Y*A$nNRKa{NBaP%n`Ai~ViaXuE zlVe_i-D*w?VVjn1D1xzx&D`ie-5oFJ#gPe&#)*&?*B zn;1dMku=xyx+&*{oG~SZ1u7@Rv@{sOB?MmvpsBMA{$_3(?6OIY6OlX`AeY4LqaPdz z14}-^=M1m}bSHlS=x_9iPe%8Pw??aF$pMQTchVw^+b$VWp?{r4BP(!{+a(tje;XaG znL|>-3mzggHkk3i7OzBb0Kq;4coy>zf&m10})Yzl;E*!N00NjpR)ybjiNKd0~ZYDOBz)ajO1GZ;{tmTrDccgWb4m zdBfqj9!-S9;2Ea%hT21K6!-LG&mKf_k60N$<3>?8TEsC&wfOz`VbU!AGd^9pj8fdX zJh3@eIbRX;6F+ey9+u9@6;dnWlTi}b{?lXw$w<7Ui%ALqk1;@xiva|Qa8-{)6A=qE_H8q^tsSGkK)>!T3Jg|b3L(F+Tn{cZqhY_O`QPR f)I<38SDdOBzC(O!8T@@}v7;=bWR!!#n0fF&^Us`9 diff --git a/nlp_resource_data/nltk/metrics/__pycache__/aline.cpython-37.pyc b/nlp_resource_data/nltk/metrics/__pycache__/aline.cpython-37.pyc index a3fe39276e224161852d8ae052af731cf3ef03b0..9076b3d9ace12d1e33c90deaa6940085e731bf21 100644 GIT binary patch delta 3686 zcma)9OK=ob6z!g0hRIAmL&%4G2%mt1#FU5v5+fhc5Y!-_04>mIawlP!o=@J)gaCmK 
[GIT binary patch data for nlp_resource_data/nltk/metrics/__pycache__/*.cpython-37.pyc (confusionmatrix, scores, segmentation) not shown]
delta 353 zcmZoSeP_b!#LLUY00eSL0r83xc_kSWCaP;$3#YKAu(dEov81r4aI`Q)v8HgQZ~I)0FpPk)3h* zWK-cT#ubwf2un&b1D&VI^zuIt6e)n%EJfl#s)%LsC*dN-%FU@FADPubiqe4?2tL?r zvfg6O%qsypAip#(rHBP6Si}J$*e2IX*f8$dyi0}0Ww*jCa^Jc8FH|5 K2yi)bsR00Y)>P^M diff --git a/nlp_resource_data/nltk/metrics/__pycache__/spearman.cpython-37.pyc b/nlp_resource_data/nltk/metrics/__pycache__/spearman.cpython-37.pyc index 608ae3dfa0f521e441fcf2e6f736cc86b933f72e..d125645b173d44d10026495bb4d4da893acc186b 100644 GIT binary patch delta 492 zcmZ9IyGjE=6o&Wgb#hs8H4-oJ0aoxr#705{QGzHoR&KLIXOxg+SI%Tbv9+=bQ~MBV zW$QcGOd*I^SlOp?X0?z^@ek+r{fF}sJO=)@?-vdAGH)1nhyL7NT?N#j&@=iF0T|Sz z)~ykl)TWO5E$UKF{WdMr0xjM`^TWkF~!HB8KVC-{>x4ilK{+s-UaOkVp#|M=uX3YVNNTbf&^D zSR4E8BtA_MS+6}%3)IBDy#RrDw!fEke`rZ_WlcgSr;H&+g~mBc*wqC#>*CdEO^!KC zi+86|Rupb3F>I_p9k_QuAP3`&c=I;br&S=WF1wB#t`=cq9x}jo(&h0Pru{f?;2UC} zES}WV++heft6{Dz#_?G;$dJV`YB$su!az$^i*Emi&Nt1^KpOp&WY5WU&kyW5=JnMjDnF9Kq5N)iPt5epj&K?Jo)bHwZ!V?I`971YMY%1-VV z_!HFH#=_1&aIKYg9FaoiZlJ-Xzh_0GSM&b9QaxQcQdnuJ{%I3CgLVa*gKi=#y^cQ9 zHUB~F!Guf7U)%L#)lcdt&C6p&soi=leS@uXKrtN>SdIVzJ}{Vq5Kuf-**naBn8*hh zFe+_BjqOAXMOx+M1)CUhf(!1PGAJ{=#S>c_(by-F3jNboati6+j|qC#+)4`|-B z69G<;kDSblW2stkjDk4k4U`O+G}CJ7cOk>EJ-W=qdSA)&Ub106gEnQ{7NHmy-Z#XC BX@>v+ diff --git a/nlp_resource_data/nltk/metrics/agreement.py b/nlp_resource_data/nltk/metrics/agreement.py index 8fff0eb..445a87f 100644 --- a/nlp_resource_data/nltk/metrics/agreement.py +++ b/nlp_resource_data/nltk/metrics/agreement.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Agreement Metrics # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Tom Lippincott # URL: # For license information, see LICENSE.TXT @@ -68,19 +68,24 @@ Expected results from the Artstein and Poesio survey paper: 1.0 """ +from __future__ import print_function, unicode_literals, division import logging from itertools import groupby from operator import itemgetter +from six import iteritems + from nltk.probability import FreqDist, ConditionalFreqDist from nltk.internals import deprecated +from nltk.compat import python_2_unicode_compatible from nltk.metrics.distance import binary_distance log = logging.getLogger(__name__) +@python_2_unicode_compatible class AnnotationTask(object): """Represents an annotation task, i.e. people assign labels to items. @@ -117,7 +122,7 @@ class AnnotationTask(object): return "\r\n".join( map( lambda x: "%s\t%s\t%s" - % (x["coder"], x["item"].replace("_", "\t"), ",".join(x["labels"])), + % (x['coder'], x['item'].replace('_', "\t"), ",".join(x['labels'])), self.data, ) ) @@ -132,7 +137,7 @@ class AnnotationTask(object): self.C.add(coder) self.K.add(labels) self.I.add(item) - self.data.append({"coder": coder, "labels": labels, "item": item}) + self.data.append({'coder': coder, 'labels': labels, 'item': item}) def agr(self, cA, cB, i, data=None): """Agreement between two coders on a given item @@ -142,29 +147,32 @@ class AnnotationTask(object): # cfedermann: we don't know what combination of coder/item will come # first in x; to avoid StopIteration problems due to assuming an order # cA,cB, we allow either for k1 and then look up the missing as k2. 
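        # Illustrative sketch (hypothetical coder/item names; assumes the
        # default binary_distance): each record appended by load_array() is a
        # plain dict such as {'coder': 'c1', 'item': '1', 'labels': 'v1'},
        # so the two next() calls below simply pick out the rows for (cA, i)
        # and (cB, i).
        #     >>> t = AnnotationTask(data=[('c1', '1', 'v1'), ('c2', '1', 'v1')])
        #     >>> t.agr('c1', 'c2', '1')
        #     1.0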
- k1 = next((x for x in data if x["coder"] in (cA, cB) and x["item"] == i)) - if k1["coder"] == cA: - k2 = next((x for x in data if x["coder"] == cB and x["item"] == i)) + k1 = next((x for x in data if x['coder'] in (cA, cB) and x['item'] == i)) + if k1['coder'] == cA: + k2 = next((x for x in data if x['coder'] == cB and x['item'] == i)) else: - k2 = next((x for x in data if x["coder"] == cA and x["item"] == i)) + k2 = next((x for x in data if x['coder'] == cA and x['item'] == i)) - ret = 1.0 - float(self.distance(k1["labels"], k2["labels"])) + ret = 1.0 - float(self.distance(k1['labels'], k2['labels'])) log.debug("Observed agreement between %s and %s on %s: %f", cA, cB, i, ret) log.debug( - 'Distance between "%r" and "%r": %f', k1["labels"], k2["labels"], 1.0 - ret + "Distance between \"%r\" and \"%r\": %f", + k1['labels'], + k2['labels'], + 1.0 - ret, ) return ret def Nk(self, k): - return float(sum(1 for x in self.data if x["labels"] == k)) + return float(sum(1 for x in self.data if x['labels'] == k)) def Nik(self, i, k): - return float(sum(1 for x in self.data if x["item"] == i and x["labels"] == k)) + return float(sum(1 for x in self.data if x['item'] == i and x['labels'] == k)) def Nck(self, c, k): - return float(sum(1 for x in self.data if x["coder"] == c and x["labels"] == k)) + return float(sum(1 for x in self.data if x['coder'] == c and x['labels'] == k)) - @deprecated("Use Nk, Nik or Nck instead") + @deprecated('Use Nk, Nik or Nck instead') def N(self, k=None, i=None, c=None): """Implements the "n-notation" used in Artstein and Poesio (2007) @@ -191,7 +199,7 @@ class AnnotationTask(object): """ data = self._grouped_data( - "item", (x for x in self.data if x["coder"] in (cA, cB)) + 'item', (x for x in self.data if x['coder'] in (cA, cB)) ) ret = sum(self.agr(cA, cB, item, item_data) for item, item_data in data) / len( self.I @@ -227,10 +235,10 @@ class AnnotationTask(object): """ total = 0.0 - data = (x for x in self.data if x["coder"] in (cA, cB)) - for i, itemdata in self._grouped_data("item", data): + data = (x for x in self.data if x['coder'] in (cA, cB)) + for i, itemdata in self._grouped_data('item', data): # we should have two items; distance doesn't care which comes first - total += self.distance(next(itemdata)["labels"], next(itemdata)["labels"]) + total += self.distance(next(itemdata)['labels'], next(itemdata)['labels']) ret = total / (len(self.I) * max_distance) log.debug("Observed disagreement between %s and %s: %f", cA, cB, ret) @@ -261,8 +269,8 @@ class AnnotationTask(object): """ total = 0.0 - label_freqs = FreqDist(x["labels"] for x in self.data) - for k, f in label_freqs.items(): + label_freqs = FreqDist(x['labels'] for x in self.data) + for k, f in iteritems(label_freqs): total += f ** 2 Ae = total / ((len(self.I) * len(self.C)) ** 2) return (self.avg_Ao() - Ae) / (1 - Ae) @@ -270,7 +278,7 @@ class AnnotationTask(object): def Ae_kappa(self, cA, cB): Ae = 0.0 nitems = float(len(self.I)) - label_freqs = ConditionalFreqDist((x["labels"], x["coder"]) for x in self.data) + label_freqs = ConditionalFreqDist((x['labels'], x['coder']) for x in self.data) for k in label_freqs.conditions(): Ae += (label_freqs[k][cA] / nitems) * (label_freqs[k][cB] / nitems) return Ae @@ -302,8 +310,8 @@ class AnnotationTask(object): def Disagreement(self, label_freqs): total_labels = sum(label_freqs.values()) pairs = 0.0 - for j, nj in label_freqs.items(): - for l, nl in label_freqs.items(): + for j, nj in iteritems(label_freqs): + for l, nl in iteritems(label_freqs): pairs += float(nj * 
nl) * self.distance(l, j) return 1.0 * pairs / (total_labels * (total_labels - 1)) @@ -324,9 +332,9 @@ class AnnotationTask(object): total_ratings = 0 all_valid_labels_freq = FreqDist([]) - total_do = 0.0 # Total observed disagreement for all items. - for i, itemdata in self._grouped_data("item"): - label_freqs = FreqDist(x["labels"] for x in itemdata) + total_do = 0.0 # Total observed disagreement for all items. + for i, itemdata in self._grouped_data('item'): + label_freqs = FreqDist(x['labels'] for x in itemdata) labels_count = sum(label_freqs.values()) if labels_count < 2: # Ignore the item. @@ -336,7 +344,7 @@ class AnnotationTask(object): do = total_do / sum(all_valid_labels_freq.values()) - de = self.Disagreement(all_valid_labels_freq) # Expected disagreement. + de = self.Disagreement(all_valid_labels_freq) # Expected disagreement. k_alpha = 1.0 - do / de return k_alpha @@ -347,7 +355,7 @@ class AnnotationTask(object): """ total = 0.0 label_freqs = ConditionalFreqDist( - (x["coder"], x["labels"]) for x in self.data if x["coder"] in (cA, cB) + (x['coder'], x['labels']) for x in self.data if x['coder'] in (cA, cB) ) for j in self.K: for l in self.K: @@ -367,7 +375,7 @@ class AnnotationTask(object): ) -if __name__ == "__main__": +if __name__ == '__main__': import re import optparse @@ -415,7 +423,7 @@ if __name__ == "__main__": "-v", "--verbose", dest="verbose", - default="0", + default='0', help="how much debugging to print on stderr (0-4)", ) parser.add_option( @@ -457,7 +465,7 @@ if __name__ == "__main__": # read in data from the specified file data = [] - with open(options.file, "r") as infile: + with open(options.file, 'r') as infile: for l in infile: toks = l.split(options.columnsep) coder, object_, labels = ( diff --git a/nlp_resource_data/nltk/metrics/aline.py b/nlp_resource_data/nltk/metrics/aline.py index 4b88bb4..3b8cba5 100644 --- a/nlp_resource_data/nltk/metrics/aline.py +++ b/nlp_resource_data/nltk/metrics/aline.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: ALINE # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Greg Kondrak # Geoff Bacon (Python port) # URL: @@ -38,6 +38,8 @@ Example usage University of Toronto. 
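A minimal usage sketch (the phone strings are illustrative; align() is
defined further down in this module and returns a list of alignments,
each a list of (segment, segment) pairs, with '-' marking an indel):

    >>> align('θin', 'tenwis')   # doctest: +SKIP
    [[('θ', 't'), ('i', 'e'), ('n', 'n'), ('-', 'w'), ('-', 'i'), ('-', 's')]]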
""" +from __future__ import unicode_literals + try: import numpy as np except ImportError: @@ -45,7 +47,7 @@ except ImportError: # === Constants === -inf = float("inf") +inf = float('inf') # Default values for maximum similarity scores (Kondrak 2002: 54) C_skip = 10 # Indels @@ -54,1022 +56,1022 @@ C_exp = 45 # Expansions/compressions C_vwl = 5 # Vowel/consonant relative weight (decreased from 10) consonants = [ - "B", - "N", - "R", - "b", - "c", - "d", - "f", - "g", - "h", - "j", - "k", - "l", - "m", - "n", - "p", - "q", - "r", - "s", - "t", - "v", - "x", - "z", - "ç", - "ð", - "ħ", - "ŋ", - "ɖ", - "ɟ", - "ɢ", - "ɣ", - "ɦ", - "ɬ", - "ɮ", - "ɰ", - "ɱ", - "ɲ", - "ɳ", - "ɴ", - "ɸ", - "ɹ", - "ɻ", - "ɽ", - "ɾ", - "ʀ", - "ʁ", - "ʂ", - "ʃ", - "ʈ", - "ʋ", - "ʐ ", - "ʒ", - "ʔ", - "ʕ", - "ʙ", - "ʝ", - "β", - "θ", - "χ", - "ʐ", - "w", + 'B', + 'N', + 'R', + 'b', + 'c', + 'd', + 'f', + 'g', + 'h', + 'j', + 'k', + 'l', + 'm', + 'n', + 'p', + 'q', + 'r', + 's', + 't', + 'v', + 'x', + 'z', + 'ç', + 'ð', + 'ħ', + 'ŋ', + 'ɖ', + 'ɟ', + 'ɢ', + 'ɣ', + 'ɦ', + 'ɬ', + 'ɮ', + 'ɰ', + 'ɱ', + 'ɲ', + 'ɳ', + 'ɴ', + 'ɸ', + 'ɹ', + 'ɻ', + 'ɽ', + 'ɾ', + 'ʀ', + 'ʁ', + 'ʂ', + 'ʃ', + 'ʈ', + 'ʋ', + 'ʐ ', + 'ʒ', + 'ʔ', + 'ʕ', + 'ʙ', + 'ʝ', + 'β', + 'θ', + 'χ', + 'ʐ', + 'w', ] # Relevant features for comparing consonants and vowels R_c = [ - "aspirated", - "lateral", - "manner", - "nasal", - "place", - "retroflex", - "syllabic", - "voice", + 'aspirated', + 'lateral', + 'manner', + 'nasal', + 'place', + 'retroflex', + 'syllabic', + 'voice', ] # 'high' taken out of R_v because same as manner R_v = [ - "back", - "lateral", - "long", - "manner", - "nasal", - "place", - "retroflex", - "round", - "syllabic", - "voice", + 'back', + 'lateral', + 'long', + 'manner', + 'nasal', + 'place', + 'retroflex', + 'round', + 'syllabic', + 'voice', ] # Flattened feature matrix (Kondrak 2002: 56) similarity_matrix = { # place - "bilabial": 1.0, - "labiodental": 0.95, - "dental": 0.9, - "alveolar": 0.85, - "retroflex": 0.8, - "palato-alveolar": 0.75, - "palatal": 0.7, - "velar": 0.6, - "uvular": 0.5, - "pharyngeal": 0.3, - "glottal": 0.1, - "labiovelar": 1.0, - "vowel": -1.0, # added 'vowel' + 'bilabial': 1.0, + 'labiodental': 0.95, + 'dental': 0.9, + 'alveolar': 0.85, + 'retroflex': 0.8, + 'palato-alveolar': 0.75, + 'palatal': 0.7, + 'velar': 0.6, + 'uvular': 0.5, + 'pharyngeal': 0.3, + 'glottal': 0.1, + 'labiovelar': 1.0, + 'vowel': -1.0, # added 'vowel' # manner - "stop": 1.0, - "affricate": 0.9, - "fricative": 0.85, # increased fricative from 0.8 - "trill": 0.7, - "tap": 0.65, - "approximant": 0.6, - "high vowel": 0.4, - "mid vowel": 0.2, - "low vowel": 0.0, - "vowel2": 0.5, # added vowel + 'stop': 1.0, + 'affricate': 0.9, + 'fricative': 0.85, # increased fricative from 0.8 + 'trill': 0.7, + 'tap': 0.65, + 'approximant': 0.6, + 'high vowel': 0.4, + 'mid vowel': 0.2, + 'low vowel': 0.0, + 'vowel2': 0.5, # added vowel # high - "high": 1.0, - "mid": 0.5, - "low": 0.0, + 'high': 1.0, + 'mid': 0.5, + 'low': 0.0, # back - "front": 1.0, - "central": 0.5, - "back": 0.0, + 'front': 1.0, + 'central': 0.5, + 'back': 0.0, # binary features - "plus": 1.0, - "minus": 0.0, + 'plus': 1.0, + 'minus': 0.0, } # Relative weights of phonetic features (Kondrak 2002: 55) salience = { - "syllabic": 5, - "place": 40, - "manner": 50, - "voice": 5, # decreased from 10 - "nasal": 20, # increased from 10 - "retroflex": 10, - "lateral": 10, - "aspirated": 5, - "long": 0, # decreased from 1 - "high": 3, # decreased from 5 - "back": 2, # decreased from 5 - "round": 2, # 
decreased from 5 + 'syllabic': 5, + 'place': 40, + 'manner': 50, + 'voice': 5, # decreased from 10 + 'nasal': 20, # increased from 10 + 'retroflex': 10, + 'lateral': 10, + 'aspirated': 5, + 'long': 0, # decreased from 1 + 'high': 3, # decreased from 5 + 'back': 2, # decreased from 5 + 'round': 2, # decreased from 5 } # (Kondrak 2002: 59-60) feature_matrix = { # Consonants - "p": { - "place": "bilabial", - "manner": "stop", - "syllabic": "minus", - "voice": "minus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "b": { - "place": "bilabial", - "manner": "stop", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "t": { - "place": "alveolar", - "manner": "stop", - "syllabic": "minus", - "voice": "minus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "d": { - "place": "alveolar", - "manner": "stop", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "ʈ": { - "place": "retroflex", - "manner": "stop", - "syllabic": "minus", - "voice": "minus", - "nasal": "minus", - "retroflex": "plus", - "lateral": "minus", - "aspirated": "minus", - }, - "ɖ": { - "place": "retroflex", - "manner": "stop", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "plus", - "lateral": "minus", - "aspirated": "minus", - }, - "c": { - "place": "palatal", - "manner": "stop", - "syllabic": "minus", - "voice": "minus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "ɟ": { - "place": "palatal", - "manner": "stop", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "k": { - "place": "velar", - "manner": "stop", - "syllabic": "minus", - "voice": "minus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "g": { - "place": "velar", - "manner": "stop", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "q": { - "place": "uvular", - "manner": "stop", - "syllabic": "minus", - "voice": "minus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "ɢ": { - "place": "uvular", - "manner": "stop", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "ʔ": { - "place": "glottal", - "manner": "stop", - "syllabic": "minus", - "voice": "minus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "m": { - "place": "bilabial", - "manner": "stop", - "syllabic": "minus", - "voice": "plus", - "nasal": "plus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "ɱ": { - "place": "labiodental", - "manner": "stop", - "syllabic": "minus", - "voice": "plus", - "nasal": "plus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "n": { - "place": "alveolar", - "manner": "stop", - "syllabic": "minus", - "voice": "plus", - "nasal": "plus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "ɳ": { - "place": "retroflex", - "manner": "stop", - "syllabic": "minus", - "voice": "plus", - "nasal": "plus", - "retroflex": "plus", - "lateral": "minus", - 
"aspirated": "minus", - }, - "ɲ": { - "place": "palatal", - "manner": "stop", - "syllabic": "minus", - "voice": "plus", - "nasal": "plus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "ŋ": { - "place": "velar", - "manner": "stop", - "syllabic": "minus", - "voice": "plus", - "nasal": "plus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "ɴ": { - "place": "uvular", - "manner": "stop", - "syllabic": "minus", - "voice": "plus", - "nasal": "plus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "N": { - "place": "uvular", - "manner": "stop", - "syllabic": "minus", - "voice": "plus", - "nasal": "plus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "ʙ": { - "place": "bilabial", - "manner": "trill", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "B": { - "place": "bilabial", - "manner": "trill", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "r": { - "place": "alveolar", - "manner": "trill", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "plus", - "lateral": "minus", - "aspirated": "minus", - }, - "ʀ": { - "place": "uvular", - "manner": "trill", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "R": { - "place": "uvular", - "manner": "trill", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "ɾ": { - "place": "alveolar", - "manner": "tap", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "ɽ": { - "place": "retroflex", - "manner": "tap", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "plus", - "lateral": "minus", - "aspirated": "minus", - }, - "ɸ": { - "place": "bilabial", - "manner": "fricative", - "syllabic": "minus", - "voice": "minus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "β": { - "place": "bilabial", - "manner": "fricative", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "f": { - "place": "labiodental", - "manner": "fricative", - "syllabic": "minus", - "voice": "minus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "v": { - "place": "labiodental", - "manner": "fricative", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "θ": { - "place": "dental", - "manner": "fricative", - "syllabic": "minus", - "voice": "minus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "ð": { - "place": "dental", - "manner": "fricative", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "s": { - "place": "alveolar", - "manner": "fricative", - "syllabic": "minus", - "voice": "minus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "z": { - "place": "alveolar", - "manner": "fricative", - "syllabic": "minus", - "voice": "plus", 
- "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "ʃ": { - "place": "palato-alveolar", - "manner": "fricative", - "syllabic": "minus", - "voice": "minus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "ʒ": { - "place": "palato-alveolar", - "manner": "fricative", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "ʂ": { - "place": "retroflex", - "manner": "fricative", - "syllabic": "minus", - "voice": "minus", - "nasal": "minus", - "retroflex": "plus", - "lateral": "minus", - "aspirated": "minus", - }, - "ʐ": { - "place": "retroflex", - "manner": "fricative", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "plus", - "lateral": "minus", - "aspirated": "minus", - }, - "ç": { - "place": "palatal", - "manner": "fricative", - "syllabic": "minus", - "voice": "minus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "ʝ": { - "place": "palatal", - "manner": "fricative", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "x": { - "place": "velar", - "manner": "fricative", - "syllabic": "minus", - "voice": "minus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "ɣ": { - "place": "velar", - "manner": "fricative", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "χ": { - "place": "uvular", - "manner": "fricative", - "syllabic": "minus", - "voice": "minus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "ʁ": { - "place": "uvular", - "manner": "fricative", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "ħ": { - "place": "pharyngeal", - "manner": "fricative", - "syllabic": "minus", - "voice": "minus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "ʕ": { - "place": "pharyngeal", - "manner": "fricative", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "h": { - "place": "glottal", - "manner": "fricative", - "syllabic": "minus", - "voice": "minus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "ɦ": { - "place": "glottal", - "manner": "fricative", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "ɬ": { - "place": "alveolar", - "manner": "fricative", - "syllabic": "minus", - "voice": "minus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "plus", - "aspirated": "minus", - }, - "ɮ": { - "place": "alveolar", - "manner": "fricative", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "plus", - "aspirated": "minus", - }, - "ʋ": { - "place": "labiodental", - "manner": "approximant", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "ɹ": { - "place": "alveolar", - "manner": "approximant", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - 
"lateral": "minus", - "aspirated": "minus", - }, - "ɻ": { - "place": "retroflex", - "manner": "approximant", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "plus", - "lateral": "minus", - "aspirated": "minus", - }, - "j": { - "place": "palatal", - "manner": "approximant", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "ɰ": { - "place": "velar", - "manner": "approximant", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", - }, - "l": { - "place": "alveolar", - "manner": "approximant", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "plus", - "aspirated": "minus", - }, - "w": { - "place": "labiovelar", - "manner": "approximant", - "syllabic": "minus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "aspirated": "minus", + 'p': { + 'place': 'bilabial', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'minus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'b': { + 'place': 'bilabial', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 't': { + 'place': 'alveolar', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'minus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'd': { + 'place': 'alveolar', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ʈ': { + 'place': 'retroflex', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'minus', + 'nasal': 'minus', + 'retroflex': 'plus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ɖ': { + 'place': 'retroflex', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'plus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'c': { + 'place': 'palatal', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'minus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ɟ': { + 'place': 'palatal', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'k': { + 'place': 'velar', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'minus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'g': { + 'place': 'velar', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'q': { + 'place': 'uvular', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'minus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ɢ': { + 'place': 'uvular', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ʔ': { + 'place': 'glottal', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'minus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'm': { + 'place': 'bilabial', + 'manner': 'stop', + 'syllabic': 'minus', + 
'voice': 'plus', + 'nasal': 'plus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ɱ': { + 'place': 'labiodental', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'plus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'n': { + 'place': 'alveolar', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'plus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ɳ': { + 'place': 'retroflex', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'plus', + 'retroflex': 'plus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ɲ': { + 'place': 'palatal', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'plus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ŋ': { + 'place': 'velar', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'plus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ɴ': { + 'place': 'uvular', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'plus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'N': { + 'place': 'uvular', + 'manner': 'stop', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'plus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ʙ': { + 'place': 'bilabial', + 'manner': 'trill', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'B': { + 'place': 'bilabial', + 'manner': 'trill', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'r': { + 'place': 'alveolar', + 'manner': 'trill', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'plus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ʀ': { + 'place': 'uvular', + 'manner': 'trill', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'R': { + 'place': 'uvular', + 'manner': 'trill', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ɾ': { + 'place': 'alveolar', + 'manner': 'tap', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ɽ': { + 'place': 'retroflex', + 'manner': 'tap', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'plus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ɸ': { + 'place': 'bilabial', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'minus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'β': { + 'place': 'bilabial', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'f': { + 'place': 'labiodental', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'minus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'v': { + 'place': 'labiodental', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'θ': { + 'place': 
'dental', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'minus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ð': { + 'place': 'dental', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 's': { + 'place': 'alveolar', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'minus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'z': { + 'place': 'alveolar', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ʃ': { + 'place': 'palato-alveolar', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'minus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ʒ': { + 'place': 'palato-alveolar', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ʂ': { + 'place': 'retroflex', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'minus', + 'nasal': 'minus', + 'retroflex': 'plus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ʐ': { + 'place': 'retroflex', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'plus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ç': { + 'place': 'palatal', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'minus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ʝ': { + 'place': 'palatal', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'x': { + 'place': 'velar', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'minus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ɣ': { + 'place': 'velar', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'χ': { + 'place': 'uvular', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'minus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ʁ': { + 'place': 'uvular', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ħ': { + 'place': 'pharyngeal', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'minus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ʕ': { + 'place': 'pharyngeal', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'h': { + 'place': 'glottal', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'minus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ɦ': { + 'place': 'glottal', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ɬ': { + 'place': 'alveolar', + 'manner': 'fricative', + 'syllabic': 
'minus', + 'voice': 'minus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'plus', + 'aspirated': 'minus', + }, + 'ɮ': { + 'place': 'alveolar', + 'manner': 'fricative', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'plus', + 'aspirated': 'minus', + }, + 'ʋ': { + 'place': 'labiodental', + 'manner': 'approximant', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ɹ': { + 'place': 'alveolar', + 'manner': 'approximant', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ɻ': { + 'place': 'retroflex', + 'manner': 'approximant', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'plus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'j': { + 'place': 'palatal', + 'manner': 'approximant', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'ɰ': { + 'place': 'velar', + 'manner': 'approximant', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', + }, + 'l': { + 'place': 'alveolar', + 'manner': 'approximant', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'plus', + 'aspirated': 'minus', + }, + 'w': { + 'place': 'labiovelar', + 'manner': 'approximant', + 'syllabic': 'minus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'aspirated': 'minus', }, # Vowels - "i": { - "place": "vowel", - "manner": "vowel2", - "syllabic": "plus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "high": "high", - "back": "front", - "round": "minus", - "long": "minus", - "aspirated": "minus", - }, - "y": { - "place": "vowel", - "manner": "vowel2", - "syllabic": "plus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "high": "high", - "back": "front", - "round": "plus", - "long": "minus", - "aspirated": "minus", - }, - "e": { - "place": "vowel", - "manner": "vowel2", - "syllabic": "plus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "high": "mid", - "back": "front", - "round": "minus", - "long": "minus", - "aspirated": "minus", - }, - "E": { - "place": "vowel", - "manner": "vowel2", - "syllabic": "plus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "high": "mid", - "back": "front", - "round": "minus", - "long": "plus", - "aspirated": "minus", - }, - "ø": { - "place": "vowel", - "manner": "vowel2", - "syllabic": "plus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "high": "mid", - "back": "front", - "round": "plus", - "long": "minus", - "aspirated": "minus", - }, - "ɛ": { - "place": "vowel", - "manner": "vowel2", - "syllabic": "plus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "high": "mid", - "back": "front", - "round": "minus", - "long": "minus", - "aspirated": "minus", - }, - "œ": { - "place": "vowel", - "manner": "vowel2", - "syllabic": "plus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "high": "mid", - "back": "front", - "round": "plus", - "long": "minus", - "aspirated": "minus", - }, - "æ": { - "place": 
"vowel", - "manner": "vowel2", - "syllabic": "plus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "high": "low", - "back": "front", - "round": "minus", - "long": "minus", - "aspirated": "minus", - }, - "a": { - "place": "vowel", - "manner": "vowel2", - "syllabic": "plus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "high": "low", - "back": "front", - "round": "minus", - "long": "minus", - "aspirated": "minus", - }, - "A": { - "place": "vowel", - "manner": "vowel2", - "syllabic": "plus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "high": "low", - "back": "front", - "round": "minus", - "long": "plus", - "aspirated": "minus", - }, - "ɨ": { - "place": "vowel", - "manner": "vowel2", - "syllabic": "plus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "high": "high", - "back": "central", - "round": "minus", - "long": "minus", - "aspirated": "minus", - }, - "ʉ": { - "place": "vowel", - "manner": "vowel2", - "syllabic": "plus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "high": "high", - "back": "central", - "round": "plus", - "long": "minus", - "aspirated": "minus", - }, - "ə": { - "place": "vowel", - "manner": "vowel2", - "syllabic": "plus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "high": "mid", - "back": "central", - "round": "minus", - "long": "minus", - "aspirated": "minus", - }, - "u": { - "place": "vowel", - "manner": "vowel2", - "syllabic": "plus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "high": "high", - "back": "back", - "round": "plus", - "long": "minus", - "aspirated": "minus", - }, - "U": { - "place": "vowel", - "manner": "vowel2", - "syllabic": "plus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "high": "high", - "back": "back", - "round": "plus", - "long": "plus", - "aspirated": "minus", - }, - "o": { - "place": "vowel", - "manner": "vowel2", - "syllabic": "plus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "high": "mid", - "back": "back", - "round": "plus", - "long": "minus", - "aspirated": "minus", - }, - "O": { - "place": "vowel", - "manner": "vowel2", - "syllabic": "plus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "high": "mid", - "back": "back", - "round": "plus", - "long": "plus", - "aspirated": "minus", - }, - "ɔ": { - "place": "vowel", - "manner": "vowel2", - "syllabic": "plus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "high": "mid", - "back": "back", - "round": "plus", - "long": "minus", - "aspirated": "minus", - }, - "ɒ": { - "place": "vowel", - "manner": "vowel2", - "syllabic": "plus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "high": "low", - "back": "back", - "round": "minus", - "long": "minus", - "aspirated": "minus", - }, - "I": { - "place": "vowel", - "manner": "vowel2", - "syllabic": "plus", - "voice": "plus", - "nasal": "minus", - "retroflex": "minus", - "lateral": "minus", - "high": "high", - "back": "front", - "round": "minus", - "long": "plus", - "aspirated": "minus", + 'i': { + 'place': 'vowel', + 'manner': 'vowel2', + 'syllabic': 'plus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'high': 'high', + 
'back': 'front', + 'round': 'minus', + 'long': 'minus', + 'aspirated': 'minus', + }, + 'y': { + 'place': 'vowel', + 'manner': 'vowel2', + 'syllabic': 'plus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'high': 'high', + 'back': 'front', + 'round': 'plus', + 'long': 'minus', + 'aspirated': 'minus', + }, + 'e': { + 'place': 'vowel', + 'manner': 'vowel2', + 'syllabic': 'plus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'high': 'mid', + 'back': 'front', + 'round': 'minus', + 'long': 'minus', + 'aspirated': 'minus', + }, + 'E': { + 'place': 'vowel', + 'manner': 'vowel2', + 'syllabic': 'plus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'high': 'mid', + 'back': 'front', + 'round': 'minus', + 'long': 'plus', + 'aspirated': 'minus', + }, + 'ø': { + 'place': 'vowel', + 'manner': 'vowel2', + 'syllabic': 'plus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'high': 'mid', + 'back': 'front', + 'round': 'plus', + 'long': 'minus', + 'aspirated': 'minus', + }, + 'ɛ': { + 'place': 'vowel', + 'manner': 'vowel2', + 'syllabic': 'plus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'high': 'mid', + 'back': 'front', + 'round': 'minus', + 'long': 'minus', + 'aspirated': 'minus', + }, + 'œ': { + 'place': 'vowel', + 'manner': 'vowel2', + 'syllabic': 'plus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'high': 'mid', + 'back': 'front', + 'round': 'plus', + 'long': 'minus', + 'aspirated': 'minus', + }, + 'æ': { + 'place': 'vowel', + 'manner': 'vowel2', + 'syllabic': 'plus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'high': 'low', + 'back': 'front', + 'round': 'minus', + 'long': 'minus', + 'aspirated': 'minus', + }, + 'a': { + 'place': 'vowel', + 'manner': 'vowel2', + 'syllabic': 'plus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'high': 'low', + 'back': 'front', + 'round': 'minus', + 'long': 'minus', + 'aspirated': 'minus', + }, + 'A': { + 'place': 'vowel', + 'manner': 'vowel2', + 'syllabic': 'plus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'high': 'low', + 'back': 'front', + 'round': 'minus', + 'long': 'plus', + 'aspirated': 'minus', + }, + 'ɨ': { + 'place': 'vowel', + 'manner': 'vowel2', + 'syllabic': 'plus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'high': 'high', + 'back': 'central', + 'round': 'minus', + 'long': 'minus', + 'aspirated': 'minus', + }, + 'ʉ': { + 'place': 'vowel', + 'manner': 'vowel2', + 'syllabic': 'plus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'high': 'high', + 'back': 'central', + 'round': 'plus', + 'long': 'minus', + 'aspirated': 'minus', + }, + 'ə': { + 'place': 'vowel', + 'manner': 'vowel2', + 'syllabic': 'plus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'high': 'mid', + 'back': 'central', + 'round': 'minus', + 'long': 'minus', + 'aspirated': 'minus', + }, + 'u': { + 'place': 'vowel', + 'manner': 'vowel2', + 'syllabic': 'plus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'high': 'high', + 'back': 'back', + 'round': 'plus', + 'long': 'minus', + 'aspirated': 'minus', + }, + 'U': { + 'place': 'vowel', + 'manner': 'vowel2', + 
'syllabic': 'plus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'high': 'high', + 'back': 'back', + 'round': 'plus', + 'long': 'plus', + 'aspirated': 'minus', + }, + 'o': { + 'place': 'vowel', + 'manner': 'vowel2', + 'syllabic': 'plus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'high': 'mid', + 'back': 'back', + 'round': 'plus', + 'long': 'minus', + 'aspirated': 'minus', + }, + 'O': { + 'place': 'vowel', + 'manner': 'vowel2', + 'syllabic': 'plus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'high': 'mid', + 'back': 'back', + 'round': 'plus', + 'long': 'plus', + 'aspirated': 'minus', + }, + 'ɔ': { + 'place': 'vowel', + 'manner': 'vowel2', + 'syllabic': 'plus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'high': 'mid', + 'back': 'back', + 'round': 'plus', + 'long': 'minus', + 'aspirated': 'minus', + }, + 'ɒ': { + 'place': 'vowel', + 'manner': 'vowel2', + 'syllabic': 'plus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'high': 'low', + 'back': 'back', + 'round': 'minus', + 'long': 'minus', + 'aspirated': 'minus', + }, + 'I': { + 'place': 'vowel', + 'manner': 'vowel2', + 'syllabic': 'plus', + 'voice': 'plus', + 'nasal': 'minus', + 'retroflex': 'minus', + 'lateral': 'minus', + 'high': 'high', + 'back': 'front', + 'round': 'minus', + 'long': 'plus', + 'aspirated': 'minus', }, } @@ -1091,7 +1093,7 @@ def align(str1, str2, epsilon=0): (Kondrak 2002: 51) """ if np is None: - raise ImportError("You need numpy in order to use the align function") + raise ImportError('You need numpy in order to use the align function') assert 0.0 <= epsilon <= 1.0, "Epsilon must be between 0.0 and 1.0." m = len(str1) @@ -1164,10 +1166,10 @@ def _retrieve(i, j, s, S, T, str1, str2, out): out, ) elif S[i, j - 1] + sigma_skip(str2[j - 1]) + s >= T: - out.insert(0, ("-", str2[j - 1])) + out.insert(0, ('-', str2[j - 1])) _retrieve(i, j - 1, s + sigma_skip(str2[j - 1]), S, T, str1, str2, out) elif S[i - 1, j] + sigma_skip(str1[i - 1]) + s >= T: - out.insert(0, (str1[i - 1], "-")) + out.insert(0, (str1[i - 1], '-')) _retrieve(i - 1, j, s + sigma_skip(str1[i - 1]), S, T, str1, str2, out) elif S[i - 1, j - 1] + sigma_sub(str1[i - 1], str2[j - 1]) + s >= T: out.insert(0, (str1[i - 1], str2[j - 1])) @@ -1266,12 +1268,12 @@ def demo(): A demonstration of the result of aligning phonetic sequences used in Kondrak's (2002) dissertation. 
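    A sketch of the expected output format, using the first pair from the
    cognate_data string below (the exact segment pairing depends on the
    similarity constants defined above):

        jo ~ ʒə : (j, ʒ) (o, ə)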
""" - data = [pair.split(",") for pair in cognate_data.split("\n")] + data = [pair.split(',') for pair in cognate_data.split('\n')] for pair in data: alignment = align(pair[0], pair[1])[0] - alignment = ["({}, {})".format(a[0], a[1]) for a in alignment] - alignment = " ".join(alignment) - print("{} ~ {} : {}".format(pair[0], pair[1], alignment)) + alignment = ['({}, {})'.format(a[0], a[1]) for a in alignment] + alignment = ' '.join(alignment) + print('{} ~ {} : {}'.format(pair[0], pair[1], alignment)) cognate_data = """jo,ʒə @@ -1350,5 +1352,5 @@ ahkohkwa,ahkɛh pematesiweni,pematesewen asenja,aʔsɛn""" -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/metrics/association.py b/nlp_resource_data/nltk/metrics/association.py index c2638c8..4994f1f 100644 --- a/nlp_resource_data/nltk/metrics/association.py +++ b/nlp_resource_data/nltk/metrics/association.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Ngram Association Measures # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Joel Nothman # URL: # For license information, see LICENSE.TXT @@ -11,10 +11,13 @@ generic, abstract implementation in ``NgramAssocMeasures``, and n-specific ``BigramAssocMeasures`` and ``TrigramAssocMeasures``. """ +from __future__ import division + import math as _math from abc import ABCMeta, abstractmethod from functools import reduce +from six import add_metaclass _log2 = lambda x: _math.log(x, 2.0) _ln = _math.log @@ -43,7 +46,8 @@ TOTAL = -1 """Marginals index for the number of words in the data""" -class NgramAssocMeasures(metaclass=ABCMeta): +@add_metaclass(ABCMeta) +class NgramAssocMeasures(object): """ An abstract class defining a collection of generic association measures. Each public method returns a score, taking the following arguments:: @@ -127,7 +131,7 @@ class NgramAssocMeasures(metaclass=ABCMeta): argument power sets an exponent (default 3) for the numerator. No logarithm of the result is calculated. 
""" - return marginals[NGRAM] ** kwargs.get("power", 3) / _product( + return marginals[NGRAM] ** kwargs.get('power', 3) / _product( marginals[UNIGRAMS] ) @@ -242,7 +246,7 @@ class BigramAssocMeasures(NgramAssocMeasures): n_ii, n_io, n_oi, n_oo = cls._contingency(*marginals) - (odds, pvalue) = fisher_exact([[n_ii, n_io], [n_oi, n_oo]], alternative="less") + (odds, pvalue) = fisher_exact([[n_ii, n_io], [n_oi, n_oo]], alternative='less') return pvalue @staticmethod @@ -438,12 +442,12 @@ class ContingencyMeasures(object): def __init__(self, measures): """Constructs a ContingencyMeasures given a NgramAssocMeasures class""" - self.__class__.__name__ = "Contingency" + measures.__class__.__name__ + self.__class__.__name__ = 'Contingency' + measures.__class__.__name__ for k in dir(measures): - if k.startswith("__"): + if k.startswith('__'): continue v = getattr(measures, k) - if not k.startswith("_"): + if not k.startswith('_'): v = self._make_contingency_fn(measures, v) setattr(self, k, v) diff --git a/nlp_resource_data/nltk/metrics/confusionmatrix.py b/nlp_resource_data/nltk/metrics/confusionmatrix.py index 7d96d77..3f82f29 100644 --- a/nlp_resource_data/nltk/metrics/confusionmatrix.py +++ b/nlp_resource_data/nltk/metrics/confusionmatrix.py @@ -1,14 +1,16 @@ # Natural Language Toolkit: Confusion Matrices # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # Steven Bird # URL: # For license information, see LICENSE.TXT - +from __future__ import print_function, unicode_literals from nltk.probability import FreqDist +from nltk.compat import python_2_unicode_compatible +@python_2_unicode_compatible class ConfusionMatrix(object): """ The confusion matrix between a list of reference values and a @@ -42,7 +44,7 @@ class ConfusionMatrix(object): the same length. """ if len(reference) != len(test): - raise ValueError("Lists must have the same length.") + raise ValueError('Lists must have the same length.') # Get a list of all values. if sort_by_count: @@ -91,7 +93,7 @@ class ConfusionMatrix(object): return self._confusion[i][j] def __repr__(self): - return "" % (self._correct, self._total) + return '' % (self._correct, self._total) def __str__(self): return self.pretty_format() @@ -135,30 +137,30 @@ class ConfusionMatrix(object): # Construct a format string for row values valuelen = max(len(val) for val in value_strings) - value_format = "%" + repr(valuelen) + "s | " + value_format = '%' + repr(valuelen) + 's | ' # Construct a format string for matrix entries if show_percents: entrylen = 6 - entry_format = "%5.1f%%" - zerostr = " ." + entry_format = '%5.1f%%' + zerostr = ' .' else: entrylen = len(repr(self._max_conf)) - entry_format = "%" + repr(entrylen) + "d" - zerostr = " " * (entrylen - 1) + "." + entry_format = '%' + repr(entrylen) + 'd' + zerostr = ' ' * (entrylen - 1) + '.' # Write the column values. - s = "" + s = '' for i in range(valuelen): - s += (" " * valuelen) + " |" + s += (' ' * valuelen) + ' |' for val in value_strings: if i >= valuelen - len(val): s += val[i - valuelen + len(val)].rjust(entrylen + 1) else: - s += " " * (entrylen + 1) - s += " |\n" + s += ' ' * (entrylen + 1) + s += ' |\n' # Write a dividing line - s += "%s-+-%s+\n" % ("-" * valuelen, "-" * ((entrylen + 1) * len(values))) + s += '%s-+-%s+\n' % ('-' * valuelen, '-' * ((entrylen + 1) * len(values))) # Write the entries. 
for val, li in zip(value_strings, values): @@ -173,29 +175,29 @@ class ConfusionMatrix(object): else: s += entry_format % confusion[i][j] if i == j: - prevspace = s.rfind(" ") - s = s[:prevspace] + "<" + s[prevspace + 1 :] + ">" + prevspace = s.rfind(' ') + s = s[:prevspace] + '<' + s[prevspace + 1 :] + '>' else: - s += " " - s += "|\n" + s += ' ' + s += '|\n' # Write a dividing line - s += "%s-+-%s+\n" % ("-" * valuelen, "-" * ((entrylen + 1) * len(values))) + s += '%s-+-%s+\n' % ('-' * valuelen, '-' * ((entrylen + 1) * len(values))) # Write a key - s += "(row = reference; col = test)\n" + s += '(row = reference; col = test)\n' if not values_in_chart: - s += "Value key:\n" + s += 'Value key:\n' for i, value in enumerate(values): - s += "%6d: %s\n" % (i + 1, value) + s += '%6d: %s\n' % (i + 1, value) return s def key(self): values = self._values - str = "Value key:\n" + str = 'Value key:\n' indexlen = len(repr(len(values) - 1)) - key_format = " %" + repr(indexlen) + "d: %s\n" + key_format = ' %' + repr(indexlen) + 'd: %s\n' for i in range(len(values)): str += key_format % (i, values[i]) @@ -203,14 +205,14 @@ class ConfusionMatrix(object): def demo(): - reference = "DET NN VB DET JJ NN NN IN DET NN".split() - test = "DET VB VB DET NN NN NN IN DET NN".split() - print("Reference =", reference) - print("Test =", test) - print("Confusion matrix:") + reference = 'DET NN VB DET JJ NN NN IN DET NN'.split() + test = 'DET VB VB DET NN NN NN IN DET NN'.split() + print('Reference =', reference) + print('Test =', test) + print('Confusion matrix:') print(ConfusionMatrix(reference, test)) print(ConfusionMatrix(reference, test).pretty_format(sort_by_count=True)) -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/metrics/distance.py b/nlp_resource_data/nltk/metrics/distance.py index ae988ab..e295afb 100644 --- a/nlp_resource_data/nltk/metrics/distance.py +++ b/nlp_resource_data/nltk/metrics/distance.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: Distance Metrics # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # Steven Bird # Tom Lippincott @@ -20,8 +20,10 @@ As metrics, they must satisfy the following three requirements: 3. d(a, c) <= d(a, b) + d(b, c) """ +from __future__ import print_function +from __future__ import division + import warnings -import operator def _edit_dist_init(len1, len2): @@ -101,77 +103,6 @@ def edit_distance(s1, s2, substitution_cost=1, transpositions=False): return lev[len1][len2] -def _edit_dist_backtrace(lev): - i, j = len(lev) - 1, len(lev[0]) - 1 - alignment = [(i, j)] - - while (i, j) != (0, 0): - directions = [ - (i - 1, j), # skip s1 - (i, j - 1), # skip s2 - (i - 1, j - 1), # substitution - ] - - direction_costs = ( - (lev[i][j] if (i >= 0 and j >= 0) else float("inf"), (i, j)) - for i, j in directions - ) - _, (i, j) = min(direction_costs, key=operator.itemgetter(0)) - - alignment.append((i, j)) - return list(reversed(alignment)) - - -def edit_distance_align(s1, s2, substitution_cost=1): - """ - Calculate the minimum Levenshtein edit-distance based alignment - mapping between two strings. The alignment finds the mapping - from string s1 to s2 that minimizes the edit distance cost. 
- For example, mapping "rain" to "shine" would involve 2 - substitutions, 2 matches and an insertion resulting in - the following mapping: - [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (4, 5)] - NB: (0, 0) is the start state without any letters associated - See more: https://web.stanford.edu/class/cs124/lec/med.pdf - - In case of multiple valid minimum-distance alignments, the - backtrace has the following operation precedence: - 1. Skip s1 character - 2. Skip s2 character - 3. Substitute s1 and s2 characters - The backtrace is carried out in reverse string order. - - This function does not support transposition. - - :param s1, s2: The strings to be aligned - :type s1: str - :type s2: str - :type substitution_cost: int - :rtype List[Tuple(int, int)] - """ - # set up a 2-D array - len1 = len(s1) - len2 = len(s2) - lev = _edit_dist_init(len1 + 1, len2 + 1) - - # iterate over the array - for i in range(len1): - for j in range(len2): - _edit_dist_step( - lev, - i + 1, - j + 1, - s1, - s2, - substitution_cost=substitution_cost, - transpositions=False, - ) - - # backtrace to find alignment - alignment = _edit_dist_backtrace(lev) - return alignment - - def binary_distance(label1, label2): """Simple equality test. @@ -261,7 +192,7 @@ def fractional_presence(label): def custom_distance(file): data = {} - with open(file, "r") as infile: + with open(file, 'r') as infile: for l in infile: labelA, labelB, dist = l.strip().split("\t") labelA = frozenset([labelA]) @@ -482,5 +413,5 @@ def demo(): print("MASI distance:", masi_distance(s1, s2)) -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/metrics/paice.py b/nlp_resource_data/nltk/metrics/paice.py index 46e8fce..b26069b 100644 --- a/nlp_resource_data/nltk/metrics/paice.py +++ b/nlp_resource_data/nltk/metrics/paice.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Agreement Metrics # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Lauri Hallila # URL: # For license information, see LICENSE.TXT @@ -24,7 +24,7 @@ from math import sqrt def get_words_from_dictionary(lemmas): - """ + ''' Get original set of words used for analysis. :param lemmas: A dictionary where keys are lemmas and values are sets @@ -32,7 +32,7 @@ def get_words_from_dictionary(lemmas): :type lemmas: dict(str): list(str) :return: Set of words that exist as values in the dictionary :rtype: set(str) - """ + ''' words = set() for lemma in lemmas: words.update(set(lemmas[lemma])) @@ -40,7 +40,7 @@ def get_words_from_dictionary(lemmas): def _truncate(words, cutlength): - """Group words by stems defined by truncating them at given length. + '''Group words by stems defined by truncating them at given length. :param words: Set of words used for analysis :param cutlength: Words are stemmed by cutting at this length. @@ -49,7 +49,7 @@ def _truncate(words, cutlength): :return: Dictionary where keys are stems and values are sets of words corresponding to that stem. :rtype: dict(str): set(str) - """ + ''' stems = {} for word in words: stem = word[:cutlength] @@ -62,7 +62,7 @@ def _truncate(words, cutlength): # Reference: http://en.wikipedia.org/wiki/Line-line_intersection def _count_intersection(l1, l2): - """Count intersection between two line segments defined by coordinate pairs. + '''Count intersection between two line segments defined by coordinate pairs. 
:param l1: Tuple of two coordinate pairs defining the first line segment :param l2: Tuple of two coordinate pairs defining the second line segment @@ -70,7 +70,7 @@ def _count_intersection(l1, l2): :type l2: tuple(float, float) :return: Coordinates of the intersection :rtype: tuple(float, float) - """ + ''' x1, y1 = l1[0] x2, y2 = l1[1] x3, y3 = l2[0] @@ -97,21 +97,21 @@ def _count_intersection(l1, l2): def _get_derivative(coordinates): - """Get derivative of the line from (0,0) to given coordinates. + '''Get derivative of the line from (0,0) to given coordinates. :param coordinates: A coordinate pair :type coordinates: tuple(float, float) :return: Derivative; inf if x is zero :rtype: float - """ + ''' try: return coordinates[1] / coordinates[0] except ZeroDivisionError: - return float("inf") + return float('inf') def _calculate_cut(lemmawords, stems): - """Count understemmed and overstemmed pairs for (lemma, stem) pair with common words. + '''Count understemmed and overstemmed pairs for (lemma, stem) pair with common words. :param lemmawords: Set or list of words corresponding to certain lemma. :param stems: A dictionary where keys are stems and values are sets @@ -121,7 +121,7 @@ def _calculate_cut(lemmawords, stems): :return: Amount of understemmed and overstemmed pairs contributed by words existing in both lemmawords and stems. :rtype: tuple(float, float) - """ + ''' umt, wmt = 0.0, 0.0 for stem in stems: cut = set(lemmawords) & set(stems[stem]) @@ -136,7 +136,7 @@ def _calculate_cut(lemmawords, stems): def _calculate(lemmas, stems): - """Calculate actual and maximum possible amounts of understemmed and overstemmed word pairs. + '''Calculate actual and maximum possible amounts of understemmed and overstemmed word pairs. :param lemmas: A dictionary where keys are lemmas and values are sets or lists of words corresponding to that lemma. @@ -149,7 +149,7 @@ def _calculate(lemmas, stems): global wrongly merged total (gwmt) and global desired non-merge total (gdnt). :rtype: tuple(float, float, float, float) - """ + ''' n = sum(len(lemmas[word]) for word in lemmas) @@ -177,7 +177,7 @@ def _calculate(lemmas, stems): def _indexes(gumt, gdmt, gwmt, gdnt): - """Count Understemming Index (UI), Overstemming Index (OI) and Stemming Weight (SW). + '''Count Understemming Index (UI), Overstemming Index (OI) and Stemming Weight (SW). :param gumt, gdmt, gwmt, gdnt: Global unachieved merge total (gumt), global desired merge total (gdmt), @@ -188,7 +188,7 @@ def _indexes(gumt, gdmt, gwmt, gdnt): Overstemming Index (OI) and Stemming Weight (SW). :rtype: tuple(float, float, float) - """ + ''' # Calculate Understemming Index (UI), # Overstemming Index (OI) and Stemming Weight (SW) try: @@ -206,25 +206,25 @@ def _indexes(gumt, gdmt, gwmt, gdnt): except ZeroDivisionError: if oi == 0.0: # OI and UI are 0, define SW as 'not a number' - sw = float("nan") + sw = float('nan') else: # UI is 0, define SW as infinity - sw = float("inf") + sw = float('inf') return (ui, oi, sw) class Paice(object): - """Class for storing lemmas, stems and evaluation metrics.""" + '''Class for storing lemmas, stems and evaluation metrics.''' def __init__(self, lemmas, stems): - """ + ''' :param lemmas: A dictionary where keys are lemmas and values are sets or lists of words corresponding to that lemma. :param stems: A dictionary where keys are stems and values are sets or lists of words corresponding to that stem. 
:type lemmas: dict(str): list(str) :type stems: dict(str): set(str) - """ + ''' self.lemmas = lemmas self.stems = stems self.coords = [] @@ -234,20 +234,20 @@ class Paice(object): self.update() def __str__(self): - text = ["Global Unachieved Merge Total (GUMT): %s\n" % self.gumt] - text.append("Global Desired Merge Total (GDMT): %s\n" % self.gdmt) - text.append("Global Wrongly-Merged Total (GWMT): %s\n" % self.gwmt) - text.append("Global Desired Non-merge Total (GDNT): %s\n" % self.gdnt) - text.append("Understemming Index (GUMT / GDMT): %s\n" % self.ui) - text.append("Overstemming Index (GWMT / GDNT): %s\n" % self.oi) - text.append("Stemming Weight (OI / UI): %s\n" % self.sw) - text.append("Error-Rate Relative to Truncation (ERRT): %s\r\n" % self.errt) - coordinates = " ".join(["(%s, %s)" % item for item in self.coords]) - text.append("Truncation line: %s" % coordinates) - return "".join(text) + text = ['Global Unachieved Merge Total (GUMT): %s\n' % self.gumt] + text.append('Global Desired Merge Total (GDMT): %s\n' % self.gdmt) + text.append('Global Wrongly-Merged Total (GWMT): %s\n' % self.gwmt) + text.append('Global Desired Non-merge Total (GDNT): %s\n' % self.gdnt) + text.append('Understemming Index (GUMT / GDMT): %s\n' % self.ui) + text.append('Overstemming Index (GWMT / GDNT): %s\n' % self.oi) + text.append('Stemming Weight (OI / UI): %s\n' % self.sw) + text.append('Error-Rate Relative to Truncation (ERRT): %s\r\n' % self.errt) + coordinates = ' '.join(['(%s, %s)' % item for item in self.coords]) + text.append('Truncation line: %s' % coordinates) + return ''.join(text) def _get_truncation_indexes(self, words, cutlength): - """Count (UI, OI) when stemming is done by truncating words at \'cutlength\'. + '''Count (UI, OI) when stemming is done by truncating words at \'cutlength\'. :param words: Words used for the analysis :param cutlength: Words are stemmed by cutting them at this length @@ -255,7 +255,7 @@ class Paice(object): :type cutlength: int :return: Understemming and overstemming indexes :rtype: tuple(int, int) - """ + ''' truncated = _truncate(words, cutlength) gumt, gdmt, gwmt, gdnt = _calculate(self.lemmas, truncated) @@ -263,7 +263,7 @@ class Paice(object): return (ui, oi) def _get_truncation_coordinates(self, cutlength=0): - """Count (UI, OI) pairs for truncation points until we find the segment where (ui, oi) crosses the truncation line. + '''Count (UI, OI) pairs for truncation points until we find the segment where (ui, oi) crosses the truncation line. :param cutlength: Optional parameter to start counting from (ui, oi) coordinates gotten by stemming at this length. Useful for speeding up @@ -272,7 +272,7 @@ class Paice(object): :type cutlength: int :return: List of coordinate pairs that define the truncation line :rtype: list(tuple(float, float)) - """ + ''' words = get_words_from_dictionary(self.lemmas) maxlength = max(len(word) for word in words) @@ -302,21 +302,21 @@ class Paice(object): return coords def _errt(self): - """Count Error-Rate Relative to Truncation (ERRT). + '''Count Error-Rate Relative to Truncation (ERRT). :return: ERRT, length of the line from origo to (UI, OI) divided by the length of the line from origo to the point defined by the same line when extended until the truncation line. 
:rtype: float - """ + ''' # Count (UI, OI) pairs for truncation points until we find the segment where (ui, oi) crosses the truncation line self.coords = self._get_truncation_coordinates() if (0.0, 0.0) in self.coords: # Truncation line goes through origo, so ERRT cannot be counted if (self.ui, self.oi) != (0.0, 0.0): - return float("inf") + return float('inf') else: - return float("nan") + return float('nan') if (self.ui, self.oi) == (0.0, 0.0): # (ui, oi) is origo; define errt as 0.0 return 0.0 @@ -334,56 +334,56 @@ class Paice(object): return op / ot def update(self): - """Update statistics after lemmas and stems have been set.""" + '''Update statistics after lemmas and stems have been set.''' self.gumt, self.gdmt, self.gwmt, self.gdnt = _calculate(self.lemmas, self.stems) self.ui, self.oi, self.sw = _indexes(self.gumt, self.gdmt, self.gwmt, self.gdnt) self.errt = self._errt() def demo(): - """Demonstration of the module.""" + '''Demonstration of the module.''' # Some words with their real lemmas lemmas = { - "kneel": ["kneel", "knelt"], - "range": ["range", "ranged"], - "ring": ["ring", "rang", "rung"], + 'kneel': ['kneel', 'knelt'], + 'range': ['range', 'ranged'], + 'ring': ['ring', 'rang', 'rung'], } # Same words with stems from a stemming algorithm stems = { - "kneel": ["kneel"], - "knelt": ["knelt"], - "rang": ["rang", "range", "ranged"], - "ring": ["ring"], - "rung": ["rung"], + 'kneel': ['kneel'], + 'knelt': ['knelt'], + 'rang': ['rang', 'range', 'ranged'], + 'ring': ['ring'], + 'rung': ['rung'], } - print("Words grouped by their lemmas:") + print('Words grouped by their lemmas:') for lemma in sorted(lemmas): - print("%s => %s" % (lemma, " ".join(lemmas[lemma]))) + print('%s => %s' % (lemma, ' '.join(lemmas[lemma]))) print() - print("Same words grouped by a stemming algorithm:") + print('Same words grouped by a stemming algorithm:') for stem in sorted(stems): - print("%s => %s" % (stem, " ".join(stems[stem]))) + print('%s => %s' % (stem, ' '.join(stems[stem]))) print() p = Paice(lemmas, stems) print(p) print() # Let's "change" results from a stemming algorithm stems = { - "kneel": ["kneel"], - "knelt": ["knelt"], - "rang": ["rang"], - "range": ["range", "ranged"], - "ring": ["ring"], - "rung": ["rung"], + 'kneel': ['kneel'], + 'knelt': ['knelt'], + 'rang': ['rang'], + 'range': ['range', 'ranged'], + 'ring': ['ring'], + 'rung': ['rung'], } - print("Counting stats after changing stemming results:") + print('Counting stats after changing stemming results:') for stem in sorted(stems): - print("%s => %s" % (stem, " ".join(stems[stem]))) + print('%s => %s' % (stem, ' '.join(stems[stem]))) print() p.stems = stems p.update() print(p) -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/metrics/scores.py b/nlp_resource_data/nltk/metrics/scores.py index b5156ed..9e6a516 100644 --- a/nlp_resource_data/nltk/metrics/scores.py +++ b/nlp_resource_data/nltk/metrics/scores.py @@ -1,16 +1,19 @@ # Natural Language Toolkit: Evaluation # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # Steven Bird # URL: # For license information, see LICENSE.TXT +from __future__ import print_function, division from math import fabs import operator from random import shuffle from functools import reduce +from six.moves import range, zip + try: from scipy.stats.stats import betai except ImportError: @@ -52,8 +55,8 @@ def precision(reference, test): :param test: A set of values to compare against the reference set. 
:rtype: float or None """ - if not hasattr(reference, "intersection") or not hasattr(test, "intersection"): - raise TypeError("reference and test should be sets") + if not hasattr(reference, 'intersection') or not hasattr(test, 'intersection'): + raise TypeError('reference and test should be sets') if len(test) == 0: return None @@ -74,8 +77,8 @@ def recall(reference, test): :param test: A set of values to compare against the reference set. :rtype: float or None """ - if not hasattr(reference, "intersection") or not hasattr(test, "intersection"): - raise TypeError("reference and test should be sets") + if not hasattr(reference, 'intersection') or not hasattr(test, 'intersection'): + raise TypeError('reference and test should be sets') if len(reference) == 0: return None @@ -157,20 +160,20 @@ def approxrand(a, b, **kwargs): :param b: another list of independently generated test values :type b: list """ - shuffles = kwargs.get("shuffles", 999) + shuffles = kwargs.get('shuffles', 999) # there's no point in trying to shuffle beyond all possible permutations shuffles = min(shuffles, reduce(operator.mul, range(1, len(a) + len(b) + 1))) - stat = kwargs.get("statistic", lambda lst: sum(lst) / len(lst)) - verbose = kwargs.get("verbose", False) + stat = kwargs.get('statistic', lambda lst: sum(lst) / len(lst)) + verbose = kwargs.get('verbose', False) if verbose: - print("shuffles: %d" % shuffles) + print('shuffles: %d' % shuffles) actual_stat = fabs(stat(a) - stat(b)) if verbose: - print("actual statistic: %f" % actual_stat) - print("-" * 60) + print('actual statistic: %f' % actual_stat) + print('-' * 60) c = 1e-100 lst = LazyConcatenation([a, b]) @@ -178,7 +181,7 @@ def approxrand(a, b, **kwargs): for i in range(shuffles): if verbose and i % 10 == 0: - print("shuffle: %d" % i) + print('shuffle: %d' % i) shuffle(indices) @@ -190,14 +193,14 @@ def approxrand(a, b, **kwargs): c += 1 if verbose and i % 10 == 0: - print("pseudo-statistic: %f" % pseudo_stat) - print("significance: %f" % ((c + 1) / (i + 1))) - print("-" * 60) + print('pseudo-statistic: %f' % pseudo_stat) + print('significance: %f' % ((c + 1) / (i + 1))) + print('-' * 60) significance = (c + 1) / (shuffles + 1) if verbose: - print("significance: %f" % significance) + print('significance: %f' % significance) if betai: for phi in [0.01, 0.05, 0.10, 0.15, 0.25, 0.50]: print("prob(phi<=%f): %f" % (phi, betai(c, shuffles, phi))) @@ -206,23 +209,23 @@ def approxrand(a, b, **kwargs): def demo(): - print("-" * 75) - reference = "DET NN VB DET JJ NN NN IN DET NN".split() - test = "DET VB VB DET NN NN NN IN DET NN".split() - print("Reference =", reference) - print("Test =", test) - print("Accuracy:", accuracy(reference, test)) - - print("-" * 75) + print('-' * 75) + reference = 'DET NN VB DET JJ NN NN IN DET NN'.split() + test = 'DET VB VB DET NN NN NN IN DET NN'.split() + print('Reference =', reference) + print('Test =', test) + print('Accuracy:', accuracy(reference, test)) + + print('-' * 75) reference_set = set(reference) test_set = set(test) - print("Reference =", reference_set) - print("Test = ", test_set) - print("Precision:", precision(reference_set, test_set)) - print(" Recall:", recall(reference_set, test_set)) - print("F-Measure:", f_measure(reference_set, test_set)) - print("-" * 75) + print('Reference =', reference_set) + print('Test = ', test_set) + print('Precision:', precision(reference_set, test_set)) + print(' Recall:', recall(reference_set, test_set)) + print('F-Measure:', f_measure(reference_set, test_set)) + print('-' * 75) -if 
__name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/metrics/segmentation.py b/nlp_resource_data/nltk/metrics/segmentation.py index 412e00d..9a96c15 100644 --- a/nlp_resource_data/nltk/metrics/segmentation.py +++ b/nlp_resource_data/nltk/metrics/segmentation.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Text Segmentation Metrics # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # Steven Bird # David Doukhan @@ -45,6 +45,8 @@ try: except ImportError: pass +from six.moves import range + def windowdiff(seg1, seg2, k, boundary="1", weighted=False): """ @@ -118,7 +120,7 @@ def _ghd_aux(mat, rowv, colv, ins_cost, del_cost, shift_cost_coeff): mat[i + 1, j + 1] = min(tcost, shift_cost) -def ghd(ref, hyp, ins_cost=2.0, del_cost=2.0, shift_cost_coeff=1.0, boundary="1"): +def ghd(ref, hyp, ins_cost=2.0, del_cost=2.0, shift_cost_coeff=1.0, boundary='1'): """ Compute the Generalized Hamming Distance for a reference and a hypothetical segmentation, corresponding to the cost related to the transformation @@ -185,7 +187,7 @@ def ghd(ref, hyp, ins_cost=2.0, del_cost=2.0, shift_cost_coeff=1.0, boundary="1" # Beeferman's Pk text segmentation evaluation metric -def pk(ref, hyp, k=None, boundary="1"): +def pk(ref, hyp, k=None, boundary='1'): """ Compute the Pk metric for a pair of segmentations A segmentation is any sequence over a vocabulary of two items (e.g. "0", "1"), diff --git a/nlp_resource_data/nltk/metrics/spearman.py b/nlp_resource_data/nltk/metrics/spearman.py index a6d17db..3736b8f 100644 --- a/nlp_resource_data/nltk/metrics/spearman.py +++ b/nlp_resource_data/nltk/metrics/spearman.py @@ -1,9 +1,10 @@ # Natural Language Toolkit: Spearman Rank Correlation # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Joel Nothman # URL: # For license information, see LICENSE.TXT +from __future__ import division """ Tools for comparing ranked lists. 
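[Illustrative sketch, not part of the patch: the scores.py, segmentation.py and spearman.py hunks above only restyle string literals and add six/__future__ imports, so the metric APIs they touch keep the signatures shown there (precision/recall/f_measure over sets, and windowdiff/pk with the boundary='1' default). The tag strings below are taken from the demo() in scores.py; the segmentation strings and window size are made-up example values, and the snippet assumes the nltk package from this tree is on the import path.]

    # Sanity check of the public API left unchanged by the restyling above.
    from nltk.metrics.scores import precision, recall, f_measure
    from nltk.metrics.segmentation import windowdiff, pk

    reference = set('DET NN VB DET JJ NN NN IN DET NN'.split())
    test = set('DET VB VB DET NN NN NN IN DET NN'.split())
    print('Precision:', precision(reference, test))
    print('Recall:   ', recall(reference, test))
    print('F-Measure:', f_measure(reference, test))

    # Segmentation metrics compare boundary strings; '1' marks a boundary,
    # matching the boundary='1' default shown in the segmentation.py hunks.
    seg1 = '0100100'
    seg2 = '0101000'
    print('WindowDiff:', windowdiff(seg1, seg2, 3))
    print('Pk:        ', pk(seg1, seg2))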
diff --git a/nlp_resource_data/nltk/misc/__init__.py b/nlp_resource_data/nltk/misc/__init__.py index ab1f761..63c1da9 100644 --- a/nlp_resource_data/nltk/misc/__init__.py +++ b/nlp_resource_data/nltk/misc/__init__.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Miscellaneous modules # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Steven Bird # URL: # For license information, see LICENSE.TXT diff --git a/nlp_resource_data/nltk/misc/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/misc/__pycache__/__init__.cpython-37.pyc index 8b0d95bc6375a57988fc9f87715b5398651a21a2..c5604f08a18857a2976c4392601d841dc034fc9d 100644 GIT binary patch delta 31 lcmZo-{>H@Z#LLUY00gD%6E|{;GP2p|mlhZ4Pc~u<003t_2HpSw delta 43 xcmeyy)Wpo~#LLUY00eSL0UNnR8HGIbGxBp&^|KNyGx7@(i?a3gCaW<9007wR3nBmj diff --git a/nlp_resource_data/nltk/misc/__pycache__/babelfish.cpython-37.pyc b/nlp_resource_data/nltk/misc/__pycache__/babelfish.cpython-37.pyc index 8623bb4b6614ffdb12f5a173e717d28d7f6416e8..93b594b3a7fe56f86c501c2272ded4fef3b6e55e 100644 GIT binary patch delta 202 zcmcc0@}Gs*iI%0;vJ302_fsumUA+aoFVM Tr%#6Fq)R4reTu zVFJn)frww8`WgATsrp%ol^OX3iACA^dXsN4dU$aH<%&TH7#NFKfTW)$(=GP+_>}zQ g_;|1YST_>E3{-TB!zMRBr8Fnijt$5IYiD5w0OZOb=Kufz diff --git a/nlp_resource_data/nltk/misc/__pycache__/chomsky.cpython-37.pyc b/nlp_resource_data/nltk/misc/__pycache__/chomsky.cpython-37.pyc index 9de1f5dda5bfbf707a4320678d864b985f78f86d..3dc810df99a7e9d80f0147cc953a82dae846058d 100644 GIT binary patch delta 471 zcmYk2yH3L}6o!2qx4AS)OMzA$xNId0JOGFRmP!a1DunQWRnAqgiEBuyZtoC1*+ zw zVBa2-s55y5LcHD4;r@8yH!%s#rKyIZV_5J7-J7Z$*S z5UE$NQ#(SWI9rH_>Lxa{jfgt6EriUcLe!o>LuI97Av84t*T8(DZW;InJ%cWyc2ybc zGS8Dj|5`gGgu8{hUQhF@xcIU8w>q1oX0CDk56fndvz6wuO5$zfyf6zG_(QhH=2?$f M9q3Ts@;nE=0d$^aYXATM delta 361 zcmYk1&q~8U5XQ5c&EGb)w$vbK!Bfql<^@E=iylM@LWR97F|*Lvq$%5auon-Ccd6cd z15eU7@t8O82?S5h3W|I94d3j}H{ZVOzW0KoAn*-5-OGXKKL(q_0SYj<0fZBvoPxf}!0a<}lfM+^wShuS^izN|rgOx3CM?+onQ=SI|cdwM0SAr05U z554UyqLr17l5A4h*(}dexq4V9q(SjcnTX0muAj^>rMN%0k9h$<3&oCTbk- zEk9c)WU)MKhqqn*-W8%iJ0T*fn0B!cQN416fHD delta 44 ycmaDZ_F0VEiIY64nc&X$f067;WJShS3q8@G-g~5M_)^grb7c6BA+* zqc5h!G{ztfd{uWQ%EEjJomSK=!wG-+Vm1PX$G$t_)<- z8}f}&uQN>gJHE73qctdA6#A@WpY{lGFM?voi8<@vbR?H4fm~_ zN!l2sL%Ahg${B%25D*YY0U>7EX^-0NZhQ&ztb$-L3ud7XGjIkbK~AVw_P0BS)WoWt z)?g6#MC52_Yv_#A=aOR@WuC_JQOvtZHq36T&(0E5)pzG$g?RT(g!QPD*R1V)rL5vu z)DWN9J>eY8kU;gp=3ujNF_~Lsx^;F!;}s z+@a%S+UJ9o946EZsXllYp{{;-y_<(l)R9m-{OKs^Y({ob=Nu``6A+(@v> zm~%d*Y|4%KFhsF{N|zG ze!hbr!q_zg2=kq>3!)+{jHa-KgRvs2f?>3TD{2^RF(o{Vj_}1a#;TYRvlv#F*a!32P(RI$MN&S!x4xIg;rf2})n1x)4Ccmrd!7lge+{F|in!vVuWclv~oK+##qExCEq9!W3rO=^gZX!*B!h zd>X-E7BpcA8nB8A(pO{q=;0YPv1+HAWnm%$M?+6Tf0DkCoY3%i9Lg6VA4d5ozpsv* zHJDR>oYPI>Jto)=Dn-xQE>_ApoD1CZ{&!g%i7bthk(|MjoK@G@)y72`S;xvV6|xnm ztM}~W0TD|epoKxTpwlqaAT?zFSyDK3kxbV(x<86WQJQ4xhkFBB>cmZ+o;lG%LhbOE z2T^|z*kzw|M6yXhd>Zlzt7RLfKj52t2nJZd(BTFS*;e0cn}r*AfV8YUKq!ePIp^L{e_t$^!bqgFh)W3CYNWa<)0n=mxEeE$PqIwZV^# zv5HtGrVt9LkTQWnwng4=QnBV_*Hw2Lob0*kUW4mTAY`0`fB?BbZd^~&pfe?`q$q?! 
z#>pz7Le3GGIJl$UCPd3#X|+q3<`5Xq)u>RvfzY+6-g?aH>(%VDwz>3YES7i@3zK45 z#jP_a^lWH2{{)u_j!ekj8P(Il^H`BE)cy~AIR_sa3$-alG=yx+a!=U)g~3tI8{^99 z#a}k#)6uKRJ6eZUJNXOY4p#apVrefXm!%W)$38G33=g{|y zB_~hw;TciHr1>-WKnXvc_2X7&{dwk>@AmN?9O@R@QM$9@$IuZU*`nB!l)05OyNyN^0JfeR@>_)x zy5%XR40S1%hPaX~Tt>enyMz|DKNpfm0fW7qNKeVGj$Z8F6cLt_6G+>wnP>R$AIm63xiK8kd`ciuC-#;je>Ps5xq}v Swu&m%sYPu%Vc(*Yl>7wz7xtw9 delta 1241 zcma)4%Wl&^6!kc^Gvw9u0i~t9qy}(RC6y?0Evg#d4c6-9OA}~tvgO15>giQ z2P*#n5<3>KsQ3uBEawlpVuL_xH_RQkv`C2J#hLM)bI*PLy!!QOb){M@Y4W?W_S{W& zt5eS&$RDOL5^9ddbXM5W9KsA{%2Q_+E6TIL>a5HvJH#4B{MUzyX2aj9bIPzQT-gY-mJm|HWS(&Vij>tD|A(PZejX)`TMLw?<<-Z_X zj=t3qWZTiVJHmJk0TUzuRLkKUvJ9U3$pv}i6$u&{Ba4JeoFOo_=jaa!(X)4j)=hM? z35@1PO&+I2=u+rKYs_1o&3+VC=l|qliYHtcm%p=Unm{OIho=2capiDJIb@&A#@XUI zS32zfr}%s>erR5AO=`096tqZ-mAjsyi%#q4LX(^#oBno@#moG zC?na;3)#@JyCbv1MadAsB|fc*&y_naRWFuni{l7OLV++-=Gte`~pPz=<)yn diff --git a/nlp_resource_data/nltk/misc/babelfish.py b/nlp_resource_data/nltk/misc/babelfish.py index a43fd4d..fb00bf5 100644 --- a/nlp_resource_data/nltk/misc/babelfish.py +++ b/nlp_resource_data/nltk/misc/babelfish.py @@ -4,6 +4,7 @@ translation service; this service is no longer available; this module is kept in NLTK source code in order to provide better error messages for people following the NLTK Book 2.0. """ +from __future__ import print_function def babelize_shell(): diff --git a/nlp_resource_data/nltk/misc/chomsky.py b/nlp_resource_data/nltk/misc/chomsky.py index 0d4b065..d910024 100644 --- a/nlp_resource_data/nltk/misc/chomsky.py +++ b/nlp_resource_data/nltk/misc/chomsky.py @@ -12,6 +12,7 @@ To generate n sentences of linguistic wisdom, type (CHOMSKY n) -- for example (CHOMSKY 5) generates half a screen of linguistic truth. """ +from __future__ import print_function leadins = """To characterize a linguistic level L, On the other hand, @@ -118,6 +119,8 @@ scope of a complex symbol. import textwrap, random from itertools import chain, islice +from six.moves import zip + def generate_chomsky(times=5, line_length=72): parts = [] @@ -129,5 +132,5 @@ def generate_chomsky(times=5, line_length=72): print(textwrap.fill(" ".join(output), line_length)) -if __name__ == "__main__": +if __name__ == '__main__': generate_chomsky() diff --git a/nlp_resource_data/nltk/misc/minimalset.py b/nlp_resource_data/nltk/misc/minimalset.py index ea49d08..ca298e8 100644 --- a/nlp_resource_data/nltk/misc/minimalset.py +++ b/nlp_resource_data/nltk/misc/minimalset.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Minimal Sets # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Steven Bird # URL: # For license information, see LICENSE.TXT diff --git a/nlp_resource_data/nltk/misc/sort.py b/nlp_resource_data/nltk/misc/sort.py index 0dbaf99..cef988e 100644 --- a/nlp_resource_data/nltk/misc/sort.py +++ b/nlp_resource_data/nltk/misc/sort.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: List Sorting # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Steven Bird # URL: # For license information, see LICENSE.TXT @@ -10,6 +10,8 @@ This module provides a variety of list sorting algorithms, to illustrate the many different algorithms (recipes) for solving a problem, and how to analyze algorithms experimentally. 
""" +from __future__ import print_function, division + # These algorithms are taken from: # Levitin (2004) The Design and Analysis of Algorithms @@ -174,5 +176,5 @@ def demo(): ) -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/misc/wordfinder.py b/nlp_resource_data/nltk/misc/wordfinder.py index 4514f62..a0b8ae7 100644 --- a/nlp_resource_data/nltk/misc/wordfinder.py +++ b/nlp_resource_data/nltk/misc/wordfinder.py @@ -1,12 +1,13 @@ # Natural Language Toolkit: Word Finder # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Steven Bird # URL: # For license information, see LICENSE.TXT # Simplified from PHP version by Robert Klein # http://fswordfinder.sourceforge.net/ +from __future__ import print_function import random @@ -48,7 +49,7 @@ def check(word, dir, x, y, grid, rows, cols): return step(word, x, lambda i: x, y, lambda i: y - i, grid) -def wordfinder(words, rows=20, cols=20, attempts=50, alph="ABCDEFGHIJKLMNOPQRSTUVWXYZ"): +def wordfinder(words, rows=20, cols=20, attempts=50, alph='ABCDEFGHIJKLMNOPQRSTUVWXYZ'): """ Attempt to arrange words into a letter-grid with the specified number of rows and columns. Try each word in several positions @@ -109,7 +110,7 @@ def wordfinder(words, rows=20, cols=20, attempts=50, alph="ABCDEFGHIJKLMNOPQRSTU # Fill up the remaining spaces for i in range(rows): for j in range(cols): - if grid[i][j] == "": + if grid[i][j] == '': grid[i][j] = random.choice(alph) return grid, used @@ -127,7 +128,7 @@ def word_finder(): print("Word Finder\n") for i in range(len(grid)): for j in range(len(grid[i])): - print(grid[i][j], end=" ") + print(grid[i][j], end=' ') print() print() @@ -135,5 +136,5 @@ def word_finder(): print("%d:" % (i + 1), used[i]) -if __name__ == "__main__": +if __name__ == '__main__': word_finder() diff --git a/nlp_resource_data/nltk/parse/__init__.py b/nlp_resource_data/nltk/parse/__init__.py index e4f0f15..52cd4f1 100644 --- a/nlp_resource_data/nltk/parse/__init__.py +++ b/nlp_resource_data/nltk/parse/__init__.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Parsers # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Steven Bird # Edward Loper # URL: diff --git a/nlp_resource_data/nltk/parse/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/parse/__pycache__/__init__.cpython-37.pyc index f0b45038b6021c6abfbde5ec7722108195e02851..868c424f4c797a44a8a18b626e24a4ce35720914 100644 GIT binary patch delta 31 lcmX>lw@;4SiIFD)+8-yF+)j12&9z6Z4c delta 44 ycmbO(vRH)MiISrZZX5<$n7G>+}Z4P8U#s&c8TMPLB diff --git a/nlp_resource_data/nltk/parse/__pycache__/bllip.cpython-37.pyc b/nlp_resource_data/nltk/parse/__pycache__/bllip.cpython-37.pyc index 6edee046b155db82a0e0cf42e8aaf1675de85164..da0115ef30299f6304be19c1407cacd46a594bc0 100644 GIT binary patch delta 1264 zcmZuwON$g&6s}uU{ivF%cI=rp>F)7~I+oy!A|#_3Tu2}UbPyFCOW{)Xy*(Dz%Tu=y zMHon2hJ>LO`2ksmY|`LL31pFF2)Gc-U!XrBE6=Hx8D_8>?mhQBzVrI(!^$rg+{>(qiT_MzS91vEduFY(!?<@Ea(b8_aJ$Iga*5)Qa1FyS6tY zE4F zNFL;)FdU;_-oZm-I_u)UgSXD0SN>+4XJ+}vIQ#V;fC0}(DM-Py0CLGy%9BVvX(^Q4AM5A-9<(;+VG=6NpG{)>n0}>4o&!9pMvV`BlsyxM zq-y6fQ8F3H%1-XfT=6K(6?{n{yJ6ul>VBWRbFLek)LqpKvP>odw&8u}Q~ANhh7{8* zR-MJ~K2AaKAQ4;z8`xzCIdl=kZ_zsrGMJ-`^Mq@l3XanAGgVX$T5W`%j_TFq#q3Y! 
z!4`UZt;!fhd1b|^kP#Olzyg6BEP3zh;_hCm17k>IUL@f-M5yhEx8>_zaq$p^QMYg$ zk7{NV1$iDz^&l0nNa`K1TB_O6KtnP4;lSUzvy7sze|yu@;cR)We`19ca1IaB;I+2q zxxPg!UJ`){;0)?2GYixM+z?R~2n`m?Kl)#sq(Un9hDx*Vm4twZ3^f0`IM1$w;bi%t zf9muRQd5HIH0u_2YNawOQ^+`FtXv(evX%15;NQ&)0pq?99?I2O?3E%Q06L|<9Da%qu@aF^*eRHHA9wyEChp;R$|i>i0ELJ ze+>uCCaH%dvxc{T*1A&~P^u~0h``est&l`1Kev>sy*jP*67`ERw5cadZUVCka4TL1S`6+23VykvHY7Yr0>H$Ti90;f&k-CRhC-0G%_^)F} zRdR_cxPe+lhGVZtIj^|T15!^t^$PM4i362-?4@c2GhUh~5kAbDH$UHeGjII!g+CYE zORn2c@S9$}&L8c$t=-3X5sF};5;2c4($!FnG*3faTW6lWeHLwcSc?qLsQk6C9+{qr zx*_WK6wkV)#9dvHjm-w6Oz~`{>@=l)n+X&BjQqk9mS=x!uPEOrvAUv&hOjZy5uZ82 zMa8|Wh#{ff!;+HS)x05bLNw8SLJW(Ocs7M~L+PA)P1N`P{$Zw$Cme`>zU*xzk?h^^ zw>Og1hs|EM_>tXoJ&ej!%3)v?FIhZtkOC$lSSY^h3w44`AN7FXwCR zQU-ZAyp*QFQ$S_l|6$CrjN!>i~1I*MTw9h7XiQoVN4d^pT62W%DWsbt{J23dGYG>*2O7` zqDJ5&6s5fu`dJpqd?OJsOXkDeN~>N}M@QCwIQpCFbW!!xrJEfUdc|t%+yWV34h328 zSYGb9o<$GU6@Ko+Cul46)Xz6?&Je^o5E{%D&st-%M5J_Ym)4K~D71{Vos*c+TYYb)UAv4KCQJ{?X$1izS{TN`uh65|92-d!#EjR;WuB- z_nq&2=iAPA&OP_OdC2wH{jTzrW5$#?=-*J$9(D4^d&+B8-|TcaG)K^(DVj5)M4drr z)D?6^-9fhknk(W7dibw5=;gmf!6N$Wj`*U*!D8m|MD}d1Pev<(7143QaZK_>#z!Xv zCvdwsQW>oZR&l!|GBH{mtd77DMEFc?7U8q>Ch89m-lPQx4{-l%!e?vEgg5IA)Za{ai#CVw zIeLKb7OhKb)#mO}f^+m4I~{r>{btdxVW*?t9c;Lrk+wuzy2}wfPV3fIX(y8Yo%&*J^=9X8r?^4y-07e^aIJF$PBO1_ z-se6g;Ihi&B0OlQeM5u2MmRQT`Lytsa00E0p}}x(Ow-j!*w95Nl2{v1ta2k}gd%E8 zXgZDSCyGae2K#l{U36}X)2fRP8w0UHb%8oExHlG!hm7#1h;EgQ(1acr=Ki9OJ&hH# zI;`UL!*PAJ5HUgG+zCC>caK9DY%N5mhmHU<22}I^T8&qMuI}s-p zOs^(9pon~2o`ew+#>MhaW2L_Un@b-%#Q9!ezL4BK%7>+(DZ<_eMrUHzm;e?9V zM07)cL|#(;?Cb)}V~9CCIKuw44A*Q>#>uB@svF>G%M&vObdehtud3=7tLjVglbV%H z2xq`)2d07Ad6)-vRsEUNCtbAySs}^*#sN$xVEI&aFcc-0&yW%^RP`5fZtXRV7&b~S zkg$PJVn9`YCEu!@sWeG%-7ICITvJzb>SWAV4=@E_Du5qg8o&&IMt}go8~~V>1(1>g z7A%!}>H*r@Svt|%TdZ2+b3dCHN!T=e0k<>r6-?!k1c-zCTRoc09Z&M<^CWg+E$g;O70{T z(byJUrBqe>Vq$AZXf5)|rgeS@$sY{%#4_0ySmwucqcB0q6>?ADq(V%Si0MT6*TBkI zi)~2-1lAHqx7;-Qlos#`a~v9{sJOV zWB1Ex3y&|vv3|g_scKJLFpnPb|cQEVxUqTC{0C`^6>T7tUZ&AVf%;L-OZ~ z))o?d0x>-xPdsi~?PytRXJ^;v%+8f0KTcIDfrRmu$v2OyQ&!3&b(75ToimEYwiDGV zQjOSVeJ~-G$kiuIoyJSjn<80P~1nOT6Z7{^!Ut(TW~9cbqIwPtj=#juO{o6R%27kZRR`Q9 zosL;?vFsdqWKC=GOp+y_faOiZgrRGed$T^A5QvF54fGNk(GsBfoPFjJ)-v-ZBB|mjuws@pHku~Q+%!xrNkUK?J{NBdN7|& z0B2}uZ^x8_y@ALBvh?&NItE)M=Y%3d_JNz#W8GO>t0bWtn%);0iWrtR6pzyeA4r{A z&L5e;u5}jxg-<9udeG*Xg4Eh*-go*UWtNIQr^wRF=EE498ERcDOU_)rik;-4I7L%l z>jjer38~q8KwuR(vpzk$y!OmGWwX5d%umMf46GVWFCRRsdSZIczm+VBgcAl%vdb=& zZ=R(p6Xeo$_0tDPyyfnVM-o9F4mN0wScg7QC3mcwtZXxHS=a9BMDkc=s_h?6710MK z*18m}b^-yf9poMmE&_;3?}nDjw0gD|HdD_RW!HxDXCsAdqf?8h2ZS8<7|{-zM>mX7 zN>SxSEx=^??z#Te7ov#;Yk3Tz>xq=_EVqco3=v0f4|?6`&5>();~|mI6L*^@Z@kL6 z49pZfWVYp1ZE2~Tg$P^N#c1WICx~{~tT_Jy=MwZ#a%{yPI}a>pZ=6z}$8Eu2u05<6 zi{TUsQ^$Mqo(nHXrbUkocj*y>B<30LAyx+c9A3xTK)Jo;&%z8Sn*!xVu(wb=X*CT; zH^m|x6N{^Z5sO6ebDvRX@y)CD%1yd#= z{6KM3;C#r=M9Z@^tQiAYyi38BC0~ZLYQ<{~Yde**lIX);x!t*=ReTB97YSH6@$r;! 
zt>x|22dT!6RJE&^HlT;-j=*x!{&S4Xmip!lQQcur=p78^f!AtpcrwId~cw;X`8WR_~VGQFrC>k=3H(w7gbt#L@>gdUe z$_MFn7|UMdm7j?olF2I8X&<6{7egOe$4bIdhAm|qtDa+|RfHre?sH1R{4rVLKnE5voFbt?7EP#D*`U3KA(OIf=<0H%h9ag0qXyyeB+Wk8plyf zz`*=xIV_OU4xkspCg*g^wD(Qn$%i+KA z?vYcI%cuC13Z=s7c9py2j%0n_Slq2W8l5i;etetb*bCwc8Yt=UnLuyUew86 zx14`vrL4bfy)sRT%Vy8v`jXb&Ub>@c+9{vEY+o;X!;ksl4KkmXAiF2y8D`4s9&sOr z9sD|RcJI9~M7W?LBWSf!cU zw}ia~&qr@3Kwh0KH%8=*B$STJ-(Q)(Vinf>Fpcel=A6Y|&f>#q-S=K~BMQ*`Yht}@ z0Xw?LUaS8GRXIO0nsd&u`l%ytPQIHMGn(Ib$!W4m)?E8}7UM%Ccx&2>Vm9L;m|@;{ z?Y&A1y6p=i58X%(>Z26BiNto3x%s+zN^&tZi7R7uxotHY%w`rt=C=X(pvLZ~2Z}Vc zcXQ6bjCa^ECAl@b2ka~sS!ubUpJ;@qEKgrp&=cWk4%;i2?>-}M<$@ujSFVSt_bX{< z`Di)0#l!5Zk9Mmg@n%y}@#X@}2C_dg7IEeHyzzd4;@!47XPwQ1=7m%C5n_2Vtv;u$ z+_SGvVpn-hK5H8=Nj`kzrd~FJLs?`$3gj5U2V-XXyhgk&w|{MM-WU`%;!!fe6wgp~Rqt+t!hK9W+q zTs~m>Wcw{gle|9s&>wjnD7E)>uFSk7j@`$s3UyP=Fk;c6xY`$v(2+CiwM%C2@#QX@ z`!E5fvWoGpi?6qMyQ|B)Vz9|LJo46C-%sXjcqO?TUPjZ$F$X8e6toIxa|r!umBbT3 z^DSK3=A4dF>D+f_?iN#c7krEa{sW$;A8j$dQt=r3{eW2~pWF9*GL!11cB*p_GL~dA zKVuW;5w8KxqfdFb2S%_QHm6Yab~9J}F{S_rrxvgsTel&UdbQmm;ih$>egk6mgSw;;99q-% z@$NCNyL;GK%WmolREqCo(94kL#%-2(Mov3adHxSU{Q&@n|3$Q}2Y3$Pc>s>f3uxi# z!zvPZ_DSfLD;|rBXF)#zfH=@C$^^7e0XQo69IEnu9nJmnzC-oOOY((7^=&;MuwA8W z6K*;6PrVT=<%c(w@0a$<+WWq-CQjq%boO%$KNDaG0Is&5{ZlthJT-XjBU~n*kV@od zTv)v-i|=pj$w|L?=r3P+wz_7Ox^DehE7q^>-YB{th<8ZNt(Sqa2K)^B8qmxP+~SOV znmSIC*WF*QJk1O5(#xmHcklm>7j!XE9(-UzGy4fb>39d7gc=h4_VuAHk_BPvH|1Lo zH0KroXlpB$Z~oV*_cf$97XT`~;`I?d%*(#jrg&#zxKAE=uxW{{6X8}V89!qA$bAKy z{qNw;%#+saeL3}T`@g2y8zk!OV>Ejst=W$bk5}5$>WE(u>p!K&U*tG*;lHNFuSxbF z(`tCgLBCE_$mdsVg0*QitkSG}_@~tQyS(?I_T1veIh1i>CRN^{+5Q)rIH^I!?HztS zGw`J#u9`XYDIsc@N;X1V(O}czNUu#MMrt`OG6X zxb!6WQ8Mgz+r_p3_6VeKk)XR>J7d2O3RjHx(Yl&I%0ZU0HQ%KgeRN~!@mBMDkL^){ z=~>p&s%AZTEptI(c?TfxQ_luJ!&E3A`6dwmK^>>dyPo*6A96;i!SJ9a*FL$W3-@wKXe^MGKyV9CZD>p8W!YXl2e|ZsKjN*)BD@mf_SkdfZT+(xf6=N9J>^*YL4{kgiCQ$G=33=XEVJR|A{0;!-jHGp@Wba1T zv62Of>^?c~xsK1cn`E(wzhHN;ZLH~kf>s2;-u^vW9|C*?Flr6|4fU2F0q`8-8|!w; zcb}W2{Kovh=em_Oe;}Gwf=^?}#r#Q|04Yazt4!0Qu@P?`hA{fkd85qft`vWw*7`1S zySeB2xLf&y>3p$H@m)ql%9~o*_|g?VED=JEO7o@7%Ae)bm#=Q*T{i0lSWQP!6BvTM`LHH))||E9E(_G|V|0d^0Rp03aqDrS(Y*RBC%V3_-ep@gUcyd#j_!zM`g=T9!^f6DRA)} z4TT5k3p2$onMG^YcdZwNGicMS7dG^X>4xgcXFKCf5cE9KruiG4?+lAhm3GUlmahj zoIc#t#US1o$$4 z4g%aOpZe*9ZTx@kPF;Ke delta 11559 zcmb_i33OCdnyyz%QrQED(rMP9mQ5r z(W{~&G-|h^qJlN(V5{S z^S8xmp>K=V>a#g*3TUj}EMK-Wo7*<8-IwFc@#Q*i-cU8bU3PD=Zd8~{sV83>F=%m?5MH1b5?;voIfNIf#e^4QhO?CLVd`+ghcjMA z_z1Ox@Di)vmM=d41R4X4GusAEIqf9NQj&iMn2u2cKp^hYcBoSDgm4sKSqX-|x z_$b1w)X{{GX1t2jkjtLMJ4gxAtMi*o|4(yR`&&@wIBFj~ez9TU|_giq2Ogf|d=t~#0U$&5D= zK1FRMyp{1L!l$a!2%pAyGvU+K8HCSZyhUwOXI^D;PE_Zs=d0}`{v_>OwPS;Mk6B!% zO&T&@1cCdY!vOXkB?c_6z~K9hZMWUl=$wsR3~dQR7tpteAWfN&_fHKcia9yEzU z8u_|&Vqd_gO)7efCg={Zj?jGjVv4Wcl%Tp#%6>nuVF9+ZF!3MkmVawWU%fUWlRckheb4ER#dXa z`ih#k6r3eRJtE+91w}yZ?+$wc{)&iNzeib{ikHpc)dYsk022Ww5r~@FNny6hMVXSM zrC#g{Y}8y7sIH!X*yI*!z3g(VtO1kMhG&7>lzkiB7%?R${QLw z>RSy?85qKq9rDYD3z|SHl<%AXw=a<1#`YT6B8`}3G_*+mrg2UU+bU%pV&`5c8=Dq2 zfJnDcDPjAj;W?6smdgE2%b8qEBQz6vC5>Ms%bI(>A9)3lcgY8uPxQbJTeIt$CyScq zIa|#&4O=ZWbsJ6K;R678ijdLbnPnC@B0;b9GB7EOf<_92VIw7Y(>vm&<=| zxuT9UKc#Vy(_rUv^~x(IP8g^qo?-%k1E2u_iJ<3GF%NsXeOkCLpo%Hzp9;X?&movocaNBSHY=nai@aHWIb~tZTtd@08lvx#Yg#*p!uS02w9xQc`FiWznhZp+z;5Z7 z8q0?aw8)2>r!O%BAcJcVBfpTnGnQszjTky4 ze>-DL+Nk}1j!u)9LrZR;+7&_=+ViYtd!gN8m;G%e${e|DR7K>zwsF}-Ei|ZSyTXAD znm;6_$#>7M9>d9+743>SQpBS5Of%7BE^z$25}7`KjzUN2wCme6wO0$} zNA@mQrHs!&m*WX*f@#ZS>B57JTwNEX=bHn)n7%IZ!NO@arBIeHsXm7d2EGrL!# z6If8pSuPhZX&G2TodN>rS)qUkYpQOgQ&LF4C*nuw&vm&~l@3>`wpq9IYGH@fI1;Sj 
zL7|~;Vv26v==O%#8)S5hvbTt@=GD5x&GgU~LTNOW+m&2PzGAoJ(n;g1B|lNdEC=yL z1aQ=b^oCE_ohzw(oxFS5Y>kiQxtF-T{l>YR#beo7T0J+Ug;lM`-R}+SS?*wvc7I3o z_;T6M0)}r_0fkLS(A8-3N|#V;W#qi&)0O(mF=iGCav5|nI|_)kLGHU~?mRYauD@rJ~8U6@M+@6<9P{Ube3+D~+xBb?$7hCltmZcg`m1 zU+Gc`Q5ozdZwX`94^n90L9}X3<4n!<< zHd-wN99f2wS@a(CdgR_qn@VEh87Awck*)H*OIOqL8qusJFqA|*Gl_fyjqi)BxvW6R zi|>A=vVT?00w?ApVRc(rXj&+0JKDShfw1sm6dZLcMvd@@8Ai{d$FPtVx;LV)y4E}g z#3ThGThDSC+@dkdLNPNRI3|hbr@n_H_pDxHo{bR-ju5_#IMB`AK?;4ICos7HfO*9P z5gPTKkuzOu24bv-rdu>`m^h}G@Ch=5KBg?wQb(b&^N#^@R8N6&W1Q}>|57$>@vRGZ z1$?4&bqEIn-jJRhJvXxtMA>Z2!q%l{(c=~}p&-GbaV1zB)HVU-jr!j$SFCk31c9>| zQY+0V1N%H3`Ni6K%0$GKxD?=G0-2%!qJI%7Ti0z?TIDvisrv%ViU$Ore}b6ZDHQbj#+Xk|jt&M^+JSenqcnc2rK6jWy+!V>x29*`tK+We!^Y#|81K4%~I`&x> z0>;*}x-~yl-Jv>nHTU&tZdDVyg?6OG_2lWE3Z*vkRnK-M%iu1z_068hb~77~Vo+=- z2$bZ|<%nsQAN8rtF)c_K6dql1iQV*_xExT#Sclx?S=h9TT2aoN979Hj#Kx>x1zjjV z^-R5xb?n0E^#pY5`hdqDJs1m~w0LOdS**ZCFF4DpQzPrwpJ!28BZquHQe4w9Z5aTs zz_lRmPlT7AqtVW!33pf<%4{X1D_eABb3$ch6Yy$zctTS>y}}*rTPl|au1%3z`MAuW&>KZ>Jz6MY0;Ne<0Ime^10cFN(ZnuOBQ;?(u?OIG zfSAJ)Y~4WvN991UP3gcHT#mn?KJrzt#5R2x4MZbKw{`c?MUS3M+WCBLVMrV2#s;ua zf%a~BC^CQJM#Y$H9OA{DSo*HB*XJ3232a zM2BA39tvp!5BSFITDDP`;(GOg%B-DBpUFC1esWK*e}mIbdIvv z6}wWb6q~JHvF0q)$!@D`-&G_+OQlXS9l4&L3()?mB98l*m_Ae$#{874|;9&rewKLC(worh^) zrd)N^{Z}wYUI%Qc$nZMMnBBln->H{i?z~Q_B_1Mn9dgap^V(Ad(LowD%nMNjh*KK+ zK^mx%A6z{`d61*0aM$(A;06+utLFun$QwRaG1R1Jp-f0JK0c z<&i(WHg%0U2$pFje}qO|keq8&7RT))G1;50yA37h%q@Wd)_{f2HnQ_k@a2ezOU_Zk z;zth~NfG`S=A^Ux4v3LY6edbXt5QlSnXhau?+OIx2R6wgyGwsSS{@;8!I*kEtlkl* z7a4!ULrN2djcXUH07_D$o53Oxp9&*&`M3Bid&=5um&4~)whA?;{be!V>6Wj zg@uiMog*`DH8xmoa`j#fa%PKY)vb_E)WL$ft;ZwiX)&E4ZY^&7`Ttj|n9K4t^4k|hV;c&nw-;`x3RXdnb zwPQ4TJf@lv=Eq`T-goOuhI$pU^Y(S!tRg!)+jxjbQjw3;O!Pxl)Meh@nLj{{CrFK7 z$JEH>aDHM)4f)#Mzg_{cjT)0>cxE~}xG2WmlUi&}&Wx4uGdbqYmLDMYlO)6MV`5ub z>?fmQ7s_YuJUPIb6W`qOj5jbWHYdy3d!1hFTF24QA9VG2ymTxT><&DqC7clK$tiUR z<5)`1!OJtg6yq(iGG#73Oq5?)W#hh&2a-0$+~iF$F6mQP<9kW-04QT3iC+RumC_HH zOlFf+%DXR0-jpC<`knGPjs6d;P9NHo?0Moi+x)!!9}cj99O8LKh_eIYTErtht`El# z$3Dl;x2($ri}|oBnQFb+G)&%mU{PDrCYyGbdYUvSrozfk_jW}*Eyw=M-3R%*=&@@W zeRFkn;_W3)f!zT?#gg{@yD-G!#9ZREx#0YS!$lE@pyy~TVeCM~v;)!AnqoJLry_s+ zS-Dx+7O~y?ODpY$&)**yeh^dE5^zM#aF^_OU}5sD4$AFHx&MK+HP3=H_1PLCUL&gx z4HD~n4vnbYmB5v^&)28}0ZXspaMvkbmG2%JJD#nLI&3Hc9Vjir!riTreo@-%BGnIW zG4rO?)#)e^&w>68kXmsACLoqx*xjdfZ=lz^^uXh$E5jE-^gIB^!>`b~9^eIlmjKvN zFQbLW6+K(vVJW2PmS7+#{ssMa1HdtK5i_0-yPfEHm4LG-Ny_NKOrCeRN_kygezX$6<6_t1d9IaRE84pjELmP>B{Nhop*(I5I`a8iVOpx%lXW*$EPoDTeHjD4}>4 z)bVv#VVLNQJaV-4ACv4onx>y6*?Td`RzE&mX^n{^-XYTOCC2CS-pI6nOpH^+`_q^h zHZstuXf>^ri~s4-_A6*y#6;2a5+dY#iS(tMb*weHAaYd2%@_}!_i45Nf;47oP(gg3 zUjjXU>^@7}97EC0QTYnQL-wWeN{PJV_)l9oj(-i34*-5oVCeRW6DBSPyK@1JkB)Rb zb*n|&26|LuS6UHS+Xx8^5cV zyJi3L+iDMUwPp`E324P#iDQ5xPT=&1(d$X9WQQI zrm#s8oyZ|@Ha)#v*TQ&VPuGdPTl(d~m+zd+_Dr0|@sl(!5A&keZi908EBhvWD`t4_ zN}85~X;8u17lDI-4_>+1upvuTK4r z`%PvGe+xDT>&B9Pj6MqhTl*8V{s8bPK-wyPhDI$}l!&9KVK1_pW&djx%9+SbuPsy- ze@1Bt$mPg*Yu8ud}gx79KEAmUX8PauP?F7yz*pl8D zD()mY-9Fpv_N`Ohb7skpPmFE82z2AOo}jD>?n` zu>);r{ubbO04UJnGyqr16KJvX+2(x6=EK}#4BP{727m)%I$G?OThRIl0G`t=KKEwh z#l9PTT?8DX>E%OTz$5jyo2H`b;fII*uoG`}xeA5E-$47n07=qwNOa0s@6^=2g5fZx zD`DfPz;XuBJ(IiFi`{bPI}YQ$Foz@`#H{e>%ktTGjtvwlG@UNbeQuARe!!*}B{F+y zS7(Y&t*5ul{3|x&$6<58wgUhc z+G0Pz{QwUF90tHGk~jiz6aY`#;t7B!2LO))JPq&^K-zdXiBatL;uOI90A~O=qF_nB zq)v#^xXJCaee`=buQsQSCWNjcK=&7leLS7v^Ue8637zKY+fHZt0aIh~fXQCmNf7^i M#S@FA`tGX#1AzSm=Kufz diff --git a/nlp_resource_data/nltk/parse/__pycache__/corenlp.cpython-37.pyc b/nlp_resource_data/nltk/parse/__pycache__/corenlp.cpython-37.pyc index 
f706372bbb960e262da39ecd1745fe2ca8524e97..994838dd1ccca5259a831a5b260bfa1022cc5475 100644 GIT binary patch delta 3364 zcmaJ@U2Gf25x%`2^7t$2hm=T>5+&JiD8+K)+WkokOKK7sh~XbZwxygCUnuQKp>!nG zy`wA(6jWOZia2p=dDA3@oklF2hoV6HAOq=B2WW_D(HfB8N6^A{xcqgX5~!KW77G}X;mdMraEiAt2%QkfWp;9Rz4Ll&cA zC}LC%Rg9XU!Km1Qa?l8tbwe+QjLo@M)+8FTljW3=g8gBm^Nd6zG`bG@&j0|vEXaYD1s~b33ShvzPShrbmD`7>~H6sVKcG>~74l4&TJ+MyF6s%KX-Ag-Z z*M?-|trShK6PlsjCpV;ZV)X40vNR{w{ZyWi3cdeAcMHTz)GDQ!Dz!|zhB0?zEjWW$rrR19C3w&IsYcr5w;u8YID7_4iGmB{laj~UULghKKD%YiX z!t!-_OUxRWiprMastvNLxEgQ+@C2!PM<&ur#?|X67s6P=H+Tmm%7opx~$hPds9ul(u3H1%=^Xa>U1ujZ=$&Uk*k{hFmb#e=w zhwE)U&oDmx@8Qu1hIiak7fXv4&qqom$!|v%k^!JH6+uK^+=0f!(E$?U!_i^V$KQ$` zS3>Lve;7S8{8Wx=aKFs8VYl-dfCQ--kOL&Dgh@h{72XAqqy!{lIjfal?Hx&U-pC8{cs z-Ea7W{-Unx8q4#gmO(SKQz+_Ap=h*QlnQ~8$%N}G zlrZg}NuWe%igv;nrCl@)V~ncU82Eo%zJcg{lj#4U7o4yhr&O(YLC2pIG(V$72~e<= z%_58;?EBTeeXey`KKy;4o`)L1%r3~DJp3I`yADO=G>h`D+ix|;cKdqAUr8TcO(7u8 z@^r_lxK?GxVhoR|AXIrJ)z!R>Z>Gk=Gg!j-(fB`81By~tc(5~9P#UB}AbE92+faju zx==Cgx55pGF^qu*CY{dDb$1?*i;zgX&t#?oRZP_fr5*TP!liYIbD%lkA24&n+ zU(64T_-3KHpr1kSp^m+HcmZZPH5+^qTHWe`RcY4BALf5TI(V+{rk>yV@Xyl2{HwlS z6b_~tE1}|fgoEK_3;cHf^(+z#vM(h9)Fj1o19OR|iQ69z{6Wcbf17CMXk)W-UODm` zxu}A5r355MsWLYle#**z{JB5YY$>UU-&O%l=gXNd)$1KyOkkbiT_wM~AM)k>fXUoT1HL+`Mr4(#6Y{%<&7a zpBuZxM5n-NX;L8!6nZ`a#vKB-;s3n8CM8tt@G|>o`_s{hr6{(mp- zgUDfd-;04AFYfb^$*(sh~G4hWj{mdHvk~re}(g= z?BHJ+`Pt8X9$bP{81TZTIa_mU%rZ>|9!YEf0h>BIkB|Z|;uYIn7%sto539g?vHyN3 z-Z(Ur!50uCDrBG}hM{+}ARoKhk=BvX>NBv9l~ydl4e`p=d>&K42>D!QSt1_|9bUp+ z*Mf|U^50(VUW?%Y_{Ql4s?+nTiUwiHB`e(;Zg0o4m$&nnP zybci~&DUMk)m_!qRn`6Y=j4y?k>KZpK}~|Mf9h47cnnWLF_ePW@D@}C8|;Xx=vb z>5er*6EwLltq~*T&~e&Bdj&(M%#c&0Hwrz5@W~fOW=jQgWG;Jcwp7lt z`H^A%Gx=~d0As!IS7ZQ{umJy${Nn>prm+~XTdK{jWy&RH>nJHaNFpR82Z$=Oe&FTq zMdM_Q>tmel zg<>_I&$&PZKR^8Fk>l`JrZJodgp0K7qOKXT?5;_3gy}VRUB+I6K-jqJF5BH8%O2YU zJOzF)_1u<;w3M{f8qV-hrABHp_1*@Fj5Y}Y2(r*GZi-%%KuoLq8`82;Q>Z-eV{Z#i zpdo!lUiL1lb`bb#JycWcVd`su>NR;%O(mVP&JIm~fZqJ(f@e(t~Up}i>OtxS$P185N7a%sX1-)X^Qq|s%t;yI6`nj^1 zv!*SxqF>Y?BV_7qWSa>I>O?6=q~XerV&^hW6`2&f3^3 zTV-?TiE0LiCynzH+L#Ap;}pN6S)`NyOPh~-fM$5&PQZxJar0~bog~1&=N~3I|DFH2 zpx=p`BPe&2lfdcWr}D@H0m|$-7_YnpAbC_-krfhfX(S@cgf9fX?{AyJcL%4o(QgG8 z`V6nRn9IP{S_*dTDUsd|#Q5&ek%Y`XhZZ2DH5SBv2qDZ1p~3!FvAd;2^f1y{3Sck) zNhqmmM^(4#VJZHH(B4eXMn1c#m#R>NJhYeks3!7yv&eaAKMm6e@Knh97)beOigwUW z;Ak{XyMW`T-E;@E0jGQ!LF$I6(D}viKfrxoiVPBi7bA(3j<)0wOoU?yTjs`^Ka4EO z*1anU%^W zvmwaZ74nbb$HHtBBy(;6{1kquYp2UybMwnxeQ8K#i_}P6#l&rpnu{v8-OG@!b*&*0 z30kEg!B)Mz(KSuVJlMT!8seY5hf7Yyar@T&6cf9E}PGPC^a8-|CW#tzn(~Th?(;s zFrM}}Cav&CiGd`tm!r)TaF3p=oYsuVSsg(1U4OQ7OmETY892LGpEorZlbYF+Q zBY+NQDa@?EnX(EJ9{tkfHWkhJq1<=Mjns zRRj@vnD^`!!fgcfIJ<{%2SIFT3xrRuz`(l_pV-$=u5e{+toiP~*$#4(_l=Gmya{9A zBn_IDHqA-}$W^O211z!4H@B}TmRsdFM)w_WE4?U#=C(qaGZW{}PMpu2I`hhxkDX_t z5@4o0(=5*aKH5!qQyDuKe-5LQbpkvUqK3Dav23%jt!$yJ#owGb3AOV5i5k)K#WzO%p{;FunY-(M?qTS3_U%p@7es!%TSbAgxy6 zX=$4fPktcTrvX}#lf?y#m5K$puAH30C%SmZwv^J;9K3MERn0JD8Ao`4@DQPnfctCP qwAg+(y11ntEtF_AZ@!4yE1v=2^~O_tarxt;@O$)o!P{O zsxEaXf~H8x<=F}v2uSoL6;}O$kgC$EFR7KPf)E-NRj4mj6>9$$Rcg<7#`Zdk3$^7l zckVs+JLjI~9Y6VJ@sq!ZxkLGUHbOtu)J3CzHWK+GUKW26G&n$i-n&H1h#oP88MQ?@ zsz=K)JtpuTv*YE2p5SrZPL@-8ipL2%UC!tk9w+Uro+aL-ozrvlOxaE4yq>4EIbo*l z=JG0i6*Fe+mU64!Dk4_v(TJHfbLS&w&RRVmn-_W;%`};Ln#o&h=41LbX0y5Kd_-?I zd(2jI^?9Lpm~G}7n&~vJG27|6*6c7l>AB8at9O~}^lr0D@3DH#?&+v}+iIVWh)B{* zo{VU{7vH_SAsRGSWXbW3iKO z>KDo5;;?E>oy@JmQVU3va#P1eT)mzezG)pMxFSM!__t<$3qiv`mZG+D6P{T1)~UNS-a&Rm%&Kg-XS8 zOfAWPdd8Kd$&y1x3Di`!ueU*!ytK@cw2Hn_kVVg|y`0U7{xwkID)6jl`KC2dsM@~V zNUK_0UZ>v4X`KylDN+zfvewZwAc}k%Mq68ljI+M7A2VA(76EA-BpmYNw(v0|Z^Cke zhKzLHa 
z(vtb*CcHMBKk2)M>H1I9-fw#?Dt4hO{cu@Xk&eZ>p{iM@s^Y z4=ci8YuJ!73`!ZpcwfENQQdl!xPzE&IUl!$BAJC4f2y7Cyb?Wjb=+%0qGoB)AUZH8 zH((fBmKDs*Iv^`P%{k98{{XXTQ<1LjFT#LPsY4znUq2f2U(6l#t+iPsgg}O>x60x`8nbUt3cki zCVXRYJqn@lP$bbQn#BV>M=YeVmR&ze36s#sE9FJpaF?mRLVq?@{g6wJ3)LW!Z{WWs}dnX|^R~I3tFk8p2_nKBL)J2r= z+HkOzMLTU93Ntz()$n{`ETMp9a3euq~f)~Uw(mKPINrk3T^$odFCpWR# z0=XY#nuwl-xz(pOcb($Mt6+fx@sm%eH%Zr}-W#^{Msr_FNS*4X%|F`(qz}awW@M`2*kOD%|XIA=SuTZ?aG#2BFUPxsef$UKFFS<`e*pK4lUFUG#aWMjLX4< zk|^&W@{&3_Fml^4UU!1jt4Kdy(?qmHSocIJ>|dcUtcJ2eSVnw^W}mJ7W}thyN&^dQ zSo~y#UR8Gv?l`d?@c&bxWflr^s159m+5~Z!;Fa22gFD3>E6rKPyhvX%2y8s2rNWq5 zNgNClr0AkqvFVZ--nl|6aEg8fa$#IO8S*rENuH;*8b!EzY{=hJpZo zg__D>>>VaBXb>}|%%7^8ho^^F<8ZTlI0RyMuCyyoUEBUu?XBVYxWUKeD+*c)XWJ-w zl#>Q74ibKq4j1s%sW@A-ErTx0QxYeqiUk>Z#-z?&aEjK#4F)HcJ3vb6hdVd-H7MP@ zbfIFr->U!Yd}niA4>AWXn2%yVq{!Qdp#6K6^bu^AcCGJY5nsdV)!@7>8m;Z$b)OKn zy1aXxD62PiuNSWR{qCp4xbpVgb(lNMT5N`O#>mkq9MsHE_jrzdNY{ikUC=5vs?3tj zxwhS)(tGzDtJj?P{cgz#1ClK_0=&>~LBjKA7w;`XJ`NG=yENV8$P%`luU**NC&U@` zy?wWCgB;rVRj@r0j>Br~tel{OYUBQq4G3Q?6Qmu>r?j7*k=M{fkdOsuk_sfSde!{? z{Yef9rGB};cf%!ch{=zpMcSDOFV~w?=D^-HIY7ro$0QHpV;Cv@z{uS7fV}}^GspnQ zHV}5&c8qp|oB;VW$QY4;zRT!it5~+^E6kLqF>ip}2T}mJ7Uaty4}r{sAW`Z@kryJ7 z?U67X-hI&$=lxiYj!4& zZ_z>(M+H_Skrv^v#C#D`_=(Y9oc<2eA2~pM>DbeY#U9%TpF=xgIli22R36=DUL8KZ zed-I~O~~)kW4YEtd=sdvuEq2ue-wH_f}blYewk1ng5s!o5mI}N$g=YCviigE*XPdA ztmN9TP=}CO{52eYm|@%BXn#bHkDa>x4hfq|n+R#hcFm)!j2E0~HZ5z7a1CR7!K8Wy zm9JDZ$eC9gHYr~q-p5qWi7xR%ZREsk|6Cu(W{@@LKOTNnTJ+5Y`4ZNJJ#3pkhw1+y zo`kJ~3_eeM*QGl(H*0uRCP5B@JOG}USDN9=l%8>He|i8ab1&C({2u;CL}tJ{mvqPP zCkcEyc?&R*67oe5L>T*AUd9Nz%O8Mz2ju%8oM-hz@(3Xmr1z9vvua!W&~`o4i@0cs VHVOPvG)0>*PB%qGTFuCw{{mcc4e;aFSXQ))S7Zsqj>sR3WXUnKUGsNmHGZOx3Eo z9M>{KXCyOarePv&Rl`J$S!HJCB(2t5Yu1>xb4;r<>&z7}v%*|yu7c-Ev))_{&sF9c zt=??VR-2958mqy4WFp4zTD6xY0FXE-Wt)V|Zi!#AR#`s9t~)4E}r-17aWOE*$= ztg{xFN3wB_3PRVd4x~x(r zy!=x*^B9r^g)r6yTpTG1~n`=s#3Tn zZv{*Icv<|R=I7l@qc{tRVS*5 zFNKP0#qU?`8bo`v{V-t%R>7Y?s*w5oQCwPdKi`b675i5LPdB~bTIp2%yD_#?BpVL1 zW^ue>Q&R;8qj{zGxC!R^MBCEsUR6l@g&NnQ5?uIujupN|P4i4{Go#Qw%J zo%`V>j60V5zvLmT$)f&2=~s<+Vuv400x+t8mTWAHb!_J?IE*hz5n?7lm?HPmeVB_m zuTb3gPx0NRvyVThEL3cF%yukt&ck{r%Vfth%s+{tb$?edmLW;|TegUE>l@qA3?$qz zJVKU((@WT{X-!3*CiAJjyyaR`MZPaC-dNw?8b{XZG42&~I5KXGg6Z-x#{J0xfM^!! 
z=CvD1IpZhMB;lxbtg%4PjoS|7u6th+hnoLYMzp*A1aOW-8;LOSCt(KMMJg6u!cW0$ zM!eZFa1_z-r;sdS;HOarFyQRdNIr|?Auu&i4aUo85~6DZ--7`PvK=Tv8k=xpz9Lab z5{gAAp^9mac~(>UV8eiXfXpPqM9y*?Kf+D%PQ|fi0Z7@uVXZY4a08OKM<*FLm(4Rf zpIP99jQAXm$paOdC_9$m5p}KgtW9igy)%pQ5MOcI4Z`$kEQg0U_joo{))uB6+qIzl z;(3#1Y;jb@L#A`WT4@0o%7Ax8fVVTq5!{}{RhDHC>F(k)OlK#Q>$g!Pns&HrY_UXmHnd z^L{AfDBBn{K&TYO$w24J_2p+#SVv+av4Ci4bho&>t?3j+-+5f1VEp7AY8mc%&`F~v zUFq)iplPLOX#3f2)O3%v5d(HoG=XUrDp1qF1tGgwAsi<3iyk+xme#cIW(pyx6{kCP zY^Rfh0;UJtvgT_VwT%aAJk}w81jtSCgO1)~?fBY_q+Cg;9VcKen;<|Tv!iYl8AImC zRs@$cJ_oa+G|;(jDWr~hHeCEK7hp3!aq6uYm!R^vu*W+xK zKvR|xFJ?421bsY`O-3=ZoH=w%m<+gGbgYH89C?9K^b#zSb2TO6sW>G*3k=_Z2p1o9 z2alr&H~`}{Ob=thiK@|jT_t6>iAV`LG>q%x=8wc%Jrmu8I1=Vxgr$XQyquH_ZP)op zX>iA7S&up$YQ?rf*)|9#y4WYeL@)`P3+W=1o6b3wUNnMnjys2ng`udWOlZA^o3rNc zBXkStLUlQ@rFUCP1?ZZ^>k;Yuwcx$)&6aUs!b6bHp*#iN5R9;Y`xX!8gH222COM~$ zqSfX-!=d4Pl_v9FLGcF-lgj&`-p6@~)i4$QYM>rxSm_tLkFs@Ga6Vr@gPC(FH4_+l zpyQTdUP|-4=;+(PZ1F_jtLz!^Vc(MnsaMf86@5Gc;ezD{+Tt)gIQbpp2W|^0T-u5b z)+z!-sesM7Ma=B&J8U33iiYxe&vv6cBmeiHcy#(O~nMHlWz#d8CD>yxNDJ~+Z@Adlck{AQqc zb_1&2h@=%s7Rfdwbo3oK+KJ>4l0hVgfrJ?dxWLX8tYF+T`Ei^-gG57e7Rh=fGf1$o z@Gl_2bSpcBUy-C8QWR)caph$sbVOxBho!&*ZrVoxYFvDO{~@M`kN1!D3KaP|l2?$t zili(|Bhv61EbM}2NDWbC{55g&z)agRoS|&OkGDO@inlC@DfH#9mku2~9f#w+F__IF z9@$Jgz1726df;c>`{IbSzZ?Z82t3v7h$L=Q{b)%UsI-|XVNz=SYvix zll^h;Vm7J8+^H~T<|9X;x=6)Ks5oBq{3Y9>9eo294u}_qvI>rJk}x*Zi{B32HuLa0 z0v9p_8CFqj>ZTDGU6JZrIGu=4qt_u!@4zEkEIWj3T!#e@T|xg=NT`so_~qBddxzg$ z=o@GUlu&#pEbzs2B~)x=m&Esmcg#*8Z-T!GkEPJM=~7X~myK%*{aW<31U;8U_f60; zgXp4boyzK2I>Lu5c-Ns1a7Y;S2cgW{M{dtjou#_LU=}JJrXBryj(*7DvHuN)G|3LTJ3(%x&0r`;aX;@)D3DOA?VJuL`k+%4egk;t#EVCp*p1RV zN2gn78!2wlA&vt-lx@%TIRXe%_}eA$Jh-D%A z&w~p&P0pZ_wnEOt-5UHKjJob@FLBrn3J4*JIS) diff --git a/nlp_resource_data/nltk/parse/__pycache__/earleychart.cpython-37.pyc b/nlp_resource_data/nltk/parse/__pycache__/earleychart.cpython-37.pyc index 19306a955c13f007b2c1ea85178164602c8bc723..b385ab6a71ba94e493c03347326c6922ae77bb39 100644 GIT binary patch delta 5460 zcmbVQYfK#16`t7_%d#vl!?G-R7Z!{coA>)+z+fJZW1JWq*W<@{!(5v+y9>TEVB?Sw zj2+i@+J;_N$&V;*(?3-eskTt;DpIPdt*WMql&X>yb=3AxDm6{3R_Q~nsd_-t9ce_8YlWM{hs_&1laDhtD_p5pNLEP|j2lAwr+B__r# zk|l1HtRm{xm@RIX?EKvpTRjk7iswmr@q8&iULX~4vpu#gUMLmDi=?8sSMtV-rQ*0x z^2JM}lDJ>;$4jMBQ5X>vhmvtcP%=oyid8BD-Kk`Po=M6`;fhV#4i=Y^1(qz512Y1k zyOnIvvq=E-Am|KLeL9IDd?4; z7b#xQy(9>F73jr^4|E?X2Ym*JwfF2lv*h}L=@i93`|IaaTF>5g z4vPcqpr?%aGEe)>VY(gW=&ooYszoEQ=q1w6Ze^bHS8llT7{(+!QO*+e0Q0%7H=uvm zN=wn%iS}4j)pQ%a86F$}W77bLL+t0So?rz!iFAwDv>JxSSY6idoIJ9Hma>4`m-BFx z(`>}uDORv$cd;V~4m8AWyS;rFPq&hULU*GvjH-(gRipAWSx|M`d?cpwtN@%(;Wax{ z5+jq^(`$coH;Yvpuo$5X1&4kZ{~WY$=~=9q?g9bPYRs8tsk2~GVbKJe5O(PqQxP?y zX_RJwMRz?DiOrEAN@pkyf`OK!2;opBNzBCwjc9~cu)3U*5#FU{97inO9!)6ZB5gv0 zQK2>!Rb|84f>vI^dP+Q-2Cz=DujKr?x((jK&{LfD=*;KYt=#>GIRa0~9@R)(H4^PY z*KQ2sFxHP(&gb$RdvJWqH1r~NH@8Ss*>K+Z4rG8)1CM1%iN#yZ1z#Pv9GRVkHI z1V;NOLjNgX36wL(q&3}xPN|i1 zqVY^jQh$y8#QQ1i_%QP^Qh$qmQvB30lisu!{eviy>7_;P)b8+{kWJrW%r`tTh~f63 z7(%g$a9h!A80NhQkLm#76rJ0g1#}n)_k#dts%ODBJ~^1*j=cbGfL(r71ElKxL7`O)yr-@UfqinXUi;a~| ziB5LEG+4`5;2?UBq2R0XG`^+4qWTPoA2VB7_0T3{1fWMYrlhzfa~OxwAHt*BKsZH* zg^oiOw+1KB%ZbADhc-M;Mjx<|?N$FrMn?hkeRz@?9c6d7_wL+OTe1hqV!i-w4_GYN z&yEJp4Lt#xl!JR(Y{6X6$1Fl;j$mi)CTQ&Umd!i{1HWM96{W19q>{ZA{OZzvoXYu+ z+0X+ZAOqe~Zslx!5{EEoc>PmNuK6Lb>XtLqi18Fo+=>B6v5(&f83#8L z-6_MN9@S*Ib?v(~Z;If1e0N|F7bu)hpM@V%uB_3>B#{&30zVy9x&tFeaLh9~MJA_# zVe@d8fV-H2BaZe)9G;=>&}ODdLZwqMPs-U0 zYfytV)^>Jaev$(a6H3I`9Hue!7W~k&CZ{4)lV>ATC8yb&wNqIsjb(*(;ocH-jiES? 
zBH1AR#)P|0< zK`%g*>nWfYIB0R>kx`>!7}mW5N96rugNKI>4vkIheNrBuIJ|daXyh2&+5dMstL*K@ z&a~560K8XH(pli?9B-0a%yg&s@DBOnCYt%t;IS&rIunB5mkv5cbm)=xgln-3P=! zu)&^b`(nn|Grqy@_5=>|H(}9TEIjXCw9`A9d(pEZF50Ip>K!dx$ygCDi;EWe{>92g z&pzR#`}rKrwwSXbio*GKjtU8nW>cJo&x%D976tk-3+_?HDRyhmfrguq0!@I>?dohS zs!`kob^By&W^&rtu<3){oyHCagM7OCVp-GwW3*7qH%V4EnLcxRbk7O29 z$w4ACMiz8Yqx_IrTe0L6U1GlIw%WvPq7B|X77zTSuc9xU zs_{rPAI>~volyVe(4@}bJlGyeUKqKemCiZ@G&cSMR8HOj9r zcvGQx=b)!h%%Ql5;t~qnL+SG<`0j#Tr+5@l+<+-=g%o!udKtwP6fX&IbLLE>oS4gK o#BYZpgDG-}cZc!=c#CaCclN|*l(`t`#X%Jdw~4vzeE-e=00E~U$^ZZW delta 5582 zcmbVQX>e3o74FwtI-O1@boRZo5jtccBtQ}%3uI$Q2rxVb-aPVdk|w=`^Ilj(1xRWf zKppV9RXJ|+M;#f*s??0-Pqft05%h;z`dp?;Ep^lyhtevoajNv3`$%^alBz~h_vC)( zo_n@?&V4t0^SboGHOaptJKJl+f0d^XDnB2*<1eoNy4_~eY_d(0G$b4SDx zb;{1DOLj%w@|^>rIl*>D)XC`TPJwAh=aJ# zA&1atnO2TQ%b5#}HX^RjDiK$*M#Sq8S7|}SL4lhPS8FwhYY@*l$SCr*9zP!4CbT38XUDa>O>S968r+>Rf{0n1g{WrQG&NI!7EPi+R;XXwr;^Tr>$d+ zEDukw16d)h5zRKT4)l8y;`Le+;wFJNBW~7O5Vr`t1#zpk0r3WbJGE|Yv$kbHlDk;9 z);VRrZHHG_*SrlYtn08Xv{n5X)}(hpHPql(MAdbs2i4f(f9-GCb0pI@&0t&^i_gRi z28-{yeqt{#!GZ=N@9%A$j7M4P8TI01d|HL6))xN2GbHu%g3K#r2{9-~S<@Sig^jQp z314Ep{MF1eWi?A`gf2_S#k2{g@8%o5H#X4zkON9-vxoSRux^-6npr!90n!Z>NcQu; zdbb6GI15Rzf^MkCrG0!j>m5t8-O}usVi!XgCNSNA@R$*f$MmE_LbJl3^K}i8X44tQ zESPRR4hGXqC+^ju5*MJFR=tX4OX4oJ=9IObcXuk*}lf`DD3{(s?C`5H;Xlz`ai5R9^ou0;MLLQ22 zzy@u!i##x0B5v3~j0UmIm<;PyHee%hAB8O(6+X5g_cZ@X&Tkt!a29$L^6K%-@KwzH_So>*aX%V1Lb_!n6zAGI&fvp+0G#C z6KaHVyH#4rWK&R9?fMieHwsng1?~x)>@;1V#wM8Q#e^ymrk-Q^D#c0Jzl=j1myO@c zDd67-w4JCW?Q4lp^+G!l${K7Z(nCa?oA4q@M0SgDl_d0BBp%6=<#FIoz?187dmK>A zueFyf9?1(yZOJOKTQ!;NoE zxKwiRxUfX+0NaQNmy_5%*+TIH$-a*QPT_IiAjrLdSdLnwUl&-;xN}xOGYkYM=zOsgTGd`vujyYA41ub zOEwwHFfP1~LllTdf?;YJ{>6%F&dMhxH-AtbY!)ZEmo)4nB2M=p%^vp!jv(*5ysV;O zaG7;70;5DoBxPIDk3+cdZ5+B2iAQqV;m{*bF7G@{n<&>w9hlCi@#+CTRayUkcy$zw zzJ(*{)lq)Gri4dp^Y}k210BoaOy(mQ*fHGxQ&M3^f(bq*3ME!`W^flW z8Q3p*MX;-RnH#Gn`5CnM3*J)I$G;Q&cw_{TRg~lj>_&pt#W9JF6P>V!mPn!XJ+wix z4)!D0w2yaY3^Cxii zA1P{1pqgD9pGw8Excy)JUQIooc+Nw$7b`3+0HoOt%rAV)(e6 zK%Shl+$F1W_(*eCC;2YB(VLj2TKCBrZp3F))2Hw;X>Y-Nv zOc9wTveL1f!i72sE6RrxkKgICOZ4Nbhf|^*zuz)4LNY>)nKRgbXz$>03cp+&m=%q-yvC@M@mdPNI^;-f-NEbwEkjmt}2c(;;cI_{V!$Lu)4+d&uZyxTgO zoph&Ybb#M!E3QrW05d3@5@*8p4SsV&XWF*Up_Ug?Y@ZXhTicGfE!+HNTU8oHZ}T6u zb){|e0vdfW#pnfL6gDayiGTr6CN<})#`|gRYJcKWw`#60q1sQSn7)Lj{~ZvopoDF+ zXz$K6ll^)Ay^j909u0D$4Uiw1y|oBzv%nwaZ*>n6>%osEeG7R3R7I!l1?G~nty3ad)f)P zimHE=l8~#yXn*I4Y|AJvn^jo(Qg<+oBK$r7TW4?Dwy&e|-=)~TE^L3M>)EA%-|uQj z!{}f5KzCo-Mi4E z&s^R%(T^&ZTp^ktH}(wR#&}P`u3IGX5h6l`sDAnzUv@JUBJ83m8P*HrMbh9jfGz)@64R-%X#4|y zzo+JyTk=W)$?I@R`H~aoe!Cxk!zXDker}^`IHoB3L+;U`QTQ+!eSydgB%!Tvmgd40 z;rB-gI*2&=GrisXrQXuaT!L$m@Yj3mm}oTd)TPHP(AyE{^#}Cqg5yN!Ndok!fRjWF zBIk)*BtrKd%n}jTfXEqL(muIy@iS{RmATyORt0wJPc|S&^PclU zHnfdQ*PPxMGPs>!|Ds;Hr#lU0hIWz-OGHft1lN9G=|u=>K80vfWux+JK5gC6la?wk zh!SaCL(HX#AOa>$0U27l)z&rlJQfukAm%7-jI%L>c9wG}q@9$I7Pzjcz{N&A@WuCj zze`FBrm+a?cmL_$(GxX`9W5*7PxMJ>D)H4XbjEfq${2H2ZeU9{ims}%LWm-ynrD6uH0PB7zpoc4^j&Sr%r6yjV%Gow delta 387 zcmaDTe@LF!iI%#FG1=zrcUJ$X9F^RdFp56=cejsC01tS7bF&C>+4Ns<#b|n-|WG;l2H_-q=+4) zj?*SRH7~WIpvW$4G8=b*6)R9MiZwGYxTL5Eq)L;qh##i-7H@n}YHog6YJ5RyUUG>) zP`sE0NHDO8F;*!~Ze$eRyqG(NiIHm(pEMgYNEgdwMLsd{TdaBcDXGP`STjpfbBl|F zfijw0MXZzk_;MgFfjj3GOKMqSPQ>K%e63X+AOk>Zi{wEBSPg;zIlhPqNbmr0F-R?k xB?}V=2L~ruRFk=g8z^>*Ek7wMHMs;VhRuju95%W6DWy57cAOw{Kn8L!0|1^IO+o+w diff --git a/nlp_resource_data/nltk/parse/__pycache__/featurechart.cpython-37.pyc b/nlp_resource_data/nltk/parse/__pycache__/featurechart.cpython-37.pyc index 8cdc11676d4121cd362df087c53e973265fcc6b9..3f3c6a619aff5337c6e3308a5284aaeb80f5b3ba 100644 GIT binary patch 
delta 5514 zcmb6dTWlQFb!NP~o_*L}KjKIHj_t(j`2EbojvdDi0a8;)NMHgC8_%`9cxHEVXVy5u zU?QiesN&TN6xxCu@;-}RiRZ_yR=dKYT}x&!D+qYCIM)&%r^psS4< zpljG3pgV!yZPWr?%Nl|1Vhw8&i^0E8gS{ZE&Zq}r z^{f_z^#R>rGy>ho`he~Sy2)q;x|!$!p!XOpK(`P*2z0B_26P+S3;aXIUSr=CNgFoW zH~wR+&*+#dH1->vS7dF(=rX#Kz;J*aGizmA*KEp4$DPD^lVv|t(sEz|D%!qB-0!<1f)jxz80BT+@4?AKt$?~dI~`BD1?Gb77H|Yc%Hb`@gzIC9ha)R*L(mRj zBAX7qCTm?tcO&RQ&Yhr%hAf7C497LYYvQBmhciJWhlt-y*bW~?+FQTKF4}HjDQ;$%%>yvL zvoJTf6z7RJREhnhXe>Kaa|~GR+W@Rc>(W&y07qU|R-~)4;AQQB4RpQi?#_**Hv_Bd zgNn4ay90xt21n-^c%9})z9E{+PoCsu$QstSDpYrU52X@Gz8}WPYTgML6cW|YR%PXa z>K5ucc&Y0zil3H`$VK9B<=u-|W^TlGpw?$C2Aja(11N9=K@);e1SD<}u@eZ;`fY#; z7VEJe9vcl+BR(Zm8z~BvtA4dW4X8!DA^S?jm?Afe-&NLBkc}jT)=t5acZFJYEKJ@^ z0LeSVnW|>mJN!V#THZ$3*^Zo2wRj!_zp5pv8u0MsKgQjfe7;G+{ zF&#G$Pp4sjz^)8#RaUlD4<9h|nfVo(d&%zH!JKds$ttXOrNtg_-lWCmD^Ybu{MG=t4 zxP#m%XUVw5^sQ5a)1Ja;@DcInmPWZ%sIC10TH7KfS}QBBH`nz;k(n3Eh1q(~#7+Tl zP*iRWVdQ55CjJGF?E|1z_!OQ1|7x=JZ9kM($uV*QHOKsJDPEign3{Crrp_}a^Fm8W zqN@lM#R|PDwgBfz1Ze=SG6!1)DH}n~7_`GJ(a8gSGj`LmB#D`QZ>*ANvH|5oRV?9Z zN+_8}G1*xwDSH^%QU8u=OE=W!lJ*5qY*VKy2jE{k(mr}>r-)+UJX@9#yNDp?Q!c<6 zn1Gf>vN7Fiz^4QhRpl8BvUq2pRvd}hs}wol@gzW;!@0}Yrpod*3Le33Ad$2gcQ*IO zR*+pEF51aUj^Vj)(*{Ydlq2wesqQ$7I~mSSi{6gvgLyvQuwX8MtaB(!0YsJgrR?R7 znS#|ppx`n@Pfo)x?hFb?tkl~DlqO0T)jr-Ht{*ysB;#k0_Ynj=2ug7(lshOi9~>BS zbToL~4s|#f3cdm$7RF+$lK562yi<`B_6yeC-1aC+^v@+YTogOh9))K~e7F1F|8MI& z24ZJ160~(nVe34$mDS~**H_7#TvkJT5p5<%DdD+;fjB8Ho=`w$k-_Bjer^zZe|4=|nvNN`JL2 zQBR0hDr&@CL$8-rpgHXTVjD&zMR>TWB6lX?IZF(mi1Fc(@V0P>%fodoGn>;p9%EmI z*^kA$!!OJ2LK~@Vg*~@%wm*f~4q6mNygbs^y-m&kRPYSU%8K~M$P1w1=>xSbpRb`% z{PMs+ep)dC+sw%w#LvM*Ky)5l?;)0_f!uiA@b0J^`4JJH9UQ-DEds=bv-5{OmJe

    9jlT zVvNPn!)(^^)^^~N?KHp}8U(1VLInXZr&znQ=U4=+4FHV*O#saRQGkDqAjYk|{8<}8 ztoow!8bU41?`}&;;107yU~ASFkw6fpuc#T%U{WXzj199n_b${?z3!eyN4J1i3jpFE zJ81{>1tBDEvDOo+6Nc)9Wv!y0U=b;bI;%UR`;KFRuMOMdq64!y*F42`4iYI`_{w!`L_ zK2@(B9hW#tVwYqW-3IIQu$f3lb6pOxzX@)+$Ce^3C{cYaGU;kvZ#N}HQH!dfC||SR zqjMuWfoL~Cii3>k9_VlI36o;3go(1Cmx@Yhe&Ie_jGdc;NwUjyI~X5w3k3q~T0Diu*n9ELjS7!( z5?A$y_?J%PXLq~fz3bI_;G58RV7XGEJUPfufo|vX-L@;^>`q^bbtInS*W~p?XL7xl z>Bq!SjIySesBv)B&4#Y*1-EK=>pKW-WuI=FLrE6u=|&Pu^&CP+*yW!7u~EpX`B|do zFB$|ZBL7^eQZ#f+8*$|ttWvZ_oWs@cdR{^3P*vNpfV#GF5_%Ni5e|Gu9_7s(m-`ru z9cO>`9#1EsI|Fb6z{SCITv5xWL$4I*Nf@$Z;sTI=9j(Z#1BMSlaVP^bb>+mx>j_Wv%J6-`j=0R1N~lrdQR(A z;JF)Ac4XX@A~0D2&2tYfA*92b9Qcb(h_SEwV(1o|->FQ&!MD-`G0o6ET_+UY64R8H zs3kKfOrHce4*)C6Uh9e~4DISXD$;)nZ07);1|R^C^IDmy0;8MhD*|g}_xf6S?YX|| zSLYBE%o!dxEX->#NM<_AS(Tc|Fj3wTswOwha{E1YXYg!F1S!0bX8QWVfme+`(0^0QA=uF33pi;fL(j&?$c1 zz8%_sVB;akIC&I%1P*Lm6ZZeJ)jR06&Co-@xP_e`?id$2isLUfaDrP*ceY$6MQjOX z?T_iz$)yUfn?$D?co$f8cxPfCbSD8;vJo~>al}4E*;G1;ZnN0P*`)|f+wA>32!=EV zE~l2&^#U<2mM|TLVW^qbsRD^;9^eu{Eq|*gH0tMu&-tUnYhMb%$Aqs@as*tG*B4%r z;=ZdCrp(AO)2HyS7^6aoqEIMk)WHF^Q`3m}w$Vj#6*VCv!P|H@6kPa9S^Yb&Q4e^+ zP0Bc73iU!aKuMk#OhOg#sJ-V8TQC1kaOnc*21o!vP@5%WN)v?=u4rTuvem&gVrREU G+y4gmY#7@B delta 2586 zcma)8U2GIp6yCctyPe(cU(0Tx|1Gw#+tO~!mQqNmP-vk7h1#OH6^Cu_WxHi|X1y~T z5L6O@n5Z$q8;!<95@LvoF~*G{F&e`&YGQoRdGWzOVtCL8ACxEWxzp~pwHh~>@7}rR zp6{IV-JjXtjN3+VG#K=|_^)N|xH@nxSdVUA+uD2yxv&dMnkVl~d-GN4D)BCBO5T_D zAs6v2`VYFW2Yau$u$Oo*OP5i)dTF!@D|}R08Vz6{_Fr+OgSZ+8cpJh&9O7*lhjE0r z5nO|7d0VsOy#YtLcQi_5L0XGr)2>9_uaIF$LZhh3$9oju;eq6WktfN;%ySDyAw$n5 zd)W#17;0tjx)=L{%hSYM2LdpraM;Kn)5}4TWw-1WW9%T>!%XS@PT)?sDTqR=0m>1n zAOPkbHY|7eLSPMZV5%JJ{nv<5KBJRADabP{s1SxKgaxf=9%X$>3^kR; zm3z_=!M6$A1wE)m8vyD#B&7dCUv);e=Cst#S#~Zkjhah$1NYHXC0Ri!A^@dBE(%y8 z|8Fs>=4ejSwhz3@+~HlQr8FE4qE1*0+n+Pf6!m#4XXv)4Ky$i9YanM2ZHG9UnI375 zR^$-t*~%@q*!9Rp)LOb78S&Jww%dxT=JcGUs^7Cv-OOMY5N!kK;gCSI7y4^_!bG1p zFi{sxLsyNt#b4Olb+db6Qo>_P^O{NH;I*3TA0(KpBDr5mZH%Kfh`oMTn&hSjrHh+B zKtqt6EoF(tw=!gy_L)}3%9&Pfer=p;i)M1V3U>1kHrw!X7vyD2Rwhf`qGuAz_7T09 zCp2RbTgqVUNHD-~NTf>lxeyiwFG(~(A;xEis29HXPn zSn02hHxN2ddTq-hYHi~r^dP_?4tzl-cr(M*ehkJAvsnCSY72Ca02~F7IM~vKT)~#i zVxCUI*f9WcUXMd>ioF@%il*6D@dorb`!U|SbA}Hms^~aaPjImP7BMX+Bl-l4J_!Id z(USl|rvU3sG$6G!k=Td2r}!{WY#&&s%Tt_r{Pm%4I7@VLzCcH=vd_D&50;hYXT9vi z_d`KTAX_P#1cUAst>}Kv@-lm+``YoHKtBXfscE$eympHwq_nrF1Cx``Y3{)#gmQS3 z0e`Uwb?o=9I&_7-v`yU$``#%N#I!^EOp{PJ>0^`{)R7q!rfGm#0EnFMIVbcKYwu~9 z5amA&wxMV`ijKr*|%Nw{OY;U^FSVe`*M=!4T1R;43gP$A(Nvv zvTT%fgeuETRc^n-J&~QaryISl^0u`Dcu|uizolyMpVrF@N zJ@!x7z0~U|QIgxaWZ(!cd-D~vAT9GO`ULi`jS1`j zB`P1?NqPVn>(~oBnsCTnI(7te*2nglHi=1d;JwIx*s-ns zQRof>EL9_H;=&Q@5M!rPG4u`_96Wg`1k>PiOd|kr@zYv>K7f7>Tuv=xnt5U^7?`GD z7_LlbSAj&70O$ba`kkK8D8UV1^T&kmlM;ge4*pt23V0NsKYU4P^k1YfWiqh=7EaTM z_!61o78R#It-xou7j9OWPh5VT)2N5PiZf%&FojluIY3F4UyX#S;8FRYi;sL0cr*jF v0JI(kgz)R7ZY#ri0~a+i0)J^t@Qt_wD1rh?2q{5$lm;cF^eJshG=%;JjWPzc diff --git a/nlp_resource_data/nltk/tag/__pycache__/brill_trainer.cpython-37.pyc b/nlp_resource_data/nltk/tag/__pycache__/brill_trainer.cpython-37.pyc index cb584824fcb7feb3cc43e20e3f7ac6e91490669d..e40d4eda26612a53f31577a8efbcf5d301ee6460 100644 GIT binary patch delta 2840 zcma)8U2GIp6rMXfJNwt4?GJw~Efwg`w9r!MA5v+7ZHod0!cTRhEZd#gcIfO+&dkz+ zOPYu=7-Iykkq2TTFZ$pU?u*eEO?)9H#+aDs`13|04?d_b1{3w1JKeT45Z$zA&b{Z{ zd(QdJIrrXgZ?ZpbvdV*%m0<~gg}{x(M>i@rEuO%EF6qpYGAzzAa$IIKmMvf0C%#Ht z!LQHqXVkbVWQrBY1mnR>C>~jHzO&hsMjkyusNZ-A5Ux}K=Lp=OiNO^On(1gJiq<*txY)VGRIR{ z(F+aF7+GT>5Bp`_*m`d#sm0ZN(gZ_?&9uhvK*5<^vktbSD8OslHnUdV-S$OotKL$u zjda0kfr4e&-O#|5_5rz7=fmw?(XFTrlzeC-65;dh+gL5X-Tq3YN)r^B7KVD>*|AxD 
zie_5)(T>KQu3zltD)e#vb}9#k@T43t#Y!F~&Fp#pc1Is>dat9MtuOu7@eMoLiqc7~ z8Wh^Gkw_Kk`)tF3YT^{#OgRuGNsW+T#Xv0$>u4Ak@E9&_^&*PJ;3@M5+SpWyGNi=T7jqy58JN&ImtNJXkSaj&RNDK3sF5 zbY@2lJ56}H(PTbvnW^NoWh4Yvn(CE8Gi`p5R03IJp1g5`j*-b*6wTxO8leTHPjI@;Mt~ICMif zOHZt1(SwF^2%|_|F8$h*VJ8Lt%Q=CgBtAk!*jK^G}_JV zOZR%K6!kF4TKP}?Lmd>QS4=W^-H}?oilkeRe8-;|fJL#6_S8IOI8|P~DhN{l9wD=3 zWK>C8=c;xN9PA{4l`>~svO)BMD8>rZ!u~}KEN~+aqB`L@+E|pIZ^m$3b=tIz6z?9~ z5*a0lhYo*!uz8&ToC*;Jh%EKZ8-tB(h<`BHta@qSUk+;ZtI#0Tok;GMf;zYp{P1aSPdWFKfWl6!NB397KIf|sK??_%tIn;m@~7}Ihf7mbXWC) z_nP-b)PWO3#)yaniFzHSp~$4)#MZMtaO;-9UBb_6+V+}N?X#THc&KdE_S$KJ?;h!A zF+Mx8tx=RZg;EYnM6??x0;TsycF7$YvaSkvahUlhoF+R^CC2|g@O4E|rOT@5Pb=#s z?jD@&T-zN_Eci3rI%IeL@BTQ;L$TOF;rD-Z(iC>lQYS$JI(oUfwNGFE2?PZLQY!5kDaiJUWQ z0PLl4KxE7T?0Yo7s{4CTdXnE7o7zS9%_01^=@8c071@!BGCd#YtII`tOo+D*zE^yc zm+ScFu{yqIylxC{(dB;X#+;GMnopEQJVJhWMPs{a9*nfPu=c!4qy7bceSCP(kDPGf zY0m`(O)I)gW(DUfJ0V6&Q7-z@GN#@?5u;1{$V3Z$G7}BQ(WfjPxRzG-$M2PD1~-N* zksOiBM3&o5JVIa&=W>)q(d|rOVOGU|m>8-Iq5$LCL~9y&%i%tKBeCSGxF3P8IS@C} zC~=~@=+F`e2vDN~RXR`s0|5^dx-huPiIqkVpmZbu_;A-Eaax5NPTL4-`HTm%mNVB~etiMGTU<3aeF2q#Hhr93H#nTh>G*cpPZen6 QX%dm5a*zdCg&Yn13-Bpd8vp^$v5}j zbM8I==iWbVvmb7=@Mt(3lJK`_=4AZi_rmLszK9b;GMFXByf2F}oXeJ+^~HRQ)_j(d zRby&ai)l=nlnljCZ%Bq}`WJk$fWb~ly7rLH{OrzeN>d{a9v|yV<+EnrrNnG1Ur0cv zub02CjEAWr}JK7`D(zKN8QY0sm2_fws z!;_eV64#Q%lAq+2|MH*MI8N&7imO;@+W|jLT%VJ;2tHbtUAed4Rctfo=nNWg35Xrm zB0LTa(UxRBTNwVEzrFR=mKI!fnd7lk`GP|i&766)0K*dB+x|@tvBk7PA`PYi8)%Q} zK+#Fn?1L>xQh86uM%KosI_@=GpNkf4^Fq;zLeVnqUTEgg&OP$=IexaYI}$~12oP)~ z2y))p#v=Ul&Nt^3S|H2xkg&mB8C|K!aV#YE(SL>RWO{5@<5Q{%&ULkGVfMaG07TNYJy@rstP`=-D zhc%EFAO3?sU^vnPP|x4$YfA_R`VxlWX({~YH_~=80R}P&SBaB{ahH(oIn7td+i(ci z++f}^;?>xCuA#`nL4?_Ius_Srh-g$I1TPW!2!W8Vftk%;HkZw;kvBzrM+sK?&28q9 zoz3jS@`D}qN?@EA&`91pwx^4X^)gKcug;P+o1qn1?(=hdU|#H`qjirNPA>0$$`)e( z5j|&97HX2d*46AhI9QXyWlF}JwINCyMTSz(#7=-5nEhq}M9RV>(M3-BE}D+3&7^HJ z$*1;io+np%5g;eI+U0~*G;O#3kZqgbr1QCPfIR38UO*^qVSu1ggI-~ZFvLPLRE}Pe zk}qRH3|5*>v17hsE`z1$Bv3P!#wLS@j7a>~yW+%hjXEAnWH??14+*0w&}@~FIUpezqotX_`(Ye>UF z6-o-C$YqNEYK;Ap_+J~~Ol#zy9Psm7!<(B$zRe)ZaXkW6Zw2^Q!<+gk#o-*m%LJnY zq8>y6h{%haP}5k;HumEBk(Tyq;Z=ei3+Bx9#nen5Y_AZ<`0
    =3^(($*r1gw$PD zU1;5p3u^i6k$$U?NMMXxno-#=i?)d0UcmhTnL?LZzcXJa&al^{yc zMX-v1@)$b#k@41vs=LqAVw~W01oU|^8_zo_6JQU`{i3WKsBI@bNIb#sj!(C%IHEMS zqX-6@lO3rf(_@9chP=p1_?Y1B6T4dRimp^fFQy-bNT{d#pn(@A*4L(O5*bqb&cx6@ z4M#!O+}BLl`d8tx^aTNQQ^kd@*(@k)qVlV}q_g$$JCK*^M$nO4FChODYIUeL#y!Un)?kLOoN7;scf&Z(FMy+ZsDksR|K~ zA3)`Qz?}nx1CLwz1zb4z#FZbwr6*>bic~6IX?AwL`DVYJUH@4;UaG8CDu#q#OFMGf zN0pIwfD2EuC69TsU`^SU8TktOiml|nDpXs=m?{c(0q24!HZ@yg(laxU>bCw^3ITM?B%sbg&!x4uoGs3qr?w`tN?DX!(oDGc>|Qh z-N{s9Wy*mc!A(T0B8vMlICCxEy4#%YPI)g%NO58wB=OtM_|P}T`qN*-bF=4l!YwLx zmj6#F+(zhYx;`@7*3q-I4RHN{1DLcjsN0bXZZn1o24IfNESX$6iOdQKQD7K-OUp0N zHu8|;xrqxy7`L@Nz#&sfFDy`nc{06|)ui6hUq!^V)j{xP$m{x;wVs zu2Vshk>4E!mKjf$0;)$XSdhfW+0M1XA19Q%8h(b37~dn<@NlxSOW+lulPS1nZDfpw^rzF#dNnSGjZrR LOj$;+%8JTg%E#Zj delta 946 zcmZuvK~ED=5Pt9N+jd*p0@YF=XkwxyRnrO>0U_0p5W~$xBe>BtOZQP$w!53zH34J9 zsCR>V)01}-H1%RUssCdCf?hm$;pEIhED3hgnfGSqoB7_%d|&u;y|`2?<{AFZuHSA= z?G?u!+{25_OlEUixZLEa7N$V0xms76lCzycYSm4Bm3gNnQ<<$i+~kbD`^*z->=v`7 zt-oTnuH;M2%-Z4(t7Z1yUitL%OE*foj#YAK5WimrL-40gtS8CMd9n-YV|9ZLx_nFDl;=D zsY0Fd9Z&I(EHu#dC4M?Kvzx(_nY{~^<0$~z7&A`|$H7_M5C+fyS4m8fNXZk=M2`pp zBY3W5(;1ri)YP)A$bt&m&1`B=kVv8ovy|Z)iT>k;S^sq~kG>DR8{@O$!WK2 z8vWnmcM~hSrzj!ONXG{ROcMcx9NVQnRzpV>{~;}hF)GE-WkVWu* z|E_kmiy%ZAefQk+_|N}7|G9elIroX@-PJc%RRvx2lPNo+9skyu>c-h!ip!{At>!Xy8Mn*h%gq()3bfTWJ*ckKEA-0KF16iQw!@`Y>D8xQ zdbP3Ql;@P9c2G}^UQ0c-#;Q{ub+x`oUrck?=u7mv5o)Z}>-DAd?9?0dMtX+yCcT-S z>+}}Am7ZPtGJQEcyY&@%8$H+SEA@7I_UK*uTD|kMqW0<`eI4~|&^;e!i39Vn7TNO>3sc)sN9iv4|dGvHVTThWchSTL7(q zWq{?fzg61_5=_qaPfmx$xFy=D$1Y2U$2H4HQ$vJf2}9VP3DXpyT*960*LcdM?6uaA zfAB_{5%P%-Vd8awA;9&3Er4RtY#*DEk))jLmZl;kpvR@CqsC2bqjaMU;DOmu^wa9Fj=oWrzhCD0? zPqn?|niD0Gh&yOjr`+3pt)K0&bKatlP}}xfGiD?iFI{#Ujdf=at^bZ^+X`xmTL5fZ zSlMNBHU=zm&K!&umQjYa?EH4(tuF|5y2&WA#8jemF z8?#^Rn^9(=*HD!h2L;}kxATWnDI=j5xUhZJL^x##mFB7b2`e3rPKrCR=%WB;h4-#< z^aFPj8Xycf3>X6(1pFD`5P`ix*()M=ivkS5JGjoI^gP!el~)I5JHaT$1)I5KdWA5^ z!5tBgWT!U1qHJgb)%{rFc;P~Jw#y?F?HUYvEITv!tfQD;zD+-&MFllDdYS}5Ruo^kgfaR9{0^{n6yaC3#wT$GZi=9 zODnadlfRIC;<^qudyPmCJ#w9br{=Yh^U=YLcDa$rn3VS^_ZK`Bj^els=PhVQq(E#* zkqYKCATVwL*m=lZZgUC5QceTZvfb8QB7-jYk?o!e+n%X#Tuh-0$`R9mVqXzQsPC!l z`7Muom3ldJQ{yaHwEZHS(37SUd>q3F3ASN@=};N7YI{Ut-1bvC8c$Ey{&aHEpmT(I zo8z@~SG&rguUTPe|5b&OT_bJas`tuQZu-AkIDr~KI@Dui&b{&{+d4|K*euNnWS8Fj z2ft&9ZS*dP5X{krcEKjY^lH~@5sFtj2y|^;o)vpA^%$UF8;)c)3yZ#sMnj7iG|LDm z%d@+d-@pOPLhnVNJH3Ja`c!v}o)-PZ`smr`SgXVY57L4~+1GYma{^94l2C9VA;kSO zP|N?OgOwi-QL|iU$B6CxADmMKTpLEY$}^Sj>~H;OHlr9sl67zbchP z8RN{K8O@IE`Ha$+=bb`^4|tY2-^*3To57(t*$s8emT5V-cb8)!7{?K=yi_>WIL82+ z0vhLIit3eka$F1RdZ=b0;p|{M95LeYSb~ms&IMB}W4@RD(B4oEDvq?*VS;}_BASZ%boky2P(WuPzfk?vi5YnOzmHseSF^=%DTM3@1X{9j;LOi-yPXk z@K6$?4SN-w^W8)j=V=~g$3!eKriFzF&+y@E`(g>*IKqdJlXYSLcX7`P*oxE=b~#z# zQ+Q{CU8I(}$WfD_Q$3U`NglzBPXpjAYE>ar$YI1|7&rqs3&<-$G|0E^Sgu^mHt*l9 z47Om5!zXv}v!C39USy`CLko_^Rp9gTu>+&MycDi&2gXFwM7iwvH}r+m!#RO(2N^G& z@T*2wcd?U$l{v!fyh~8KC9D_Y2cGn*dmRJxIR=gbDK=8`{WE0!tG)ywjQ zgSU=;#@Q~d{Q%003>VM3})QukY z4XSAi9&riOKL_AAIEGdsITa3$&tm}ji&npD;C~jAn1@1!LZGPC4l|l;J+!Daw<2M_ zlHGb}wZq*oN3C<#RV>IT48kr+2XJycSC%pN1tu5HPMnySNe*A;bTz0kyI;VmFyGJc zdOyMcX8#c0oGIG-nkw5rjtUqR?Dl#nLm+q zNIpDv+sgMK?WG#q>XdJd>B<3lczju7$<|cShSO;y(IwB1-`CBA9FgcEffIE>p>TcL zBzH`lD7hnshnd5z@eD1FP z{cSX}Pz&e3&e zk!Jby@xt-vSDJ za|?+^G!bAo3;YE-mqlt>j05< zK!G|Fh6eQPmb?2s%0_v2dTG@lCVh~AF2*b6xpen(w#CK;)p$BBg^!kd=qUmHIdFS*Fj%GZ7obt?Pq>Ahv7 zi7RL8#^@ru^B}3iW3#|YJ2rqG|gVj+c}QH$6zaFGanpH z>>~%z_ZQ$w@X?kpt2h#+#NH==!GeR^$tw?Z?JVdAf~r(e zTWL%1*~1m)L6Cy#Y4lp}Jl5 z(vVh45Waa51nG|m1{ASJR(;|djUYEnrR@*s>D2h&N9%F<(@%78Rp1a}iO$3O!T zySxFVQo&uNPN{Z_^Rn-gm;3$!Yr(Pz5>S2cL$vQBuob<8B{GwK;p{{A?&0}}!;s(h 
z@x?<8Mw1EMDW!R}xCS~PW|!xOC~y4>y7MNnJ<*i@Y5CGa8+Y?z(t*)c01m4))S@-) zZGORnJneg!VFCUb@Dl)N&e3m>!w)w!ozDxd1iY30N)kS(xE1lLy!YX^6;-?Im<|5R>#VjlL0U#u5Hp@kD-WZ;&=!L%((IT1EUD(MOE) zkI&`G-g#z|Qu|YS75@qt0Q`(xanov+Cztd0p}TlnzB#@i-#q)!pIC;zD8DL4N0H{3 z8>cMk#p{EBM*$Lm(t^%x7P+AM4`66JpqL-Z$cCAGT8^LVQ+}R(Q?^g(}o?I)&-7nV?MZ3rs3U8{fh$)sOG; z!Vn(@$#%Kn!phDFnmqs(p@gRZeFCO$IZ5mLa^^xv`K|oih2O2Kq}de6AuL)TQHmF7 z;0>~$cu9WyQ|%L7^m;Tnf`4)(#(R2tM##xfQj%&|iN6IMWELmyKtD9}GCCYS(YgV! z6;R3{v<41;BX?h1p}a25iznA0?GlIGnE&KE7qRI*K3V@r=N(AS6tw?I54${*;8bkr z;QR0#AHMWMKa?Q`C=8qA zOBbum=@b)NWT17?>yOqj_e3}ah$;R@8fT#w%0PqNE{TWF$gOv3J$y#B9KL8A}+MUMJ<{4&yEABHv``eg_(8k-gE~ z$LFaX)aZYb>5MSwa^uDn4}hYe10q=D@YxY(&8v!^wY=1Ix7ky80RE&e#`9OLetyckTqMxJ1;ru#U?25lf z3woR9J8XGYZ(ztw>Y2E)1qspGN#LbCA5hi>+_i2uoj28r-`!nNsnk-t(&JUU9y*DX Oz^rTTH!fSRH2**5wYW_H delta 7875 zcmb7J3v?XSdES{Ftwt+pC0S2fvWzXu(#o=A`4vNjU$ISWuxuIIh$(njX-3jo`{K^7 zWq}(Ra3Ce%M=rK`Ljq~S>B&jRa`G(D(DWo}l9Dt%X*x**&N(@0Af+v*NgDS-|L>pG zu6D5rm5#o-b07cx@BhA|yIxVh_?K#ETXl8NM?dYO`^?Y1d^XfF+oJewH$wsyyklD($9w{Kg`Bud=G?z1nW2_ckkJ)zG}fR;^V>&vt8pRZq_)R)f_@ z&#SB^tC^lltrlw`J(pRFtX6t}pYPXh9qtoiPmRp@C6=Q|9!s?=)E=#@9 z7Y=8SDV5dAkbEihefim%?QOLrJps71t$#8ZxBHJoW+s!Vh?wf{lS=JiwMAo&O5jt( zg6!_v-MUgO&$XOdm|rRCXwcP7Gn$B|P4j{5;KG}fmF3hKR$X=6a<$QD!r@Ik;Q1A{ zla9tC>7;m8zO?90rB+_k+PbKi=%tFm2|HmQONmXNk;&FO4`Mj1i52t^U4U*t7*M=Y z^ibb(^vA4gzOW)X>5U#s>EV_iwZ&-Cv3F9(O1ZghS#>W)t3(w&M72z`t!`{Uvk}k) zXa=;%Y@4|oB$%8Vn3|4=2}iV1k6WIOOqhulkvC!-np?(e|0?^nj%jsp5lslIqLm)5?%20y?8KN|x}q{A zlB1E)Xe^qZ`324N^jR&H^^39}T@mmvtf9`KaHRk>*B`SJu4W$CdL5nj)8SsFN&bDf zJAf_37OD1hH54->Hq*#|WUuY{uCkH|XeLP_$CP)Bk_Av$lU&y;u5tZ$L{niu^UqUV zEfq;mmK;Frr3D@2*^WjHpAuB=Xp=v5kCY~{X42i2%{&JwB zz4&l$wK*p2NZK~XB4d+wf3|zgj4}%WhpWXXDDcL-T_BQ5*$Jz_gCIS4DhWO}8r$R}V@#c?5 zQf{SbPT1*a+K!thZNukb@7nF7_~D7^GD38fL$0z-+6)F?fj)$|;o=YXfJ5*6G>3#ziW z@A--6Z~`X@2QPOBagv5Smh*Ndj-n4NBj%x<^B!t(i~*10BY=AeV2x>!Ohs+SD1#sz z=E+r115m{B?1PJd>Qs`v*!1Sl+?f64-lvp-Jn!T(oay<@`CcwJE(Hf;q_?qTJhe&4 zFAd+|X$Z>k4p&inIF>m30ILEL=fp*YODs8IMl35_bGgI0!B}L}j>V!0%JUoxVC`;2 zzL)LL-Bkf9-f7okLK#nR)mVy!BA|j2&v5Ha``Bb8Lxs>$drk;3`JMf%bXB7yrz;Ke z?8(NDj|XJpU{$v3#@{PFc`>|)aKvfa>x|reaDTyW>2M_`dnJL0Ghn~cOhgmoW<-d{ zjM&cyiY6@k7^fyL7Q-s8V6cjSu1d8~_GmR2STZj7B_+ zfd>E&0`f-|b@KS3cIE!;V~6%A8yYdjE|g3DY%9mmi|AC8xL|Bt622hYM@IU2=|Spq z^>LAmQ)UpJjl-dlLA)%y8)P)f@}C;%T)>77R^|xT$)Aq2D9>kqGt#Z}a#GHjD_EF| z_8gx00C^T(k-LmtBaeEt^PGsssDqi9D`T+K(36?8$^7xwxy;H`*@0?Invlw*9rLI? zMz!>d**A=JepWkGvxWF{0o0!au;G_Pqe7;53R4iUX!R96{RvEB9tt4|o?(PMW=#3S z;kwdbiU|4F*;fvCdfW}Mn|jHu^0JIV`fP2Kag!6dI*hq5FnRfK#F2=ZWPfE&mw+0x z`*aJM^Nb3whskY3Rh)er-W&4mh0mavkO3X z8G&0lMzy6Ax6_kJOW;;0t_5rb;F9GDo)5wzyx3tun&*{qyZ-YC}g~YoiX;L{pN=apeQy{Kjg6R&S^%42(^s34y(DdSy z{OR;{?B3ofZ~(WKYROF8oVH_QlctllQKgX*1T(=k^aQ{aK!6W#7rL&bNAX= zzO~n2-zKpJL|G1CN(2a8&5Dkt*{pdx&rmo)a&f_YuzRtM3}fBP0Jaerh4?Z6X5CAp-a15de5pgUq`c-z%92^ zPBfNGh_9mSYkDLuI`c)C{Xe**IT}meX5_l3jf;7Kx8qkxe`0@ju;ZvTUo!9PG3wL6)tGg`^Xo z_#l9tX^dJV8!Pw4L&E^#N$yhaF7S$K+oI2C-j^&#bjqdouiRab&lZ|*P;u>~9q}9p z!0Zd(J6tstb(O8&MYbfuAbt$U^Y!S6qsZwx9X;4C{ zI3vIC;FANdVC{ZD837dxKScX(0#~t0SRyr9AC-5V`tV*}fP;nuZW&)UjNn)@VMP)5 z9bPVe1VW%@ROE*!aQy?i^Ez?;V=3zaIdpp69!@WdFUi8W}+p6R!if3<;xkAAqZfQ)r=-pp?u9DhPG}Lt6mF{7}HP zpMUu zgc6cO!$SNuNFZ={VFmJ`q3@u>;}flG0DlB1-pth@HE{Tv{M(0@C~wKP9=>}8qAGE? 
z0`s4H_aavPZ!~%Sk?wO(T9}xC8rR@1d`Ja9_!R0Cx}M=?_;ZH*=UgC=+ja`N^A#7FOYFk-#OvXpnFclndBIx#JKyu(QxcrWs>qG>f~N_;!|ym4!jB z4TE4~VPmaWSy=T0%UP)@HP3_d7&N{9pZt77_-)q$8HVtkX+~Mxh;I8nVZO}URxW6=L$jv*( zAXptzpOlUi3(4e@xp?=jUKF;l$=INx;VN7IF0Arxc;FgU|0}DFpZqRT4=f&3CMJyRB-oFSwk;`DA zld8xjGcKT|gS9uPW4dwxu@q(HK~m&X2-PyYL^|qiIBcF&g^^}2)Q|9BbPg4I2iKk<9Q4<%x@y>BA^Ypj_`N%vB}cj bJsQ0iXhwcUgJs$zGJP{5zG+oV%ZlV55j2~V delta 703 zcmZuvO^ee|6wOQ1=DW?H*eT;wg{qVpS`cIehe1Tqm5T0y@W}K%C!OYt`w~Z2E*wPM zNPYeSLBWk6u3ZT({Q>y{uKWYK@!ihGk-o)wmwV4SIl0Nt!H;+s_-J#~kJ!YN5~c2ink^zYRbC z>Dnz@If5%!o%_U)GpA3Q@|8n3UDXb~xXOxL&>|{h@lk^{)%<;P9Oo$???+GLyog{n z9!cBXA(!QC_vLyI-H>(!r&U#(lro-Bx+j0RFD?Z*T*baS$D z`QE#60X>(KYMWA?l^iH7WXHc_p=fA8PjY|I`!9>7xKE}frQ*vw{~Ot?8={1|#j{Dy zICPbAkqvxR)y)H#mb`90N-7aUTh2Qhq$j_2#wU-v!j@t`eiIyQt)syi1PlS^5Y8hY zDJT z<_f|Oh+Na~4^S$81s#Q33PcGKO)6#_=?JN?<+pEVXP#$v=40!3%YI_prUFl z?+CYrDQYJQuM11quuh4(n1Zz-8vDx9w7X3e#oUQfPae9TPM^|_TGWTqk5cC_iw3D5 zM@1ux{6Q={C-A||4U)nT{>V?@EdNS>&Cf#Casub~SF=Q}=0DU&WG!D}>$fWZoL_j( zp=};u4M{V9&lVQ{!>^3Wzp(SgX{3{%USbpF#}~>!D`QPZs^udsAlLG5+VbY#Ky)u= zogj9F19gr(nYe)yy2A}PNSXi&X|fqwCkDKf)M=B>5R1z8*}ndrj9cI!YY5m%X(7}R z$^>VSO(8T8Y=kO;0kAY(knVA%+v)G^mVw85ck>nQIP3Oaz9@A#+vy!V<63XG_k8Ph zsm1kvcWbco}t6c?UJ`0&w#}(SgXTYC6cEeFT09maf=%OAJ3MJFxFTk-FoE zc;GmN>A+B>ne-e-!q_QNMPL9$Z4d{62lFVD(;Fh3?(K%v4FD=R_Ygno=Z?yjaM|8bm delta 763 zcma))zi-n(6vut`+2{C2nt&Q%hDH$7CJ2&&p%f5R5JiB%iQ0my8?fA*)r8t{WIF?0 zGSGz)-2gK)3+l+9!RCK}5iziI;+`Xgz=S1#?)#qJ``){=e>tBV`+;p6l6ck*``+#M zw)5&h1Wck5Lre})BvToIRzfAh6c-wYYNSz3Xq6JCQ9Y0uVfv}`7SR$bG2>LC1~Zu@ zR+HJRELMxH>`AT4mydfgca)!3Dq@>tf%8Y5VH^dW*Z!MfJn@s`PMg>9HrnJK-fGBy z)y})Bkh>tT3F0FEimx{Rqps-Us(RimgIo--q!!NGbLS0hreTl@|Ez^*l{@5y`KPhU z`{WY3!$+i!8fWjwcQmU(wGJSF62Jr$d#-@805$+_KhXfHK&zUg?upvH+28FIjVGl0 z@FhePcik7y3m=M~xcg73=JwoYI|qdqCH?O1j=TSu;y(XT+R`qAV}rMiT779&k<81P zf0PE97x|OPXnfS;1LGEIoqaIwB2?qw%xkk245B0(V9^wyMjB6(LEwqbe=aH~!UpnF zQJ2E0BwI&1!tlz;brHKQJ13sU;(_Po<{%EkV33XCahkyQ%1NBICw?|e6cN$VILr=@ z+sx1Wq_}iPWRp5zej-pXUtzs8OmRr~bh&;nirF*_?yrd`gRrCvHMb9&R3lG>(&|sDY(uv%a&wx%S>iX7@VA z*~|TK3K2h04S%S}K>}2M2r?29YT*Nkss)Lk^0UB4RUrglw1AK*ia2N1v0aP3nlm$J z&YU@Oo^#**;!ke0&*yS!3BIQBs`J|kNqQeAPd*I>Gth#c0-zElzEmYPAvl&{EZcHb zu@x~_eAQL~ulib5xAje&VH{5F(ewj}roGNJ#qRP7fXUD-V6t9%O<5y$H_g%f1Ig~81=<09k#^E9=zG0B+Px&R z$5eSyvU>gsqDvNu3+{YS_nV>TRH}_S3oSX$H&}0_7CH;fS}Cm5Yq3cy%as5|uYPc3 zcu(Ui;hlQTIqo!Tl~SF0PN`mPxM5}9_g44wce)4pdD3%T2KF=10tB996Drv(!s`<2hJ}yL9Bl^dD6cE$v!h5YtSamLO=Uc900NtW3laaFI(SZQ zDUlpWi^`G_bVUj!ps|U&U5JU~0jZ^~9E#L+`H^%{`ts1ra!YIJ52Z-cqyZ^XBAp-7 z##E43<4jvQi2{I%HQ$~0d>H6Ksm{DW2*!F~F*Z;8ZgrlzXFmh8e?XH~k8olP@>BXi z+tL|&TC6SjbvKN)O6XMsmO+$KbyrvpMwY?45MeHOZrB9ov5KQuTXy}XCx}@u66p85 zO8HJG&bA2a16(44(O@2}l)_2C1RVe**(6=?Gh~v6ZfN7b`V_fXzzv-UkVY`Jw3w7) zvJjJdF% zM!lKne1}5R0JeXLKhA`QO=Kyy$%&MoMua$M>okT$xA|OlRGG8V{HwX~HY+08QwSh8 zN$fa+X#|4d_Wo@q==Xb|1$h9buF7WK%$l8M&eRyLm7r~X7!Whif|mj84&Jgnn?!F} zhUo311~nfLh+mPIAb$Hr0wNH>Ndii3CZII!rachR8Cs;h&}V4|B4uFX7x@mA|Fi2~ z6vc?ODx?%YHbS?IUdgh66rLU{LC6?}Ypmb6a^dRiC2`qJeT%W3eeQ;H?fS(_v(D`G z3%k{bI{P}m)-!pigt-GhJP|b}62LcL@K9QmAgUn?lp)sn4}12y+R7I~IU;KWB7PRu z^p*is%DNgEk4Vdm%)oyrt!t4U8IcCLLwTgNQb3`v8&OJ7sOu)InG5nGC=n567_%kc zJ$XxtFjXl0h2r<7(NK^fQnfm(x_)KVgFGxx9Alpn_+26OQTnuB2|~=fXJ6yvy;Hp2 zJJ@~>NZHc>;+#|VAboFU1U7>U=q$1BHX0s;L^|#P7#69Le>pQi7IZQ>SpjiYP+ba$FB^*?PLxFu0Y&<+7|b-nlqDn}$!eJS-V z!WP0_7#nNyH=k;C@?Z8BN$1Az`~N~FHt%*3#I$5~3A+Dp2KHH4`W64%;HhB?Mytgu zp~ptmN_=kI}_9$Rt0>wJ;z;3mY7i(xUur3%aVApWN z9D*ANL>7JlN8M)@&q)8qutAc&Aa+(Yi{@Rd9 zvit`_zj;>Vpcx=c%8$5{s8vahixt0K+8vy@=a2kHd&l^P+2Y3Rz7?`hK0G>F6jeh|AwjITlzuGnmqx!kBn*?Tj8BdP&4q=^%BoozKR$wr 
zVV~z6V+U?5B5z>@thEfY<7Q3Wp(CNPZ4KT<%1t$iqlH*#v42Y&UmJUejBoMUanCLJ z7)D4x0UiS|SUtwyJ2=e0bD$e8^Wy`j+9>iS_gKZ6zk`Q32)FNlvK{K}Ak3jMh{pnZ z33p#bfGTG~R8buF!W437vl{E-Z4d}&voc8x-^NwF1O+n>elU+)h3aqe#=)rp7w5aI za0_tV{LO=x+jHzi7+X1pj~I3u!5IW+5o|lSs984==OGYYf~HJd9og+TVq00~U;#G- zcz1Iz6fR=Q>6}?ipDJGf+E8idC$o8s?Ty}Iy@tNdeY=<;n?7K({gzf$wqEY1@h;pmgviTfOQvrz}0 zDK5N^YrgL+U}g`;`R&68$r^un_}w;&&DIdK5QwvcYA(PuHa#>GJaa6?)**z;9)d^^i^vGGlUTKR{9H$Dw?KoC{oOa;J zkB>;lVW=L4Nt&q$Y^2SU>b~!ZzXUh<|WU@e{^!k)rXiFRH=D&bq(O5XAhctXP^o4$7zZy1aL$dV3l ck}O^oF4_SJN>w_Zhk0ANXsTv6ym{pR0j2f#TL1t6 delta 4600 zcmZu!O>7&-72cWs;qq4!C0ak0C0Vj;GmT{@cJ0P>93{5pI&oyRQR8@Pn_Y31w5a8h zo?Y3py$la>6X#&0$oAN#R&ND*XrcZb5)^GwplF+8PIG7r1Za>$iyn#=2#TQZ4JnFN zvc$ffpEon_Z@%~VvxN_@G+#)k6ACTN;#rjq#f1i<8lzjFRP(zu|6Om+2$zF8>7VRbDakJvryudE|(N%J!sRGX| zygmcsC>CLgMHvAlDJVF|G^XDtmd*?ogA)uk#84Ju#o98MZ5iTi84|3IWq~uv`q=<{ zQ%;)Y*sl8u$em#-$Tcjo&);&mU3S=l6ManH%irDaxxvZ|$TU-Tc+sf`yynepGi?pc zM`Yiw)G-h=B-{*%N2B-YJfTri9p6ud<~weQqZfay@Lu?Qdg1VmTGcsRvG3fd)os3V zcqV#IeIa!a=6o3kG!s2k4>dIy!1oMJJOLz7DkLP2=!O!gj69?b8YqvnkZzEM8miFN zLv@4fRzMvJ76$tScj`{w5Y($%W{KCT%Ynm%am#jB9bf1rw`K=Ib=K-a^;fHTjrYJQ z!tjeV?)XhXd`p8#Uy9u#eaK)I{8}lf+H1#AG6RT0vLr$7?*6=WX0~|eis)2)P{$a& z*!SNB5-0&#CvPchS6kYhRjw^urLzsSp@nLwRMeH2pA9ueAc|&gW^p7`hn0rDb}-b@ z9I#;YDs32z*aIarbY)lxwJ;X#)%NXgm?334EPN->LgkV1K-qw^Jj#qlJT$|2iGsCa z(FJWSkIsR()LnbYar1-^LBCt;Q?6ZIV)p5|=pWj%&BghAj8kOE0TDWww(3hi3l&Vm zQA<7oB%f%R)hs!7unLA1N$x&9NoY7Mbm zU67(6B|&Zra!cqCk#-fd$580jezndDDC(u?V9Yk;9%l4*Y;ab_j2`gS;D*r45LiCn zi)#-d$s@sI+G4l1l~Nn9qa8RGH~${fNK^8RBje*T3Z2(hJ(iERR!(j0XZ3o{Z7EQp zWhsc&qHFtn8abW>BJ`jZ*sf*T?R(wQ2EELs06pyq+&~9g``02b{)a=-C~C`RTHOY% zv{G#~*tuHsVRoPQ2K?3?qHWn&EwENmxl3qOD@ zNP-GKf#f+P(!jIO5=r#2Uv+{THOA3!U2=9T#1UQg!0Yb4sSGcDhNl z(@oMzb<#~5($^5ApbX0mvk_?Yu-$AFzP&6Bu9(;yOZ95g`#QPDcW$maUa)MtGYg$G zv}l*h4v!A?{)eHiLa#zVxFQCoL?GaC7?cPEz#edpSwt8M7tXwN@tibfz7^lHZKa-H zD7<{>?754DiUjlgmfeZMnHT1I5FrvTp z?PzwrQ*7{GEE0|eJM z^hOG}jYqLC1!<19`HV*h>bTeUfin>%OR8sJs|2Z#0QNWZOqghBH;-S2bO1&*qU+fo zk=}^(4^Cnt{T#e~nmFYy`$4f*t)Koybh7_gX0Z%>@cipJ$ffg<*FW;oMdBD(#qoST33FC*)psz~(LvE~`^PULh6?R=jWeO* z9oUcCNa(~!-jY#z;TjD51YRF=l42x?M#o5&PEbwa`~-~-4CFKYFqec^`lTLS9~htf zE?H9oC<~+?{@s8a+R))O)_x6kMc@29`oX}7uS9!?o;)dYELtD3gzc3bq267t^KanpuOL}O(s3LACU((J{Al#%&{LxlV4lJL zNhFv)Te#cLd!i4A@}SgbLxa~W4ZP#?*Kqm@l5ZiA8SpB$o(5v+m>YdT3j8#V?erid zBW^EyzlS;hI1r`Rgxs8fn5+%wfPQEA&(BDdld&X|p_CgnY-bgry0zlY>xpar8@)BM zui2edI#na$A7&M(8a<@2ytILYfyVIlOmFT_c1LR@!(e#HUREa-C@+K0wsWj0%bcN~QzZ9t1;d*fx zuuKVR%U;=v1@Jco8r{3&l75*f;s1%`EXXpY6ipn+HQ$yy77A5>#X=!J$Q>L=7Hh8S z6ocg&q)i!15}M0+TB=hM+4W_PfMUhv5d4j>WsM^}DKs49SWpYyTdmgba7>IMd7)ab z@!%ZiHO}W>mpgK&3~ArtvIdR85SJD}XSB?KQnn&$+ibFhP7~%SP-xY4PN$&f9|U3o ghF}b&NH0xM3cU<5AT9>3XNcKU7NPX)hZ2QC{>Zt^ME!5h&rxPVmZz@b@#=gRgK;P5$h-XNS zG7~s+$Wh5}>;%zEr4FS%!+2g)=ipXOk)noQ+oOcVoY~)r+>1HSxJ{YkeA% znjA($;S&g_5mJ57JBKidaFpz@2PqVuVFfI~n?Rv#n@@pM6L574qCd8tN>+x|f068I zf4O*lC|L#e9!e(j{13^ViG%Tlnv1FFpHKgngJUO8z>X)REbf(F|E(*Kbv+1wIRZQ(f4flBD%$NA|&Sq`90usV1iRrS{S24l>u$ W+RGT0;B@M$O>E63nLOB4u)H zPb2gs-UN>l5Rd*D67b|-AcAM#n>2|a7CJDGd2iqQzBk{zeRug=X;AjOG=a6UyTklN zLVln#Sq2<7VJG*&a1s(uxz?r~Ez~HvOSsOBL&A+nd!>gu_$If&x0G*yZ*vEHNBJgC z@$?}HEuN{8z^zp+LekYwM~^KzZ-1b={A^$IE2T!YGTm}O!8V}7pm<{GFx$rV|gN=$Tnlt#x8YBAhtDDdN3y0a2;V-vk!nK)SwG^l+zxWqd6zP>XY%*cLFy(2^{0)1q4V%}><0zZaUx3;$ziJ6RQDo1V-b$bVFocRV4 z6ZP*=SJP;Y&QY-rvF*Sf*^KeH&RFQ}x4W&P8rbHOz9JPBoKB&pM=6Cxe=*HQz_~eQ Sx`mo3DnoOPte?G+_5T3nwx^!} diff --git a/nlp_resource_data/nltk/tag/__pycache__/sequential.cpython-37.pyc b/nlp_resource_data/nltk/tag/__pycache__/sequential.cpython-37.pyc index 
d808ebfbc45595563f5f5b8fd89f9209b7f2bf18..05dcf3229325a8c3c28b0c32acd52f92b2fe6d5e 100644 GIT binary patch delta 6953 zcmcgxeQXrR72nz2yIY^{!)N zS_SKzOJq_hQ34{9HvReo6e<6-MNLnt)U;8m_K!$W6jf@aw%e*{RjDdXRkdoBsHiG^ zZ}!e>I|qd%%2_k)zL|Y9^WN{hdGGP-pOFuKPKx7xzgK|YyysHlm8>BA5nn1lH$03& zGae^`pb4s=5lzgHtf-1vNtFn`OBp%qQeFI6&bU=K&|Dc$R#6pBb7yL@Ue(K=J(=39 zPxbL^$4CHW`yskXC zNoxjbbD6rnOkJm~2kQDV;|Mxbrf zTj6k?HcdV%#3H3r#3$~IiB@2argLUudOnvj)A^iLH=j$V@|vE=q)nYBGe(S9f#j57 z(qzia>gH@-i?gD%rJYzUbLZeSm)M%9e3Qy&=aOc6Dx;UKO7DubLD1#(k->R`4zf$` zaWcey;r>R%2h&=4gISr{+0+EWSL&b*PLT ze!PMqXU%0$3jTC15k6MpQ}CLNCc* zV2ipF4n8V2#5U+?#tbgMn&YAK}2NS7$&eYGE2{X?nDsf4u zfz@RDNNePQ3>HiE4c%nhZ41%zlo>zE&NLq1(*rDMH*}UXqnj~TRWCEj0FJT^z}D$#v_CRO!L)KS<@YCIeoHcRle%!(bkhPXZJ2xyBH~d&5^|NXi!6ZB6M{)Div_rLu843ypz=u^v@N3*I0h0EZAhXsS+Q5XO+4M- z*)x&D(JH>p3hFuTSWg?^GxJlY|HVFv9NobUMY5z+#;_!y+9G#^z$F{h^i`jJ#GdYW zd=p=U>+O~v>OU`a4zZnK{{b7|6<9Y9Pw3Oh`HcB6cZl~y;Fu#(V-FyC1kO<%&=mD_ zgI(zCEXHG1RctBzd?+)n>^<;-Rm<(gzCGv|P;BqVmF^tX(f2S+`3*E%-yvC)MbNec zZcw5)2Kv6#&_M1%{dxkLTl1VJY__Xi)TGi2T}hejWq<2w^4LyKJ zu{VW4i+Q*rEs_JUBW^8M7Vf=_?b+_axjV3P_P4C#c6Mpg&~XS+1PR(+XaNc z5P1M*I35|i=DkQ5#cuWTYfpo^=*M-@90buz7FyynG?@iVus`)pP1XYyY`|sC&t-JF z72Y_*>V8?R1+7=EkF_hIy$`Wh`Zv#bt9piU$27O3EBTC}kx-WIG*tYF!OEJM9rMN~I7el)!N0AC{F%SIPq3)iTIEqMjD@Sb*H zXKS1r$rubIa9vzO5EZh3D?0*b*ZS78x3{$oqBd_Y)K;2`fcRSBfby4qyIr{xuw2845eyfR`ia5v z)C$dp$I~-Rzq_7wPc|L+I#-^$ntaY60hO9j+cOy9o2)Es4ITPnva9{pRb~Mq^F=Q|2*(CiWj`Wql}(9Zu|tC z0IwBrC%|h2+zIfifLa6L$GMSx5;@4;8r{&rohw>6WE2=;LbgT9K_=WEE{IC5kM49w zVFC@YonyTU4@(pX$kSt+yD(ayx4bNV!hSe*a_|Vwdl+^5;Pgka zTaFyyo1;L0OUsEJs<2*rt&hPs?=xlZ*Y`9aJH8M6ta^k%ci|+*BKc*3p8Xp8{oaA% zVH`aOUB%Qtt5D$(@ccGv9YZN@@3KIgKd)a=d|oQ!H1;d0hrKo7a%-~Yf~fMniEfN4 zuTR{sY(v{cta8Oh;K50wc|}P)4{DwemN1)ukKhKC+4l}kjPqMgv_z_7wws+xXLJav z#DOiAo9Z+A*|};#|8?(h1_~EAe@~R{ir@7hWFt-Ze>lPNaupb?2Wgkq24@;CEq83@9a8||4Cy~nSuSy=rqtaev%IlaA+$f}u_}C8) zo!Q5E;`ShyNSdkHN(i_nNxP|t+u+l!EPZ&#dW0H>8E!)(CQmGuNRzI(VHaW<1k(|6AOmBs$I+y2TF#1n4Cfu6R%P&ti#7&CgrZ|>@ zw*2F66j5LtVc$Q}QL(B&9O=cvROskC?v4DIr(g8vJ znR$I5rFqI;Q3oBL`JTGF4|k;2rshqkp(LRCHg(e!ZkFvhezmB>e2V*}8h)wpG&Mba zPW9u*c9jQSdE%qWNGoTJbhSn;@3fvY=cx|G8PzqXf!$U$Ar$QM2px>j1d*jqjFXqy zYbVaOe906v(S)=Pb-Y9X{2&PKNZf4q6W=XHwMaT=n8{p9H)3(v#WrBTOF$IEcN+`* zIAMIn<3N7S;kz9P)7W7?tA@&lOzRYKH97~y*}HKN3!G|bJ_kA6jIJdN9jbt^TDc~~ zD-5$ZZ;kiHJL{|L#gh?I$F832$3lZrDKxNuoZQ$QrC3tn8>(LOMVMYK?M|3va0(u8 zFCD<6zFzv>sr@8!3)V9*>rHmw)aIhEYS8G0amw>Eo_TWIdoc6 zvG=vG8|i%`_&o-qOQD!N|{ zQ~D%wGq9uB<*}Q?&V^x zWOz^{S)@pbwe!y`Zvy_)0c{3Kq%cXK5_C|toJ!dRLOhFFm&=%E2GH=5Q|V0lF{q`x zknZR7in&tMqgrdz_)U8f8D8X)Qj8@0bD5 zICdBeaDT=%$hW>iF3YF1lwl|v&1ZBT02x@wk?}qd712lFPplJt?z;UxqGUx9Al~i( XQMN)Gg4R)P_u+`*z28T?MWOb8N_qB; delta 6778 zcmb_h3v3+48Q$65yW4x(KA-J4wi7?%H0Q*2LP!E8IKjptDR2p~^I-G1oNw3m+4nHB zYns@1T&RPfY710TfkcsrMIRDEw9@sNszO4lDy6il+Nx5wQdPAQ1&Tx~q)4R_>Hp8( z!#OSvw5&D%&Hgj{&&>atZ^r-l9r@#LNPfic_XzN8m^>1_HYW&w!LSFcP8B_kLpQzRWE;cC4DKs z>Lz-vv4csSa9?c8yUOfQsb-I7H9(X>@4?I8emWkR>$L{pHt0dlZ7gzwS|e~9i`-C=8=BObfZbGNHx=2#{rJFMu zjr~H}UTBtoDYp1P#~&UVoXMp0!P(e_naq5Q&J7N*_naeSI~#O;qtgvN!i1u!DE79A zA8TG9yV<+0r%64#pmg_PAIoWG=Jd2-$!2WYut=inBTEAQU4@@3FN>`x+VaF_G8sdU z0z2%qhiPVydqZ8V(5gCtJ+0F!A+!B<)D5rn??^C)uw8 zv3mkgvy^$7nb)a#fveFyRjCBQhwxO%{T%TT>xhl zR8b?Uq%}M#Oo8KS@+CJ*$s5@3>w?ZF1zAwNg}3XDi!Bm-fW_DV#1xi=%OoqV5Oyqh z<+$ZA7GR!~Pr5D|&!kQLycsn!TxAkpxf-~U&6fmrcSob~WXv$4QF|?X1t@5#aH?@L z+4+f;XSs?4EVJ*2P7QAc5p)v}OPbcru%o8>d8ZmS_zHWl>CMgcB@wlq%aV=6qW&z~ z*Ss~4$zT+U5dxx-%R*L|C9`5yxGb#*E9mNCWYNEODL2%KAKkr_8xG5sb1FkqF_ZS< zKpjZ70J+I%3TQ~5r_t#1(8oXz5XfTlDA#r2@6Ac#x$Mt2WP{np6{&D$-8GTav5B@l 
z(|yGOw{=d&;&Yj)sk@hk25y_m^=|8&rwi#sdb-3o6AoCCp_>%#Oq+mMZauw_(rL`p zDc2Ro6L&#NifNi9rDEr~l4(8rXIrzIhM_$iq8{dHKh@rhom-Hg!&?p>cD5I9W{%&Atxl#FR<6!&*pu$j?e+=MkJj`&{yn$;4|b=!3FnW!w?Y5 zU-YrKo=lefjQbU_RnL8{8+s<87${Kj%VI$BxfNM)DH6rDI4-CBpo;J@4ziVwMo&3- z6n3R!GrVHpfYtqlw>qZ9Q`j4efikirD?|m8KnRJNutFf~3(%EB!jdIp#Lyn-#n%-U zZMVh92e;sU!riqTCGWFucRtixBlTHzdODub^ysXSNk=o2vmdbbuJJCuY{`=1NyCzW zYny}1v<0(gyF%m|`&QSIrM;EzU(7WwL z)R-$pQ+>o&aJcSo~Vlt7~N1to=fRUuEl$BRRwrR_b2g;V= zjum!s^H?Me&W(O*IZ~kVq@AvB$C>Q{Qks;lVC15Ub+DJV>^OpPjGi$H#lQ(8inxU7 z0}||taIdgTa*&qcU7VHXoOA$cGJ4RgV@ew5z63Q$Fx-da2zx!!97XLUK*pKiX)#A~ zVpcSv=Q3g9Bl5VdEy%nwrW5cvabUYe<}C7PMU52o_n~L6mWZ42Olp2O3>b&!9H+22 z$oj)=`GLL3Ogxq}h6gIW=)Le6Y>_xzm0b-d57$8>jOH*G=94r; z^!4YxHBwKr;2swBbODYvb2DHq<>**#$I`GxcSD-w5L zbRGjE)&^A?xQgv~S;q>V7I1G6Ba~}ZT(vb6k?2AAvSMX=i=;Dyi^iOT*x?v^Zbvh_ z*yv+dZ)^P&C>6~z$AUZi^U5vE!gu5~>@We6#&Aj)$;w8|&;f}e_v1DIrh)QSp;q?Y z4rhzP_g<2d;cc|Ke0-|O(X35o=`sQh(sb3 z_`(6$FgVPsf-n_7UPD*MKtKkiJE5WkQCAdsx6%}QrNetuB^|pn^r@7D&iel=>7`Xl zYMx+k?QQz3lym~-_oXT&A@2qyxqyL>fqJ*yiA7}>dug}`2=LVHX0HsF0m0SbF^L>! z^&{)MPy+_$55V@g{faA)NNg|%Ve0^#aR102XAg7$sPfcEpMnT9tNN;re0oR4cC40!JQHG6*ak;7-PdlX3}?Gt($7_cc zii{g9E&~G}ymGwQQ))Fr}&P!TW?#ASfu zMF1Li0ML*CXq+TM4sLuF*P8$$OgRgeF`8^TG>7S2c7BnZ5HzRe%t?<)Cj}dXXqcy5 zi*8S6@reU4eIv4a6crg3Es^SYrcKQ!l6nyuSWc=>>*wb`hT7o^fNRNwVZ*g{HgtGb zezE;f8em02>paT8vhh==En;ZVRjS}U$WR1wlkK%2%TnO1i31syd+dB%pEnbkG@K1h zN>Q(T_~MO*RTN`zlwlFQjCQ-QXZfJg0{NA1w}Rv&A^^L3BC;!oFYe;2C6;SGW&#vS zmq1W?oNB+)Kx{WlAKBNd;{Y7n)V};dEZN0=dt^rpt%?>Mg<=GOa5xBn&<1(Sct9r+ zJ`yae5-Bs#m5)$835<_TxFYUZqQVXxog3rxi9rh-17c=;rhHk1QreIpHmqo{S}1~m zF#Gq>-EAn9&lRH&PgEu%esLK={AtwPaOFs*#QqpgY22{-kThsD|-*B zYZZpV+3{gZOaff@FInH&;L9*D+zBbXshR=n# z4Y?Cz_sX>ZhqH@qd$C{t5TN4n1p%L4?NCN9{$}CTb0!%~LG_c}rsC?qQ#d?%kaVsg znnJGv`%%0<@2?5YFcv9C+5Duz!va@q#|GChidZX3lF+^r?TjBol*l4vKp6`D)vs3q zO0yzC)X*f0YK+tuKF|ipC|o?avhrmCFA%jLyeT)lIZAI?x7PTgKj+DMK(!%zkLAY& zdYoxfxh53`ednX=*RQ_=<@%yyDF?SQ?Ap}VXJyNg%zObZZZT=mIVAi7ujnE-IyKE< zdmhPSNS2Ufkz52~DJ7S$_72q08|?k*C%oRi z^gDt>O0mhfee)M%f1YU`!K9)3(@Aq~pcJ_jt*81p51e`+HlLslU{M6omW+IgS&8BS zgm$xoiS`^GKRB|ch#WbX;JJ*V8Tl+$EwMadOW6x|KMP5HxDmP;i1B2iz<)Os(Fe3% d^f~Jf)@K4l*$ia_N*GFOu^fQkgTcJu{Vx!u?2-Tg diff --git a/nlp_resource_data/nltk/tag/__pycache__/stanford.cpython-37.pyc b/nlp_resource_data/nltk/tag/__pycache__/stanford.cpython-37.pyc index f85ec79d795f8b778b56d45da3ff208f6c00c463..016ccda7fe441ef3f81dd0aeef28f53322ba9a41 100644 GIT binary patch delta 1468 zcmaKs&2Q9J5WxLx$Lsal&Tcj|WH$kCL_lt75)h#(;VTshO(A@Qv{Bt6cUixnCEgD` zdqp?}skv0@f#@CD8&XfbaESKS8#ko>0U(zP>amA%;lhcTXKfY`VpsdynR)Y`XXefL z$=r)M>nqDLB=~ePJO0v+HA?>6IWoURq<{okXf$$Ou3>s+!}6?#?b(e%Z&0jd!hECP z6+qU#p^H+G4UBCmFlgat#VdlI3rx^WS_FLqQt^;05A z#=O&L1)Y#O>n-N&{&R+yB+15fGxpOiYo1G2$g0Z%540f@j zh%6$580WjjG@0Npj6)R_42edYwN^lrfxE08!(T0IMU?62TjGavZb1Z^jc79lPa3g< z{HNUF@Lv8RVs;4peg0SOnf4j(H2Lr5=R^2tS5FJ!qWL_3YA(IM^&G6+=O?T$GZsjT z%;vFm^3p~0-;byO61~k@ZOY<%Oh(%T;xGaq%XqvsXGRm3&KyWz9z zXv3fr%lB(xHHvC941K>JG(2Suog4&4_$&M(92MPGWtFJ1sj!vqgW{xmaut0%L@x`J zzeRip=pFTLR!_SBja10gURp`UU%a}qJiq#p{QmBq8r8|{k9cx=uP9wW5PXo%Zlb(^ zSVV}lzeedOzgu!Hiy26-9*s+gZxELdmjQ{!Xw(VgWFU&GEKXExw0>FAdf@#&=l_-# z);?Zq5e?R)U+}ayzDJv=2clOX2d!AgUA|QwAzt^la-L{4w65?+W92O&?rlqF(~M}` zY5VEh<0be;CO{&lEWFTl!<0}>F!C?BS mr;-DBRYJ6vMG;2N~)se@8l=*=iUI?9UPAU delta 1507 zcmaJ>O>7fK6yDiguh;8c|Ai>c55aCpii_$rLWxvCLsKAWT7L3Ff7lke&h7}3?Aq4s zMukhQik!HixfTh5xPc<%o_gpJ+6$*1s!=bMI3tb-wNl@kZR~<7cD0}1y!mG4z0dE> zpGSW=>U`ulmIBZC`f`2phBHilxbgPk!2!z7gcYyqgC*?G_dl z-}Ey(il3qRYnoexvE^rBoTWt=mtbuB4vZZ+9`JL1en)Zl`P!ONEj;*bnRKVsA)S=? 
zk}*S4ykopcbbi~oI9Y;d*cd`#4_+)zU2cVRYO`_ma;x27TT?YYoc@s1xSPITL0Xlt z{d??W-2oU1z3zqnDnDd;6`U4E+}fhesJ}ClG%apiy3oC6RtUuNII}iA3FNa?BJ|DH zMzg94wf3GMS6*56B6I8#ZPF`kHn+)FtuF>gF@nPgCW3)5#(%b^NQFPO4ps)iAq?8| zT0Rx2Z`no+KVHy^C`%)Em>64^pryIwZ>JFGfdsANR|hL-lHjecOc#U zAh)AAE8vweR5iArKQ11~_qQ*@+)e&h@sEiSWE}zc|36*^e2f2Gn%O#y%rgjQ5zZmJ zoGe>LAEqdrdff{eQRLAesMq@`!YQ94NhXOAUWYHjY|WfZQnfX<(!Dt_q0g=%@d82_ z0JA{*Ji=$__;;(9vwGNlK2#yKURokuUpTjVX8!zZ^82;BIBb%cuW;w23eY-%K=2@w zeU0`>gi{DI?M1W>^PQ3MX&G2jDCqncVF}?Agiis4!DzG{#3B{N4HgR>%d#&^q6b!k z;Lk=*bzZNvjHZlR_XJNW=}YuUJrMQ;l3JsVyYNLh0JA8yCOW8X7p2A?W{MA%>75@( ziLnj_Tj4*AjdrBj+c#--JEHM+yB@au?SMW7ZiHQ+5L;D=O>#t48HI6mP*#f28tY!t z0<+*{(*VMXw%4IOpf@9?p?3hl%{7B~tF{4M%bJZKV!G0SsT9u3i$Hcb-V3fRnQ-OQ z8to01K?b&qYzrpKUsex>(N`t0P!nMuOQN5{n^JZf2trJF0o5@`Mb`?Og$cEA{2$(a B7Bm0= diff --git a/nlp_resource_data/nltk/tag/__pycache__/tnt.cpython-37.pyc b/nlp_resource_data/nltk/tag/__pycache__/tnt.cpython-37.pyc index e66f53c6e0bc3ad6cee514b4d5ad93f5b545924c..cc0cb1aae14f01221ffa1fc69de19089e17bc590 100644 GIT binary patch delta 1822 zcmZux+i%-c827QA#JQ*GmbU9IZCbXt_q1hPRBg~noj|BsX*ZBJYTO(*^=2pGvs1)N zD?!B*5*kKA>~RuE2;PE_5WMgw@G>MzJRn{e67a$k5|r;dk;oQd%Rm3l?K{8ka?ZE* zpLT9Fn@vghv=evD-`DQuh7S+0#U(@HjLRZDb>07(DiH^Z3Ypr{p|N{t`d`X$0w{%2XD9GBnNN;rBy7R%o8Cba#>WtA@P4*l%DK zr^GuCh99s?3rJy-mc+yoxPUxT!1ken%utc_3y2aZkOEcc{CcV=vvc9!rx!Makz~(^ zk*&1>Y)rWySb;6fMJlfJVVU+|4ou@8MhW_2O{&VBgEQOe;26^NwJlh}|HFTdkB{ME z_{aF@AtfKm1S0Xn#AXOYCe&T9-{=(rOwh>VnE;jgNHq)*i1K=h80{XZeiH(SQTthf zK>`Z6ULqeA2`b@x<@vafuZQ22$JuoFb6GzxR?Fs_M#62@ThQF;caDs#hKmyxONAd! zEZ;awmd6Pwu|UazCke=s*zyqzy(&oJD0%IZTiS{$rxvt1?PV>c6}2%)cFsw%7cDp*Q4`#4>V*Wt2E8 zD)=&uWmu67Gq}~cJ@o|}6^DxgzvaAyS$s&B9&8efh1HpZjq5~TBM|w#L9C~fBu1LO zZ@F!%OObrq6<$Ta^GORM#=F&dlLl5kn4Ky0DUOli#oxl!*~_fnd3$zSo)$Zjx6UsL zUJ~~{rswO+u#H`7y4<}JM7@lK>4ncWdourvF_6|s!Hcq6bt0TS=_aY^6Fk!ORG4@@ zOjNI=MSw-9Z-&>a7%Q6yDigd)I%l>(KlVlBW6J{M!i?hmb-L{ShTJBotUmthHzBEZMcq+jUSu zs)fjj15&joI8>rHq)H%}3L$ZY3pWlN7IEN!M2|>_0|&x;V}x6aUClRd=FNNa&3kX= z5BnRtZ!42YO8AYHua>@h{&wF`tA-;knG$DQc9|!eG9y`W6;Cx)A**i8i<@yzGc_@e zxw@Ay6HK}&@i^CxC9c_t4-_-W^E|i#OU;@Cu`rW$1|P?XzqVEm>YlywmUZ)>-mu_sWx4%+ z_O5*R7%HKkOkx38P7U%TYmVP+G;Dx0sS^ZrT1b&H7Bq3oC~o138jqAly#X0gWJ{I0 z*JuWI$+f&Pw+syiXdOcC&=>|0!l1U za$uE!Jc%tI^3bgU4YgEdK9!Zep~}fseO}+ulX_ks2d%w1HK6JuDH=?N*XNA+4e}?P zJ{s;QTef_sQt~NT6})txhM&(ZuuJWq=45v9GFeevqBJJ2YICv9k7Bgk2cpqoFF3E$ zkOCIP8sO9dBLTv}OX1Y~^s^%2=V(a}1C`re{Rf7pp(9BwEvK=?SzgAU=`79iY>2^L z`@{KrY(!ix3jCgP1>cYd;=;c`ZpXr%#alZslK2XN$mcdmU7kcS((NP5ZBku|J2~+<6p4!Hk6qtDo(5dHxqx7rC(`Tq=vqM? z#2qnnH8(h1rgFE+j_U+BfgX2=A(&~Q!{S-C8VNqYn(0@~~$GDxc+0X@Bu hzTwq*)3rD028#+&Wvmc6$I?o!CDETw7W967>L2$2XXpR` diff --git a/nlp_resource_data/nltk/tag/__pycache__/util.cpython-37.pyc b/nlp_resource_data/nltk/tag/__pycache__/util.cpython-37.pyc index 48708c09f1235e0f4118a9083f2da1fbb21b50ec..1366e573df21d635d48dea636a342fed8de2063b 100644 GIT binary patch delta 32 mcmew+^h$`^iIlB;&jtW~n+J0M delta 44 ycmaDQ^i7D{iISrZZX5<$n7G>+}ZI)*CX9EEFJPWq~ diff --git a/nlp_resource_data/nltk/tag/api.py b/nlp_resource_data/nltk/tag/api.py index c72fb03..0d4ffda 100644 --- a/nlp_resource_data/nltk/tag/api.py +++ b/nlp_resource_data/nltk/tag/api.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Tagger Interface # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # Steven Bird (minor additions) # URL: @@ -13,13 +13,15 @@ information, such as its part of speech. 
from abc import ABCMeta, abstractmethod from itertools import chain +from six import add_metaclass from nltk.internals import overridden from nltk.metrics import accuracy from nltk.tag.util import untag -class TaggerI(metaclass=ABCMeta): +@add_metaclass(ABCMeta) +class TaggerI(object): """ A processing interface for assigning a tag to each token in a list. Tags are case sensitive strings that identify some property of each @@ -72,7 +74,7 @@ class TaggerI(metaclass=ABCMeta): def _check_params(self, train, model): if (train and model) or (not train and not model): - raise ValueError("Must specify either training data or trained model.") + raise ValueError('Must specify either training data or trained model.') class FeaturesetTaggerI(TaggerI): diff --git a/nlp_resource_data/nltk/tag/brill.py b/nlp_resource_data/nltk/tag/brill.py index fe280a3..b44e335 100644 --- a/nlp_resource_data/nltk/tag/brill.py +++ b/nlp_resource_data/nltk/tag/brill.py @@ -1,13 +1,15 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: Transformation-based learning # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Marcus Uneson # based on previous (nltk2) version by # Christopher Maloof, Edward Loper, Steven Bird # URL: # For license information, see LICENSE.TXT +from __future__ import print_function, division + from collections import defaultdict, Counter from nltk.tag import TaggerI @@ -26,7 +28,7 @@ class Word(Feature): Feature which examines the text (word) of nearby tokens. """ - json_tag = "nltk.tag.brill.Word" + json_tag = 'nltk.tag.brill.Word' @staticmethod def extract_property(tokens, index): @@ -40,7 +42,7 @@ class Pos(Feature): Feature which examines the tags of nearby tokens. """ - json_tag = "nltk.tag.brill.Pos" + json_tag = 'nltk.tag.brill.Pos' @staticmethod def extract_property(tokens, index): @@ -204,7 +206,7 @@ class BrillTagger(TaggerI): of the TaggerTrainers available. 
""" - json_tag = "nltk.tag.BrillTagger" + json_tag = 'nltk.tag.BrillTagger' def __init__(self, initial_tagger, rules, training_stats=None): """ @@ -304,7 +306,7 @@ class BrillTagger(TaggerI): tids = [r.templateid for r in self._rules] train_stats = self.train_stats() - trainscores = train_stats["rulescores"] + trainscores = train_stats['rulescores'] assert len(trainscores) == len(tids), ( "corrupt statistics: " "{0} train scores for {1} rules".format(trainscores, tids) @@ -349,7 +351,7 @@ class BrillTagger(TaggerI): print(s) def print_testtrain_stats(): - testscores = test_stats["rulescores"] + testscores = test_stats['rulescores'] print( "TEMPLATE STATISTICS (TEST AND TRAIN) ({0} templates, {1} rules)".format( len(template_counts), len(tids) @@ -427,24 +429,24 @@ class BrillTagger(TaggerI): return sum(t[1] != g[1] for pair in zip(xs, gold) for (t, g) in zip(*pair)) testing_stats = {} - testing_stats["tokencount"] = sum(len(t) for t in sequences) - testing_stats["sequencecount"] = len(sequences) + testing_stats['tokencount'] = sum(len(t) for t in sequences) + testing_stats['sequencecount'] = len(sequences) tagged_tokenses = [self._initial_tagger.tag(tokens) for tokens in sequences] - testing_stats["initialerrors"] = counterrors(tagged_tokenses) - testing_stats["initialacc"] = ( - 1 - testing_stats["initialerrors"] / testing_stats["tokencount"] + testing_stats['initialerrors'] = counterrors(tagged_tokenses) + testing_stats['initialacc'] = ( + 1 - testing_stats['initialerrors'] / testing_stats['tokencount'] ) # Apply each rule to the entire corpus, in order - errors = [testing_stats["initialerrors"]] + errors = [testing_stats['initialerrors']] for rule in self._rules: for tagged_tokens in tagged_tokenses: rule.apply(tagged_tokens) errors.append(counterrors(tagged_tokenses)) - testing_stats["rulescores"] = [ + testing_stats['rulescores'] = [ err0 - err1 for (err0, err1) in zip(errors, errors[1:]) ] - testing_stats["finalerrors"] = errors[-1] - testing_stats["finalacc"] = ( - 1 - testing_stats["finalerrors"] / testing_stats["tokencount"] + testing_stats['finalerrors'] = errors[-1] + testing_stats['finalacc'] = ( + 1 - testing_stats['finalerrors'] / testing_stats['tokencount'] ) return (tagged_tokenses, testing_stats) diff --git a/nlp_resource_data/nltk/tag/brill_trainer.py b/nlp_resource_data/nltk/tag/brill_trainer.py index b284a03..f518dcf 100644 --- a/nlp_resource_data/nltk/tag/brill_trainer.py +++ b/nlp_resource_data/nltk/tag/brill_trainer.py @@ -8,6 +8,8 @@ # URL: # For license information, see LICENSE.TXT +from __future__ import print_function, division + import bisect import textwrap from collections import defaultdict @@ -251,19 +253,19 @@ class BrillTaggerTrainer(object): # Collect some statistics on the training process trainstats = {} - trainstats["min_acc"] = min_acc - trainstats["min_score"] = min_score - trainstats["tokencount"] = sum(len(t) for t in test_sents) - trainstats["sequencecount"] = len(test_sents) - trainstats["templatecount"] = len(self._templates) - trainstats["rulescores"] = [] - trainstats["initialerrors"] = sum( + trainstats['min_acc'] = min_acc + trainstats['min_score'] = min_score + trainstats['tokencount'] = sum(len(t) for t in test_sents) + trainstats['sequencecount'] = len(test_sents) + trainstats['templatecount'] = len(self._templates) + trainstats['rulescores'] = [] + trainstats['initialerrors'] = sum( tag[1] != truth[1] for paired in zip(test_sents, train_sents) for (tag, truth) in zip(*paired) ) - trainstats["initialacc"] = ( - 1 - 
trainstats["initialerrors"] / trainstats["tokencount"] + trainstats['initialacc'] = ( + 1 - trainstats['initialerrors'] / trainstats['tokencount'] ) if self._trace > 0: print( @@ -280,7 +282,7 @@ class BrillTaggerTrainer(object): print("Finding initial useful rules...") self._init_mappings(test_sents, train_sents) if self._trace: - print((" Found {} useful rules.".format(len(self._rule_scores)))) + print((" Found %d useful rules." % len(self._rule_scores))) # Let the user know what we're up to. if self._trace > 2: @@ -297,7 +299,7 @@ class BrillTaggerTrainer(object): if rule: rules.append(rule) score = self._rule_scores[rule] - trainstats["rulescores"].append(score) + trainstats['rulescores'].append(score) else: break # No more good rules left! @@ -318,15 +320,15 @@ class BrillTaggerTrainer(object): # The user can cancel training manually: except KeyboardInterrupt: - print("Training stopped manually -- {} rules found".format(len(rules))) + print("Training stopped manually -- %d rules found" % len(rules)) # Discard our tag position mapping & rule mappings. self._clean() - trainstats["finalerrors"] = trainstats["initialerrors"] - sum( - trainstats["rulescores"] + trainstats['finalerrors'] = trainstats['initialerrors'] - sum( + trainstats['rulescores'] ) - trainstats["finalacc"] = ( - 1 - trainstats["finalerrors"] / trainstats["tokencount"] + trainstats['finalacc'] = ( + 1 - trainstats['finalerrors'] / trainstats['tokencount'] ) # Create and return a tagger from the rules we found. return BrillTagger(self._initial_tagger, rules, trainstats) @@ -600,30 +602,30 @@ class BrillTaggerTrainer(object): rulestr = rule.format(self._ruleformat) if self._trace > 2: print( - "{:4d}{:4d}{:4d}{:4d} |".format(score, num_fixed, num_broken, num_other), end=" " + '%4d%4d%4d%4d |' % (score, num_fixed, num_broken, num_other), end=' ' ) print( textwrap.fill( rulestr, - initial_indent=" " * 20, + initial_indent=' ' * 20, width=79, - subsequent_indent=" " * 18 + "| ", + subsequent_indent=' ' * 18 + '| ', ).strip() ) else: print(rulestr) def _trace_apply(self, num_updates): - prefix = " " * 18 + "|" + prefix = ' ' * 18 + '|' print(prefix) - print(prefix, "Applying rule to {} positions.".format(num_updates)) + print(prefix, 'Applying rule to %d positions.' 
% num_updates) def _trace_update_rules(self, num_obsolete, num_new, num_unseen): - prefix = " " * 18 + "|" - print(prefix, "Updated rule tables:") - print(prefix, (" - {} rule applications removed".format(num_obsolete))) + prefix = ' ' * 18 + '|' + print(prefix, 'Updated rule tables:') + print(prefix, (' - %d rule applications removed' % num_obsolete)) print( prefix, - (" - {} rule applications added ({} novel)".format(num_new, num_unseen)), + (' - %d rule applications added (%d novel)' % (num_new, num_unseen)), ) print(prefix) diff --git a/nlp_resource_data/nltk/tag/crf.py b/nlp_resource_data/nltk/tag/crf.py index 48f9de1..828125f 100644 --- a/nlp_resource_data/nltk/tag/crf.py +++ b/nlp_resource_data/nltk/tag/crf.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: Interface to the CRFSuite Tagger # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Long Duong # URL: # For license information, see LICENSE.TXT @@ -9,7 +9,8 @@ """ A module for POS tagging using CRFSuite """ - +from __future__ import absolute_import +from __future__ import unicode_literals import unicodedata import re from nltk.tag.api import TaggerI @@ -78,7 +79,7 @@ class CRFTagger(TaggerI): """ - self._model_file = "" + self._model_file = '' self._tagger = pycrfsuite.Tagger() if feature_func is None: @@ -88,7 +89,7 @@ class CRFTagger(TaggerI): self._verbose = verbose self._training_options = training_opt - self._pattern = re.compile(r"\d") + self._pattern = re.compile(r'\d') def set_model_file(self, model_file): self._model_file = model_file @@ -117,31 +118,31 @@ class CRFTagger(TaggerI): # Capitalization if token[0].isupper(): - feature_list.append("CAPITALIZATION") + feature_list.append('CAPITALIZATION') # Number if re.search(self._pattern, token) is not None: - feature_list.append("HAS_NUM") + feature_list.append('HAS_NUM') # Punctuation punc_cat = set(["Pc", "Pd", "Ps", "Pe", "Pi", "Pf", "Po"]) if all(unicodedata.category(x) in punc_cat for x in token): - feature_list.append("PUNCTUATION") + feature_list.append('PUNCTUATION') # Suffix up to length 3 if len(token) > 1: - feature_list.append("SUF_" + token[-1:]) + feature_list.append('SUF_' + token[-1:]) if len(token) > 2: - feature_list.append("SUF_" + token[-2:]) + feature_list.append('SUF_' + token[-2:]) if len(token) > 3: - feature_list.append("SUF_" + token[-3:]) + feature_list.append('SUF_' + token[-3:]) - feature_list.append("WORD_" + token) + feature_list.append('WORD_' + token) return feature_list def tag_sents(self, sents): - """ + ''' Tag a list of sentences. NB before using this function, user should specify the mode_file either by - Train a new model using ``train'' function - Use the pre-trained model which is set via ``set_model_file'' function @@ -149,10 +150,10 @@ class CRFTagger(TaggerI): :type sentences : list(list(str)) :return : list of tagged sentences. :rtype : list (list (tuple(str,str))) - """ - if self._model_file == "": + ''' + if self._model_file == '': raise Exception( - " No model file is found !! Please use train or set_model_file function" + ' No model file is found !! 
Please use train or set_model_file function' ) # We need the list of sentences instead of the list generator for matching the input and output @@ -162,7 +163,7 @@ class CRFTagger(TaggerI): labels = self._tagger.tag(features) if len(labels) != len(tokens): - raise Exception(" Predicted Length Not Matched, Expect Errors !") + raise Exception(' Predicted Length Not Matched, Expect Errors !') tagged_sent = list(zip(tokens, labels)) result.append(tagged_sent) @@ -170,13 +171,13 @@ class CRFTagger(TaggerI): return result def train(self, train_data, model_file): - """ + ''' Train the CRF tagger using CRFSuite :params train_data : is the list of annotated sentences. :type train_data : list (list(tuple(str,str))) :params model_file : the model will be saved to this file. - """ + ''' trainer = pycrfsuite.Trainer(verbose=self._verbose) trainer.set_params(self._training_options) @@ -191,7 +192,7 @@ class CRFTagger(TaggerI): self.set_model_file(model_file) def tag(self, tokens): - """ + ''' Tag a sentence using Python CRFSuite Tagger. NB before using this function, user should specify the mode_file either by - Train a new model using ``train'' function - Use the pre-trained model which is set via ``set_model_file'' function @@ -199,6 +200,6 @@ class CRFTagger(TaggerI): :type tokens : list(str) :return : list of tagged tokens. :rtype : list (tuple(str,str)) - """ + ''' return self.tag_sents([tokens])[0] diff --git a/nlp_resource_data/nltk/tag/hmm.py b/nlp_resource_data/nltk/tag/hmm.py index 6e543d9..5e834dc 100644 --- a/nlp_resource_data/nltk/tag/hmm.py +++ b/nlp_resource_data/nltk/tag/hmm.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Hidden Markov Model # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Trevor Cohn # Philip Blunsom # Tiago Tresoldi (fixes) @@ -68,10 +68,13 @@ of EM. For more information, please consult the source code for this module, which includes extensive demonstration code. """ +from __future__ import print_function, unicode_literals, division import re import itertools +from six.moves import map, zip + try: import numpy as np except ImportError: @@ -90,6 +93,7 @@ from nltk.probability import ( ) from nltk.metrics import accuracy from nltk.util import LazyMap, unique_list +from nltk.compat import python_2_unicode_compatible from nltk.tag.api import TaggerI @@ -101,6 +105,7 @@ def _identity(labeled_symbols): return labeled_symbols +@python_2_unicode_compatible class HiddenMarkovModelTagger(TaggerI): """ Hidden Markov model class, a generative model for labelling sequence data. 
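The hunks in this file, as in `api.py` and `brill.py` above, apply one mechanical backport pattern: add `from __future__ import ...`, pull `map`/`zip`/`add_metaclass` from `six`, and decorate classes that define `__str__` with `python_2_unicode_compatible`. The snippet below is not part of the patch; it is a minimal, self-contained sketch of that pattern, assuming only that `six` is installed and using hypothetical class names.

```python
# Standalone illustration of the Python 2/3 compatibility idioms used throughout
# this patch. DemoTaggerI and UppercaseTagger are hypothetical, not NLTK classes.
from __future__ import print_function, unicode_literals, division

from abc import ABCMeta, abstractmethod

from six import add_metaclass, python_2_unicode_compatible, text_type
from six.moves import zip


@add_metaclass(ABCMeta)          # Py2-compatible spelling of `class DemoTaggerI(metaclass=ABCMeta)`
class DemoTaggerI(object):
    @abstractmethod
    def tag(self, tokens):
        """Return a list of (token, tag) pairs."""


@python_2_unicode_compatible     # on Py2, derives __unicode__/__str__ from a text-returning __str__; no-op on Py3
class UppercaseTagger(DemoTaggerI):
    def tag(self, tokens):
        # text_type is `unicode` on Python 2 and `str` on Python 3
        tags = ['UP' if isinstance(t, text_type) and t[:1].isupper() else 'low'
                for t in tokens]
        return list(zip(tokens, tags))

    def __str__(self):
        return 'UppercaseTagger()'


if __name__ == '__main__':
    print(UppercaseTagger().tag(['The', 'red', 'cat']))
    print(UppercaseTagger())
```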
@@ -180,15 +185,15 @@ class HiddenMarkovModelTagger(TaggerI): ) if test_sequence: - hmm.test(test_sequence, verbose=kwargs.get("verbose", False)) + hmm.test(test_sequence, verbose=kwargs.get('verbose', False)) if unlabeled_sequence: - max_iterations = kwargs.get("max_iterations", 5) + max_iterations = kwargs.get('max_iterations', 5) hmm = trainer.train_unsupervised( unlabeled_sequence, model=hmm, max_iterations=max_iterations ) if test_sequence: - hmm.test(test_sequence, verbose=kwargs.get("verbose", False)) + hmm.test(test_sequence, verbose=kwargs.get('verbose', False)) return hmm @@ -519,7 +524,7 @@ class HiddenMarkovModelTagger(TaggerI): if cum_p <= p <= cum_p + add_p: return sample cum_p += add_p - raise Exception("Invalid probability distribution - " "does not sum to one") + raise Exception('Invalid probability distribution - ' 'does not sum to one') def entropy(self, unlabeled_sequence): """ @@ -564,7 +569,7 @@ class HiddenMarkovModelTagger(TaggerI): for i, state in enumerate(self._states): p = 2 ** (alpha[0, i] + beta[0, i] - normalisation) entropy -= p * self._priors.logprob(state) - # print('p(s_0 = %s) =' % state, p) + # print 'p(s_0 = %s) =' % state, p # state transitions for t0 in range(T - 1): @@ -579,7 +584,7 @@ class HiddenMarkovModelTagger(TaggerI): - normalisation ) entropy -= p * self._transitions[s0].logprob(s1) - # print('p(s_%d = %s, s_%d = %s) =' % (t0, s0, t1, s1), p) + # print 'p(s_%d = %s, s_%d = %s) =' % (t0, s0, t1, s1), p # symbol emissions for t in range(T): @@ -588,7 +593,7 @@ class HiddenMarkovModelTagger(TaggerI): entropy -= p * self._outputs[state].logprob( unlabeled_sequence[t][_TEXT] ) - # print('p(s_%d = %s) =' % (t, state), p) + # print 'p(s_%d = %s) =' % (t, state), p return entropy @@ -640,6 +645,15 @@ class HiddenMarkovModelTagger(TaggerI): log_probs.append(lp) normalisation = _log_add(*log_probs) + # ps = zeros((T, N), float64) + # for labelling, lp in zip(labellings, log_probs): + # for t in range(T): + # ps[t, self._states.index(labelling[t])] += \ + # 2**(lp - normalisation) + + # for t in range(T): + # print 'prob[%d] =' % t, ps[t] + entropy = 0 for lp in log_probs: lp -= normalisation @@ -803,33 +817,33 @@ class HiddenMarkovModelTagger(TaggerI): if verbose: for test_sent, predicted_sent in zip(test_sequence, predicted_sequence): print( - "Test:", - " ".join("%s/%s" % (token, tag) for (token, tag) in test_sent), + 'Test:', + ' '.join('%s/%s' % (token, tag) for (token, tag) in test_sent), ) print() - print("Untagged:", " ".join("%s" % token for (token, tag) in test_sent)) + print('Untagged:', ' '.join("%s" % token for (token, tag) in test_sent)) print() print( - "HMM-tagged:", - " ".join("%s/%s" % (token, tag) for (token, tag) in predicted_sent), + 'HMM-tagged:', + ' '.join('%s/%s' % (token, tag) for (token, tag) in predicted_sent), ) print() print( - "Entropy:", + 'Entropy:', self.entropy([(token, None) for (token, tag) in predicted_sent]), ) print() - print("-" * 60) + print('-' * 60) test_tags = flatten(map(tags, test_sequence)) predicted_tags = flatten(map(tags, predicted_sequence)) acc = accuracy(test_tags, predicted_tags) count = sum(len(sent) for sent in test_sequence) - print("accuracy over %d tokens: %.2f" % (count, acc * 100)) + print('accuracy over %d tokens: %.2f' % (count, acc * 100)) def __repr__(self): - return "" % ( + return '' % ( len(self._states), len(self._symbols), ) @@ -878,7 +892,7 @@ class HiddenMarkovModelTrainer(object): model = self.train_supervised(labeled_sequences, **kwargs) if unlabeled_sequences: if model: - 
kwargs["model"] = model + kwargs['model'] = model model = self.train_unsupervised(unlabeled_sequences, **kwargs) return model @@ -954,7 +968,7 @@ class HiddenMarkovModelTrainer(object): # create a uniform HMM, which will be iteratively refined, unless # given an existing model - model = kwargs.get("model") + model = kwargs.get('model') if not model: priors = RandomProbDist(self._states) transitions = DictionaryConditionalProbDist( @@ -998,8 +1012,8 @@ class HiddenMarkovModelTrainer(object): converged = False last_logprob = None iteration = 0 - max_iterations = kwargs.get("max_iterations", 1000) - epsilon = kwargs.get("convergence_logprob", 1e-6) + max_iterations = kwargs.get('max_iterations', 1000) + epsilon = kwargs.get('convergence_logprob', 1e-6) while not converged and iteration < max_iterations: A_numer = _ninf_array((N, N)) @@ -1064,7 +1078,7 @@ class HiddenMarkovModelTrainer(object): if iteration > 0 and abs(logprob - last_logprob) < epsilon: converged = True - print("iteration", iteration, "logprob", logprob) + print('iteration', iteration, 'logprob', logprob) iteration += 1 last_logprob = logprob @@ -1179,8 +1193,8 @@ def _market_hmm_example(): """ Return an example HMM (described at page 381, Huang et al) """ - states = ["bull", "bear", "static"] - symbols = ["up", "down", "unchanged"] + states = ['bull', 'bear', 'static'] + symbols = ['up', 'down', 'unchanged'] A = np.array([[0.6, 0.2, 0.2], [0.5, 0.3, 0.2], [0.4, 0.1, 0.5]], np.float64) B = np.array([[0.7, 0.1, 0.2], [0.1, 0.6, 0.3], [0.3, 0.3, 0.4]], np.float64) pi = np.array([0.5, 0.2, 0.3], np.float64) @@ -1198,34 +1212,34 @@ def demo(): model, states, symbols = _market_hmm_example() - print("Testing", model) + print('Testing', model) for test in [ - ["up", "up"], - ["up", "down", "up"], - ["down"] * 5, - ["unchanged"] * 5 + ["up"], + ['up', 'up'], + ['up', 'down', 'up'], + ['down'] * 5, + ['unchanged'] * 5 + ['up'], ]: sequence = [(t, None) for t in test] - print("Testing with state sequence", test) - print("probability =", model.probability(sequence)) - print("tagging = ", model.tag([word for (word, tag) in sequence])) - print("p(tagged) = ", model.probability(sequence)) - print("H = ", model.entropy(sequence)) - print("H_exh = ", model._exhaustive_entropy(sequence)) - print("H(point) = ", model.point_entropy(sequence)) - print("H_exh(point)=", model._exhaustive_point_entropy(sequence)) + print('Testing with state sequence', test) + print('probability =', model.probability(sequence)) + print('tagging = ', model.tag([word for (word, tag) in sequence])) + print('p(tagged) = ', model.probability(sequence)) + print('H = ', model.entropy(sequence)) + print('H_exh = ', model._exhaustive_entropy(sequence)) + print('H(point) = ', model.point_entropy(sequence)) + print('H_exh(point)=', model._exhaustive_point_entropy(sequence)) print() def load_pos(num_sents): from nltk.corpus import brown - sentences = brown.tagged_sents(categories="news")[:num_sents] + sentences = brown.tagged_sents(categories='news')[:num_sents] - tag_re = re.compile(r"[*]|--|[^+*-]+") + tag_re = re.compile(r'[*]|--|[^+*-]+') tag_set = set() symbols = set() @@ -1251,7 +1265,7 @@ def demo_pos(): print("HMM POS tagging demo") print() - print("Training HMM...") + print('Training HMM...') labelled_sequences, tag_set, symbols = load_pos(20000) trainer = HiddenMarkovModelTrainer(tag_set, symbols) hmm = trainer.train_supervised( @@ -1259,7 +1273,7 @@ def demo_pos(): estimator=lambda fd, bins: LidstoneProbDist(fd, 0.1, bins), ) - print("Testing...") + 
print('Testing...') hmm.test(labelled_sequences[:10], verbose=True) @@ -1279,7 +1293,7 @@ def demo_pos_bw( print("Baum-Welch demo for POS tagging") print() - print("Training HMM (supervised, %d sentences)..." % supervised) + print('Training HMM (supervised, %d sentences)...' % supervised) sentences, tag_set, symbols = load_pos(test + supervised + unsupervised) @@ -1296,7 +1310,7 @@ def demo_pos_bw( hmm.test(sentences[:test], verbose=verbose) - print("Training (unsupervised, %d sentences)..." % unsupervised) + print('Training (unsupervised, %d sentences)...' % unsupervised) # it's rather slow - so only use 10 samples by default unlabeled = _untag(sentences[test + supervised :]) hmm = trainer.train_unsupervised( diff --git a/nlp_resource_data/nltk/tag/hunpos.py b/nlp_resource_data/nltk/tag/hunpos.py index 3053e8c..9513338 100644 --- a/nlp_resource_data/nltk/tag/hunpos.py +++ b/nlp_resource_data/nltk/tag/hunpos.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: Interface to the HunPos POS-tagger # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Peter Ljunglöf # Dávid Márk Nemeskey (modifications) # Attila Zséder (modifications) @@ -15,12 +15,14 @@ A module for interfacing with the HunPos open-source POS-tagger. import os from subprocess import Popen, PIPE +from six import text_type + from nltk.internals import find_binary, find_file from nltk.tag.api import TaggerI -_hunpos_url = "http://code.google.com/p/hunpos/" +_hunpos_url = 'http://code.google.com/p/hunpos/' -_hunpos_charset = "ISO-8859-1" +_hunpos_charset = 'ISO-8859-1' """The default encoding used by hunpos: ISO-8859-1.""" @@ -68,27 +70,27 @@ class HunposTagger(TaggerI): """ self._closed = True hunpos_paths = [ - ".", - "/usr/bin", - "/usr/local/bin", - "/opt/local/bin", - "/Applications/bin", - "~/bin", - "~/Applications/bin", + '.', + '/usr/bin', + '/usr/local/bin', + '/opt/local/bin', + '/Applications/bin', + '~/bin', + '~/Applications/bin', ] hunpos_paths = list(map(os.path.expanduser, hunpos_paths)) self._hunpos_bin = find_binary( - "hunpos-tag", + 'hunpos-tag', path_to_bin, - env_vars=("HUNPOS_TAGGER",), + env_vars=('HUNPOS_TAGGER',), searchpath=hunpos_paths, url=_hunpos_url, verbose=verbose, ) self._hunpos_model = find_file( - path_to_model, env_vars=("HUNPOS_TAGGER",), verbose=verbose + path_to_model, env_vars=('HUNPOS_TAGGER',), verbose=verbose ) self._encoding = encoding self._hunpos = Popen( @@ -121,7 +123,7 @@ class HunposTagger(TaggerI): """ for token in tokens: assert "\n" not in token, "Tokens should not contain newlines" - if isinstance(token, str): + if isinstance(token, text_type): token = token.encode(self._encoding) self._hunpos.stdin.write(token + b"\n") # We write a final empty line to tell hunpos that the sentence is finished: @@ -144,6 +146,6 @@ def setup_module(module): from nose import SkipTest try: - HunposTagger("en_wsj.model") + HunposTagger('en_wsj.model') except LookupError: raise SkipTest("HunposTagger is not available") diff --git a/nlp_resource_data/nltk/tag/mapping.py b/nlp_resource_data/nltk/tag/mapping.py index 9dedbeb..2e38365 100644 --- a/nlp_resource_data/nltk/tag/mapping.py +++ b/nlp_resource_data/nltk/tag/mapping.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Tagset Mapping # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Nathan Schneider # Steven Bird # URL: @@ -29,6 +29,7 @@ X - other: foreign words, typos, abbreviations """ +from __future__ import print_function, unicode_literals, 
division from collections import defaultdict from os.path import join @@ -36,44 +37,44 @@ from nltk.data import load _UNIVERSAL_DATA = "taggers/universal_tagset" _UNIVERSAL_TAGS = ( - "VERB", - "NOUN", - "PRON", - "ADJ", - "ADV", - "ADP", - "CONJ", - "DET", - "NUM", - "PRT", - "X", - ".", + 'VERB', + 'NOUN', + 'PRON', + 'ADJ', + 'ADV', + 'ADP', + 'CONJ', + 'DET', + 'NUM', + 'PRT', + 'X', + '.', ) # _MAPPINGS = defaultdict(lambda: defaultdict(dict)) # the mapping between tagset T1 and T2 returns UNK if appied to an unrecognized tag -_MAPPINGS = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: "UNK"))) +_MAPPINGS = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 'UNK'))) def _load_universal_map(fileid): - contents = load(join(_UNIVERSAL_DATA, fileid + ".map"), format="text") + contents = load(join(_UNIVERSAL_DATA, fileid + '.map'), format="text") # When mapping to the Universal Tagset, # map unknown inputs to 'X' not 'UNK' - _MAPPINGS[fileid]["universal"].default_factory = lambda: "X" + _MAPPINGS[fileid]['universal'].default_factory = lambda: 'X' for line in contents.splitlines(): line = line.strip() - if line == "": + if line == '': continue - fine, coarse = line.split("\t") + fine, coarse = line.split('\t') - assert coarse in _UNIVERSAL_TAGS, "Unexpected coarse tag: {}".format(coarse) + assert coarse in _UNIVERSAL_TAGS, 'Unexpected coarse tag: {}'.format(coarse) assert ( - fine not in _MAPPINGS[fileid]["universal"] - ), "Multiple entries for original tag: {}".format(fine) + fine not in _MAPPINGS[fileid]['universal'] + ), 'Multiple entries for original tag: {}'.format(fine) - _MAPPINGS[fileid]["universal"][fine] = coarse + _MAPPINGS[fileid]['universal'][fine] = coarse def tagset_mapping(source, target): @@ -87,28 +88,28 @@ def tagset_mapping(source, target): """ if source not in _MAPPINGS or target not in _MAPPINGS[source]: - if target == "universal": + if target == 'universal': _load_universal_map(source) # Added the new Russian National Corpus mappings because the # Russian model for nltk.pos_tag() uses it. - _MAPPINGS["ru-rnc-new"]["universal"] = { - "A": "ADJ", - "A-PRO": "PRON", - "ADV": "ADV", - "ADV-PRO": "PRON", - "ANUM": "ADJ", - "CONJ": "CONJ", - "INTJ": "X", - "NONLEX": ".", - "NUM": "NUM", - "PARENTH": "PRT", - "PART": "PRT", - "PR": "ADP", - "PRAEDIC": "PRT", - "PRAEDIC-PRO": "PRON", - "S": "NOUN", - "S-PRO": "PRON", - "V": "VERB", + _MAPPINGS['ru-rnc-new']['universal'] = { + 'A': 'ADJ', + 'A-PRO': 'PRON', + 'ADV': 'ADV', + 'ADV-PRO': 'PRON', + 'ANUM': 'ADJ', + 'CONJ': 'CONJ', + 'INTJ': 'X', + 'NONLEX': '.', + 'NUM': 'NUM', + 'PARENTH': 'PRT', + 'PART': 'PRT', + 'PR': 'ADP', + 'PRAEDIC': 'PRT', + 'PRAEDIC-PRO': 'PRON', + 'S': 'NOUN', + 'S-PRO': 'PRON', + 'V': 'VERB', } return _MAPPINGS[source][target] @@ -127,10 +128,10 @@ def map_tag(source, target, source_tag): """ # we need a systematic approach to naming - if target == "universal": - if source == "wsj": - source = "en-ptb" - if source == "brown": - source = "en-brown" + if target == 'universal': + if source == 'wsj': + source = 'en-ptb' + if source == 'brown': + source = 'en-brown' return tagset_mapping(source, target)[source_tag] diff --git a/nlp_resource_data/nltk/tag/perceptron.py b/nlp_resource_data/nltk/tag/perceptron.py index 1742a59..548c004 100644 --- a/nlp_resource_data/nltk/tag/perceptron.py +++ b/nlp_resource_data/nltk/tag/perceptron.py @@ -9,6 +9,9 @@ # # This module is provided under the terms of the MIT License. 
+from __future__ import absolute_import +from __future__ import print_function, division + import random from collections import defaultdict import pickle @@ -16,30 +19,22 @@ import logging from nltk.tag.api import TaggerI from nltk.data import find, load - -from nltk import jsontags - -try: - import numpy as np -except ImportError: - pass +from nltk.compat import python_2_unicode_compatible PICKLE = "averaged_perceptron_tagger.pickle" -@jsontags.register_tag -class AveragedPerceptron: - """An averaged perceptron, as implemented by Matthew Honnibal. +class AveragedPerceptron(object): + + '''An averaged perceptron, as implemented by Matthew Honnibal. See more implementation details here: https://explosion.ai/blog/part-of-speech-pos-tagger-in-python - """ - - json_tag = "nltk.tag.perceptron.AveragedPerceptron" + ''' - def __init__(self, weights=None): + def __init__(self): # Each feature gets its own weight vector, so weights is a dict-of-dicts - self.weights = weights if weights else {} + self.weights = {} self.classes = set() # The accumulated values, for the averaging. These will be keyed by # feature/clas tuples @@ -51,13 +46,8 @@ class AveragedPerceptron: # Number of instances seen self.i = 0 - def _softmax(self, scores): - s = np.fromiter(scores.values(), dtype=float) - exps = np.exp(s) - return exps / np.sum(exps) - - def predict(self, features, return_conf=False): - """Dot-product the features and current weights and return the best label.""" + def predict(self, features): + '''Dot-product the features and current weights and return the best label.''' scores = defaultdict(float) for feat, value in features.items(): if feat not in self.weights or value == 0: @@ -65,16 +55,11 @@ class AveragedPerceptron: weights = self.weights[feat] for label, weight in weights.items(): scores[label] += value * weight - # Do a secondary alphabetic sort, for stability - best_label = max(self.classes, key=lambda label: (scores[label], label)) - # compute the confidence - conf = max(self._softmax(scores)) if return_conf == True else None - - return best_label, conf + return max(self.classes, key=lambda label: (scores[label], label)) def update(self, truth, guess, features): - """Update the feature weights.""" + '''Update the feature weights.''' def upd_feat(c, f, w, v): param = (f, c) @@ -91,7 +76,7 @@ class AveragedPerceptron: upd_feat(guess, f, weights.get(guess, 0.0), -1.0) def average_weights(self): - """Average weights from all iterations.""" + '''Average weights from all iterations.''' for feat, weights in self.weights.items(): new_feat_weights = {} for clas, weight in weights.items(): @@ -104,26 +89,19 @@ class AveragedPerceptron: self.weights[feat] = new_feat_weights def save(self, path): - """Save the pickled model weights.""" - with open(path, "wb") as fout: + '''Save the pickled model weights.''' + with open(path, 'wb') as fout: return pickle.dump(dict(self.weights), fout) def load(self, path): - """Load the pickled model weights.""" + '''Load the pickled model weights.''' self.weights = load(path) - def encode_json_obj(self): - return self.weights - - @classmethod - def decode_json_obj(cls, obj): - return cls(obj) - -@jsontags.register_tag +@python_2_unicode_compatible class PerceptronTagger(TaggerI): - """ + ''' Greedy Averaged Perceptron tagger, as implemented by Matthew Honnibal. 
See more implementation details here: https://explosion.ai/blog/part-of-speech-pos-tagger-in-python @@ -149,59 +127,54 @@ class PerceptronTagger(TaggerI): >>> pretrain.tag("The red cat".split()) [('The', 'DT'), ('red', 'JJ'), ('cat', 'NN')] - """ - - json_tag = "nltk.tag.sequential.PerceptronTagger" + ''' - START = ["-START-", "-START2-"] - END = ["-END-", "-END2-"] + START = ['-START-', '-START2-'] + END = ['-END-', '-END2-'] def __init__(self, load=True): - """ + ''' :param load: Load the pickled model upon instantiation. - """ + ''' self.model = AveragedPerceptron() self.tagdict = {} self.classes = set() if load: - AP_MODEL_LOC = "file:" + str( - find("taggers/averaged_perceptron_tagger/" + PICKLE) + AP_MODEL_LOC = 'file:' + str( + find('taggers/averaged_perceptron_tagger/' + PICKLE) ) self.load(AP_MODEL_LOC) - def tag(self, tokens, return_conf=False, use_tagdict=True): - """ + def tag(self, tokens): + ''' Tag tokenized sentences. :params tokens: list of word :type tokens: list(str) - """ + ''' prev, prev2 = self.START output = [] context = self.START + [self.normalize(w) for w in tokens] + self.END for i, word in enumerate(tokens): - tag, conf = ( - (self.tagdict.get(word), 1.0) if use_tagdict == True else (None, None) - ) + tag = self.tagdict.get(word) if not tag: features = self._get_features(i, word, context, prev, prev2) - tag, conf = self.model.predict(features, return_conf) - output.append((word, tag, conf) if return_conf == True else (word, tag)) - + tag = self.model.predict(features) + output.append((word, tag)) prev2 = prev prev = tag return output def train(self, sentences, save_loc=None, nr_iter=5): - """Train a model from sentences, and save it at ``save_loc``. ``nr_iter`` + '''Train a model from sentences, and save it at ``save_loc``. ``nr_iter`` controls the number of Perceptron training iterations. :param sentences: A list or iterator of sentences, where each sentence is a list of (words, tags) tuples. :param save_loc: If not ``None``, saves a pickled model in this location. :param nr_iter: Number of training iterations. - """ + ''' # We'd like to allow ``sentences`` to be either a list or an iterator, # the latter being especially important for a large training dataset. # Because ``self._make_tagdict(sentences)`` runs regardless, we make @@ -224,7 +197,7 @@ class PerceptronTagger(TaggerI): guess = self.tagdict.get(word) if not guess: feats = self._get_features(i, word, context, prev, prev2) - guess, _ = self.model.predict(feats) + guess = self.model.predict(feats) self.model.update(tags[i], guess, feats) prev2 = prev prev = guess @@ -240,81 +213,70 @@ class PerceptronTagger(TaggerI): self.model.average_weights() # Pickle as a binary file if save_loc is not None: - with open(save_loc, "wb") as fout: + with open(save_loc, 'wb') as fout: # changed protocol from -1 to 2 to make pickling Python 2 compatible pickle.dump((self.model.weights, self.tagdict, self.classes), fout, 2) def load(self, loc): - """ + ''' :param loc: Load a pickled model at location. :type loc: str - """ + ''' self.model.weights, self.tagdict, self.classes = load(loc) self.model.classes = self.classes - def encode_json_obj(self): - return self.model.weights, self.tagdict, list(self.classes) - - @classmethod - def decode_json_obj(cls, obj): - tagger = cls(load=False) - tagger.model.weights, tagger.tagdict, tagger.classes = obj - tagger.classes = set(tagger.classes) - tagger.model.classes = tagger.classes - return tagger - def normalize(self, word): - """ + ''' Normalization used in pre-processing. 
- All words are lower cased - Groups of digits of length 4 are represented as !YEAR; - Other digits are represented as !DIGITS :rtype: str - """ - if "-" in word and word[0] != "-": - return "!HYPHEN" + ''' + if '-' in word and word[0] != '-': + return '!HYPHEN' elif word.isdigit() and len(word) == 4: - return "!YEAR" + return '!YEAR' elif word[0].isdigit(): - return "!DIGITS" + return '!DIGITS' else: return word.lower() def _get_features(self, i, word, context, prev, prev2): - """Map tokens into a feature representation, implemented as a + '''Map tokens into a feature representation, implemented as a {hashable: int} dict. If the features change, a new model must be trained. - """ + ''' def add(name, *args): - features[" ".join((name,) + tuple(args))] += 1 + features[' '.join((name,) + tuple(args))] += 1 i += len(self.START) features = defaultdict(int) # It's useful to have a constant feature, which acts sort of like a prior - add("bias") - add("i suffix", word[-3:]) - add("i pref1", word[0]) - add("i-1 tag", prev) - add("i-2 tag", prev2) - add("i tag+i-2 tag", prev, prev2) - add("i word", context[i]) - add("i-1 tag+i word", prev, context[i]) - add("i-1 word", context[i - 1]) - add("i-1 suffix", context[i - 1][-3:]) - add("i-2 word", context[i - 2]) - add("i+1 word", context[i + 1]) - add("i+1 suffix", context[i + 1][-3:]) - add("i+2 word", context[i + 2]) + add('bias') + add('i suffix', word[-3:]) + add('i pref1', word[0]) + add('i-1 tag', prev) + add('i-2 tag', prev2) + add('i tag+i-2 tag', prev, prev2) + add('i word', context[i]) + add('i-1 tag+i word', prev, context[i]) + add('i-1 word', context[i - 1]) + add('i-1 suffix', context[i - 1][-3:]) + add('i-2 word', context[i - 2]) + add('i+1 word', context[i + 1]) + add('i+1 suffix', context[i + 1][-3:]) + add('i+2 word', context[i + 2]) return features def _make_tagdict(self, sentences): - """ + ''' Make a tag dictionary for single-tag words. :param sentences: A list of list of (word, tag) tuples. 
- """ + ''' counts = defaultdict(lambda: defaultdict(int)) for sentence in sentences: self._sentences.append(sentence) @@ -337,8 +299,8 @@ def _pc(n, d): def _load_data_conll_format(filename): - print("Read from file: ", filename) - with open(filename, "rb") as fin: + print('Read from file: ', filename) + with open(filename, 'rb') as fin: sentences = [] sentence = [] for line in fin.readlines(): @@ -348,7 +310,7 @@ def _load_data_conll_format(filename): sentences.append(sentence) sentence = [] continue - tokens = line.split("\t") + tokens = line.split('\t') word = tokens[1] tag = tokens[4] sentence.append((word, tag)) @@ -360,14 +322,14 @@ def _get_pretrain_model(): # Train: section 2-11 # Test : section 23 tagger = PerceptronTagger() - training = _load_data_conll_format("english_ptb_train.conll") - testing = _load_data_conll_format("english_ptb_test.conll") - print("Size of training and testing (sentence)", len(training), len(testing)) + training = _load_data_conll_format('english_ptb_train.conll') + testing = _load_data_conll_format('english_ptb_test.conll') + print('Size of training and testing (sentence)', len(training), len(testing)) # Train and save the model tagger.train(training, PICKLE) - print("Accuracy : ", tagger.evaluate(testing)) + print('Accuracy : ', tagger.evaluate(testing)) -if __name__ == "__main__": +if __name__ == '__main__': # _get_pretrain_model() pass diff --git a/nlp_resource_data/nltk/tag/senna.py b/nlp_resource_data/nltk/tag/senna.py index 5231d25..8404656 100644 --- a/nlp_resource_data/nltk/tag/senna.py +++ b/nlp_resource_data/nltk/tag/senna.py @@ -1,7 +1,7 @@ # encoding: utf-8 # Natural Language Toolkit: Senna POS Tagger # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Rami Al-Rfou' # URL: # For license information, see LICENSE.TXT @@ -39,13 +39,14 @@ Note: Unit tests for this module can be found in test/unit/test_senna.py ('NY', 'B-LOC'), (',', 'O'), ('USA', 'B-LOC'), ('.', 'O')] """ +from nltk.compat import python_2_unicode_compatible from nltk.classify import Senna - +@python_2_unicode_compatible class SennaTagger(Senna): - def __init__(self, path, encoding="utf-8"): - super(SennaTagger, self).__init__(path, ["pos"], encoding) + def __init__(self, path, encoding='utf-8'): + super(SennaTagger, self).__init__(path, ['pos'], encoding) def tag_sents(self, sentences): """ @@ -56,14 +57,14 @@ class SennaTagger(Senna): for i in range(len(tagged_sents)): for j in range(len(tagged_sents[i])): annotations = tagged_sents[i][j] - tagged_sents[i][j] = (annotations["word"], annotations["pos"]) + tagged_sents[i][j] = (annotations['word'], annotations['pos']) return tagged_sents - +@python_2_unicode_compatible class SennaChunkTagger(Senna): - def __init__(self, path, encoding="utf-8"): - super(SennaChunkTagger, self).__init__(path, ["chk"], encoding) + def __init__(self, path, encoding='utf-8'): + super(SennaChunkTagger, self).__init__(path, ['chk'], encoding) def tag_sents(self, sentences): """ @@ -74,7 +75,7 @@ class SennaChunkTagger(Senna): for i in range(len(tagged_sents)): for j in range(len(tagged_sents[i])): annotations = tagged_sents[i][j] - tagged_sents[i][j] = (annotations["word"], annotations["chk"]) + tagged_sents[i][j] = (annotations['word'], annotations['chk']) return tagged_sents def bio_to_chunks(self, tagged_sent, chunk_type): @@ -105,24 +106,24 @@ class SennaChunkTagger(Senna): current_chunk_position = [] for idx, word_pos in enumerate(tagged_sent): word, pos = word_pos - if "-" + chunk_type in pos: # Append the 
word to the current_chunk. + if '-' + chunk_type in pos: # Append the word to the current_chunk. current_chunk.append((word)) current_chunk_position.append((idx)) else: if current_chunk: # Flush the full chunk when out of an NP. - _chunk_str = " ".join(current_chunk) - _chunk_pos_str = "-".join(map(str, current_chunk_position)) + _chunk_str = ' '.join(current_chunk) + _chunk_pos_str = '-'.join(map(str, current_chunk_position)) yield _chunk_str, _chunk_pos_str current_chunk = [] current_chunk_position = [] if current_chunk: # Flush the last chunk. - yield " ".join(current_chunk), "-".join(map(str, current_chunk_position)) - + yield ' '.join(current_chunk), '-'.join(map(str, current_chunk_position)) +@python_2_unicode_compatible class SennaNERTagger(Senna): - def __init__(self, path, encoding="utf-8"): - super(SennaNERTagger, self).__init__(path, ["ner"], encoding) + def __init__(self, path, encoding='utf-8'): + super(SennaNERTagger, self).__init__(path, ['ner'], encoding) def tag_sents(self, sentences): """ @@ -133,7 +134,7 @@ class SennaNERTagger(Senna): for i in range(len(tagged_sents)): for j in range(len(tagged_sents[i])): annotations = tagged_sents[i][j] - tagged_sents[i][j] = (annotations["word"], annotations["ner"]) + tagged_sents[i][j] = (annotations['word'], annotations['ner']) return tagged_sents @@ -142,6 +143,6 @@ def setup_module(module): from nose import SkipTest try: - tagger = Senna("/usr/share/senna-v3.0", ["pos", "chk", "ner"]) + tagger = Senna('/usr/share/senna-v3.0', ['pos', 'chk', 'ner']) except OSError: raise SkipTest("Senna executable not found") diff --git a/nlp_resource_data/nltk/tag/sequential.py b/nlp_resource_data/nltk/tag/sequential.py index e49d3ad..3d3a767 100644 --- a/nlp_resource_data/nltk/tag/sequential.py +++ b/nlp_resource_data/nltk/tag/sequential.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Sequential Backoff Taggers # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # Steven Bird (minor additions) # Tiago Tresoldi (original affix tagger) @@ -17,13 +17,14 @@ determine a tag for the specified token, then its backoff tagger is consulted instead. Any SequentialBackoffTagger may serve as a backoff tagger for any other SequentialBackoffTagger. """ -import ast +from __future__ import print_function, unicode_literals from abc import abstractmethod import re from nltk.probability import ConditionalFreqDist from nltk.classify import NaiveBayesClassifier +from nltk.compat import python_2_unicode_compatible from nltk.tag.api import TaggerI, FeaturesetTaggerI @@ -105,6 +106,7 @@ class SequentialBackoffTagger(TaggerI): """ +@python_2_unicode_compatible class ContextTagger(SequentialBackoffTagger): """ An abstract base class for sequential backoff taggers that choose @@ -125,7 +127,7 @@ class ContextTagger(SequentialBackoffTagger): :param context_to_tag: A dictionary mapping contexts to tags. :param backoff: The backoff tagger that should be used for this tagger. 
""" - super().__init__(backoff) + SequentialBackoffTagger.__init__(self, backoff) self._context_to_tag = context_to_tag if context_to_tag else {} @abstractmethod @@ -149,7 +151,7 @@ class ContextTagger(SequentialBackoffTagger): return len(self._context_to_tag) def __repr__(self): - return "<{}: size={}>".format(self.__class__.__name__, self.size()) + return '<%s: size=%d>' % (self.__class__.__name__, self.size()) def _train(self, tagged_corpus, cutoff=0, verbose=False): """ @@ -207,15 +209,14 @@ class ContextTagger(SequentialBackoffTagger): size = len(self._context_to_tag) backoff = 100 - (hit_count * 100.0) / token_count pruning = 100 - (size * 100.0) / len(fd.conditions()) - print("[Trained Unigram tagger:", end=" ") - print("size={}, backoff={:.2f}%, pruning={:.2f}%]".format(size, backoff, pruning)) + print("[Trained Unigram tagger:", end=' ') + print("size=%d, backoff=%.2f%%, pruning=%.2f%%]" % (size, backoff, pruning)) ###################################################################### # Tagger Classes ###################################################################### - - +@python_2_unicode_compatible @jsontags.register_tag class DefaultTagger(SequentialBackoffTagger): """ @@ -234,11 +235,11 @@ class DefaultTagger(SequentialBackoffTagger): :type tag: str """ - json_tag = "nltk.tag.sequential.DefaultTagger" + json_tag = 'nltk.tag.sequential.DefaultTagger' def __init__(self, tag): self._tag = tag - super().__init__(None) + SequentialBackoffTagger.__init__(self, None) def encode_json_obj(self): return self._tag @@ -252,7 +253,7 @@ class DefaultTagger(SequentialBackoffTagger): return self._tag # ignore token and history def __repr__(self): - return "".format(self._tag) + return '' % self._tag @jsontags.register_tag @@ -280,7 +281,7 @@ class NgramTagger(ContextTagger): context-to-tag table for the new tagger. """ - json_tag = "nltk.tag.sequential.NgramTagger" + json_tag = 'nltk.tag.sequential.NgramTagger' def __init__( self, n, train=None, model=None, backoff=None, cutoff=0, verbose=False @@ -288,34 +289,18 @@ class NgramTagger(ContextTagger): self._n = n self._check_params(train, model) - super().__init__(model, backoff) + ContextTagger.__init__(self, model, backoff) if train: self._train(train, cutoff, verbose) def encode_json_obj(self): - _context_to_tag = {repr(k): v for k, v in self._context_to_tag.items()} - if "NgramTagger" in self.__class__.__name__: - return self._n, _context_to_tag, self.backoff - else: - return _context_to_tag, self.backoff + return self._n, self._context_to_tag, self.backoff @classmethod def decode_json_obj(cls, obj): - try: - _n, _context_to_tag, backoff = obj - except ValueError: - _context_to_tag, backoff = obj - - if not _context_to_tag: - return backoff - - _context_to_tag = {ast.literal_eval(k): v for k, v in _context_to_tag.items()} - - if "NgramTagger" in cls.__name__: - return cls(_n, model=_context_to_tag, backoff=backoff) - else: - return cls(model=_context_to_tag, backoff=backoff) + _n, _context_to_tag, backoff = obj + return cls(_n, model=_context_to_tag, backoff=backoff) def context(self, tokens, index, history): tag_context = tuple(history[max(0, index - self._n + 1) : index]) @@ -335,7 +320,7 @@ class UnigramTagger(NgramTagger): >>> test_sent = brown.sents(categories='news')[0] >>> unigram_tagger = UnigramTagger(brown.tagged_sents(categories='news')[:500]) >>> for tok, tag in unigram_tagger.tag(test_sent): - ... print("({}, {}), ".format(tok, tag)) + ... 
print("(%s, %s), " % (tok, tag)) (The, AT), (Fulton, NP-TL), (County, NN-TL), (Grand, JJ-TL), (Jury, NN-TL), (said, VBD), (Friday, NR), (an, AT), (investigation, NN), (of, IN), (Atlanta's, NP$), (recent, JJ), @@ -355,10 +340,18 @@ class UnigramTagger(NgramTagger): :type cutoff: int """ - json_tag = "nltk.tag.sequential.UnigramTagger" + json_tag = 'nltk.tag.sequential.UnigramTagger' def __init__(self, train=None, model=None, backoff=None, cutoff=0, verbose=False): - super().__init__(1, train, model, backoff, cutoff, verbose) + NgramTagger.__init__(self, 1, train, model, backoff, cutoff, verbose) + + def encode_json_obj(self): + return self._context_to_tag, self.backoff + + @classmethod + def decode_json_obj(cls, obj): + _context_to_tag, backoff = obj + return cls(model=_context_to_tag, backoff=backoff) def context(self, tokens, index, history): return tokens[index] @@ -384,10 +377,18 @@ class BigramTagger(NgramTagger): :type cutoff: int """ - json_tag = "nltk.tag.sequential.BigramTagger" + json_tag = 'nltk.tag.sequential.BigramTagger' def __init__(self, train=None, model=None, backoff=None, cutoff=0, verbose=False): - super().__init__(2, train, model, backoff, cutoff, verbose) + NgramTagger.__init__(self, 2, train, model, backoff, cutoff, verbose) + + def encode_json_obj(self): + return self._context_to_tag, self.backoff + + @classmethod + def decode_json_obj(cls, obj): + _context_to_tag, backoff = obj + return cls(model=_context_to_tag, backoff=backoff) @jsontags.register_tag @@ -410,10 +411,18 @@ class TrigramTagger(NgramTagger): :type cutoff: int """ - json_tag = "nltk.tag.sequential.TrigramTagger" + json_tag = 'nltk.tag.sequential.TrigramTagger' def __init__(self, train=None, model=None, backoff=None, cutoff=0, verbose=False): - super().__init__(3, train, model, backoff, cutoff, verbose) + NgramTagger.__init__(self, 3, train, model, backoff, cutoff, verbose) + + def encode_json_obj(self): + return self._context_to_tag, self.backoff + + @classmethod + def decode_json_obj(cls, obj): + _context_to_tag, backoff = obj + return cls(model=_context_to_tag, backoff=backoff) @jsontags.register_tag @@ -436,7 +445,7 @@ class AffixTagger(ContextTagger): tag of None by this tagger. """ - json_tag = "nltk.tag.sequential.AffixTagger" + json_tag = 'nltk.tag.sequential.AffixTagger' def __init__( self, @@ -451,7 +460,7 @@ class AffixTagger(ContextTagger): self._check_params(train, model) - super().__init__(model, backoff) + ContextTagger.__init__(self, model, backoff) self._affix_length = affix_length self._min_word_length = min_stem_length + abs(affix_length) @@ -487,6 +496,7 @@ class AffixTagger(ContextTagger): return token[self._affix_length :] +@python_2_unicode_compatible @jsontags.register_tag class RegexpTagger(SequentialBackoffTagger): """ @@ -530,36 +540,36 @@ class RegexpTagger(SequentialBackoffTagger): assigned the tag None. 
""" - json_tag = "nltk.tag.sequential.RegexpTagger" + json_tag = 'nltk.tag.sequential.RegexpTagger' def __init__(self, regexps, backoff=None): """ """ - super().__init__(backoff) - try: - self._regexps = [(re.compile(regexp), tag,) for regexp, tag in regexps] - except Exception as e: - raise Exception( - 'Invalid RegexpTagger regexp:', str(e), 'regexp:', regexp, 'tag:', tag) + SequentialBackoffTagger.__init__(self, backoff) + self._regexs = [(re.compile(regexp), tag) for regexp, tag in regexps] def encode_json_obj(self): - return [(regexp.pattern, tag) for regexp, tag in self._regexps], self.backoff + return [(regexp.patten, tag) for regexp, tag in self._regexs], self.backoff @classmethod def decode_json_obj(cls, obj): regexps, backoff = obj - return cls(regexps, backoff) + self = cls(()) + self._regexs = [(re.compile(regexp), tag) for regexp, tag in regexps] + SequentialBackoffTagger.__init__(self, backoff) + return self def choose_tag(self, tokens, index, history): - for regexp, tag in self._regexps: + for regexp, tag in self._regexs: if re.match(regexp, tokens[index]): return tag return None def __repr__(self): - return "".format(len(self._regexps)) + return '' % len(self._regexs) +@python_2_unicode_compatible class ClassifierBasedTagger(SequentialBackoffTagger, FeaturesetTaggerI): """ A sequential tagger that uses a classifier to choose the tag for @@ -615,11 +625,11 @@ class ClassifierBasedTagger(SequentialBackoffTagger, FeaturesetTaggerI): ): self._check_params(train, classifier) - super().__init__(backoff) + SequentialBackoffTagger.__init__(self, backoff) if (train and classifier) or (not train and not classifier): raise ValueError( - "Must specify either training data or " "trained classifier." + 'Must specify either training data or ' 'trained classifier.' 
) if feature_detector is not None: @@ -659,7 +669,7 @@ class ClassifierBasedTagger(SequentialBackoffTagger, FeaturesetTaggerI): classifier_corpus = [] if verbose: - print("Constructing training corpus for classifier.") + print('Constructing training corpus for classifier.') for sentence in tagged_corpus: history = [] @@ -670,11 +680,11 @@ class ClassifierBasedTagger(SequentialBackoffTagger, FeaturesetTaggerI): history.append(tags[index]) if verbose: - print("Training classifier ({} instances)".format(len(classifier_corpus))) + print('Training classifier (%d instances)' % len(classifier_corpus)) self._classifier = classifier_builder(classifier_corpus) def __repr__(self): - return "".format(self._classifier) + return '' % self._classifier def feature_detector(self, tokens, index, history): """ @@ -719,32 +729,32 @@ class ClassifierBasedPOSTagger(ClassifierBasedTagger): prevtag = history[index - 1] prevprevtag = history[index - 2] - if re.match("[0-9]+(\.[0-9]*)?|[0-9]*\.[0-9]+$", word): - shape = "number" - elif re.match("\W+$", word): - shape = "punct" - elif re.match("[A-Z][a-z]+$", word): - shape = "upcase" - elif re.match("[a-z]+$", word): - shape = "downcase" - elif re.match("\w+$", word): - shape = "mixedcase" + if re.match('[0-9]+(\.[0-9]*)?|[0-9]*\.[0-9]+$', word): + shape = 'number' + elif re.match('\W+$', word): + shape = 'punct' + elif re.match('[A-Z][a-z]+$', word): + shape = 'upcase' + elif re.match('[a-z]+$', word): + shape = 'downcase' + elif re.match('\w+$', word): + shape = 'mixedcase' else: - shape = "other" + shape = 'other' features = { - "prevtag": prevtag, - "prevprevtag": prevprevtag, - "word": word, - "word.lower": word.lower(), - "suffix3": word.lower()[-3:], - "suffix2": word.lower()[-2:], - "suffix1": word.lower()[-1:], - "prevprevword": prevprevword, - "prevword": prevword, - "prevtag+word": "{}+{}".format(prevtag, word.lower()), - "prevprevtag+word": "{}+{}".format(prevprevtag, word.lower()), - "prevword+word": "{}+{}".format(prevword, word.lower()), - "shape": shape, + 'prevtag': prevtag, + 'prevprevtag': prevprevtag, + 'word': word, + 'word.lower': word.lower(), + 'suffix3': word.lower()[-3:], + 'suffix2': word.lower()[-2:], + 'suffix1': word.lower()[-1:], + 'prevprevword': prevprevword, + 'prevword': prevword, + 'prevtag+word': '%s+%s' % (prevtag, word.lower()), + 'prevprevtag+word': '%s+%s' % (prevprevtag, word.lower()), + 'prevword+word': '%s+%s' % (prevword, word.lower()), + 'shape': shape, } return features diff --git a/nlp_resource_data/nltk/tag/stanford.py b/nlp_resource_data/nltk/tag/stanford.py index cd7250c..9916386 100644 --- a/nlp_resource_data/nltk/tag/stanford.py +++ b/nlp_resource_data/nltk/tag/stanford.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: Interface to the Stanford Part-of-speech and Named-Entity Taggers # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Nitin Madnani # Rami Al-Rfou' # URL: @@ -23,10 +23,12 @@ import tempfile from subprocess import PIPE import warnings +from six import text_type + from nltk.internals import find_file, find_jar, config_java, java, _java_options from nltk.tag.api import TaggerI -_stanford_url = "https://nlp.stanford.edu/software" +_stanford_url = 'https://nlp.stanford.edu/software' class StanfordTagger(TaggerI): @@ -40,16 +42,16 @@ class StanfordTagger(TaggerI): - ``_JAR`` file: Class constant that represents the jar file name. 
""" - _SEPARATOR = "" - _JAR = "" + _SEPARATOR = '' + _JAR = '' def __init__( self, model_filename, path_to_jar=None, - encoding="utf8", + encoding='utf8', verbose=False, - java_options="-mx1000m", + java_options='-mx1000m', ): # Raise deprecation warning. warnings.warn( @@ -64,16 +66,16 @@ class StanfordTagger(TaggerI): if not self._JAR: warnings.warn( - "The StanfordTagger class is not meant to be " - "instantiated directly. Did you mean " - "StanfordPOSTagger or StanfordNERTagger?" + 'The StanfordTagger class is not meant to be ' + 'instantiated directly. Did you mean ' + 'StanfordPOSTagger or StanfordNERTagger?' ) self._stanford_jar = find_jar( self._JAR, path_to_jar, searchpath=(), url=_stanford_url, verbose=verbose ) self._stanford_model = find_file( - model_filename, env_vars=("STANFORD_MODELS",), verbose=verbose + model_filename, env_vars=('STANFORD_MODELS',), verbose=verbose ) self._encoding = encoding @@ -92,19 +94,19 @@ class StanfordTagger(TaggerI): def tag_sents(self, sentences): encoding = self._encoding - default_options = " ".join(_java_options) + default_options = ' '.join(_java_options) config_java(options=self.java_options, verbose=False) # Create a temporary input file _input_fh, self._input_file_path = tempfile.mkstemp(text=True) cmd = list(self._cmd) - cmd.extend(["-encoding", encoding]) + cmd.extend(['-encoding', encoding]) # Write the actual sentences to the temporary input file - _input_fh = os.fdopen(_input_fh, "wb") - _input = "\n".join((" ".join(x) for x in sentences)) - if isinstance(_input, str) and encoding: + _input_fh = os.fdopen(_input_fh, 'wb') + _input = '\n'.join((' '.join(x) for x in sentences)) + if isinstance(_input, text_type) and encoding: _input = _input.encode(encoding) _input_fh.write(_input) _input_fh.close() @@ -130,7 +132,7 @@ class StanfordTagger(TaggerI): sentence = [] for tagged_word in tagged_sentence.strip().split(): word_tags = tagged_word.strip().split(self._SEPARATOR) - sentence.append(("".join(word_tags[:-1]), word_tags[-1])) + sentence.append((''.join(word_tags[:-1]), word_tags[-1])) tagged_sentences.append(sentence) return tagged_sentences @@ -151,8 +153,8 @@ class StanfordPOSTagger(StanfordTagger): [('What', 'WP'), ('is', 'VBZ'), ('the', 'DT'), ('airspeed', 'NN'), ('of', 'IN'), ('an', 'DT'), ('unladen', 'JJ'), ('swallow', 'VB'), ('?', '.')] """ - _SEPARATOR = "_" - _JAR = "stanford-postagger.jar" + _SEPARATOR = '_' + _JAR = 'stanford-postagger.jar' def __init__(self, *args, **kwargs): super(StanfordPOSTagger, self).__init__(*args, **kwargs) @@ -160,15 +162,15 @@ class StanfordPOSTagger(StanfordTagger): @property def _cmd(self): return [ - "edu.stanford.nlp.tagger.maxent.MaxentTagger", - "-model", + 'edu.stanford.nlp.tagger.maxent.MaxentTagger', + '-model', self._stanford_model, - "-textFile", + '-textFile', self._input_file_path, - "-tokenize", - "false", - "-outputFormatOptions", - "keepEmptySentences", + '-tokenize', + 'false', + '-outputFormatOptions', + 'keepEmptySentences', ] @@ -191,9 +193,9 @@ class StanfordNERTagger(StanfordTagger): ('University', 'ORGANIZATION'), ('in', 'O'), ('NY', 'LOCATION')] """ - _SEPARATOR = "/" - _JAR = "stanford-ner.jar" - _FORMAT = "slashTags" + _SEPARATOR = '/' + _JAR = 'stanford-ner.jar' + _FORMAT = 'slashTags' def __init__(self, *args, **kwargs): super(StanfordNERTagger, self).__init__(*args, **kwargs) @@ -202,27 +204,27 @@ class StanfordNERTagger(StanfordTagger): def _cmd(self): # Adding -tokenizerFactory edu.stanford.nlp.process.WhitespaceTokenizer -tokenizerOptions tokenizeNLs=false for not 
using stanford Tokenizer return [ - "edu.stanford.nlp.ie.crf.CRFClassifier", - "-loadClassifier", + 'edu.stanford.nlp.ie.crf.CRFClassifier', + '-loadClassifier', self._stanford_model, - "-textFile", + '-textFile', self._input_file_path, - "-outputFormat", + '-outputFormat', self._FORMAT, - "-tokenizerFactory", - "edu.stanford.nlp.process.WhitespaceTokenizer", - "-tokenizerOptions", - '"tokenizeNLs=false"', + '-tokenizerFactory', + 'edu.stanford.nlp.process.WhitespaceTokenizer', + '-tokenizerOptions', + '\"tokenizeNLs=false\"', ] def parse_output(self, text, sentences): - if self._FORMAT == "slashTags": + if self._FORMAT == 'slashTags': # Joint together to a big list tagged_sentences = [] for tagged_sentence in text.strip().split("\n"): for tagged_word in tagged_sentence.strip().split(): word_tags = tagged_word.strip().split(self._SEPARATOR) - tagged_sentences.append(("".join(word_tags[:-1]), word_tags[-1])) + tagged_sentences.append((''.join(word_tags[:-1]), word_tags[-1])) # Separate it according to the input result = [] @@ -239,9 +241,9 @@ def setup_module(module): from nose import SkipTest try: - StanfordPOSTagger("english-bidirectional-distsim.tagger") + StanfordPOSTagger('english-bidirectional-distsim.tagger') except LookupError: raise SkipTest( - "Doctests from nltk.tag.stanford are skipped because one \ - of the stanford jars cannot be found." + 'Doctests from nltk.tag.stanford are skipped because one \ + of the stanford jars cannot be found.' ) diff --git a/nlp_resource_data/nltk/tag/tnt.py b/nlp_resource_data/nltk/tag/tnt.py index eb2ce12..4837e11 100644 --- a/nlp_resource_data/nltk/tag/tnt.py +++ b/nlp_resource_data/nltk/tag/tnt.py @@ -1,18 +1,18 @@ # Natural Language Toolkit: TnT Tagger # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Sam Huston # # URL: # For license information, see LICENSE.TXT -""" +''' Implementation of 'TnT - A Statisical Part of Speech Tagger' by Thorsten Brants http://acl.ldc.upenn.edu/A/A00/A00-1031.pdf -""" - +''' +from __future__ import print_function, division from math import log from operator import itemgetter @@ -22,7 +22,7 @@ from nltk.tag.api import TaggerI class TnT(TaggerI): - """ + ''' TnT - Statistical POS tagger IMPORTANT NOTES: @@ -81,10 +81,10 @@ class TnT(TaggerI): It is possible to differentiate the tags which are assigned to capitalized words. However this does not result in a significant gain in the accuracy of the results. - """ + ''' def __init__(self, unk=None, Trained=False, N=1000, C=False): - """ + ''' Construct a TnT statistical tagger. Tagger must be trained before being used to tag input. @@ -111,7 +111,7 @@ class TnT(TaggerI): information for tagging. NOTE: using capitalization may not increase the accuracy of the tagger - """ + ''' self._uni = FreqDist() self._bi = ConditionalFreqDist() @@ -132,14 +132,14 @@ class TnT(TaggerI): self.known = 0 def train(self, data): - """ + ''' Uses a set of tagged data to train the tagger. If an unknown word tagger is specified, it is trained on the same data. 
:param data: List of lists of (word, tag) tuples :type data: tuple(str) - """ + ''' # Ensure that local C flag is initialized before use C = False @@ -148,7 +148,7 @@ class TnT(TaggerI): self._unk.train(data) for sent in data: - history = [("BOS", False), ("BOS", False)] + history = [('BOS', False), ('BOS', False)] for w, t in sent: # if capitalization is requested, @@ -168,13 +168,17 @@ class TnT(TaggerI): # set local flag C to false for the next word C = False - self._eos[t]["EOS"] += 1 + self._eos[t]['EOS'] += 1 # compute lambda values from the trained frequency distributions self._compute_lambda() + # (debugging -- ignore or delete me) + # print "lambdas" + # print i, self._l1, i, self._l2, i, self._l3 + def _compute_lambda(self): - """ + ''' creates lambda values based upon training data NOTE: no need to explicitly reference C, @@ -191,7 +195,7 @@ class TnT(TaggerI): ISSUES -- Resolutions: if 2 values are equal, increment both lambda values by (f(t1,t2,t3) / 2) - """ + ''' # temporary lambda variables tl1 = 0.0 @@ -246,6 +250,7 @@ class TnT(TaggerI): # otherwise there might be a problem # eg: all values = 0 else: + # print "Problem", c1, c2 ,c3 pass # Lambda normalisation: @@ -255,17 +260,17 @@ class TnT(TaggerI): self._l3 = tl3 / (tl1 + tl2 + tl3) def _safe_div(self, v1, v2): - """ + ''' Safe floating point division function, does not allow division by 0 returns -1 if the denominator is 0 - """ + ''' if v2 == 0: return -1 else: return v1 / v2 def tagdata(self, data): - """ + ''' Tags each sentence in a list of sentences :param data:list of list of words @@ -275,7 +280,7 @@ class TnT(TaggerI): Invokes tag(sent) function for each sentence compiles the results into a list of tagged sentences each tagged sentence is a list of (word, tag) tuples - """ + ''' res = [] for sent in data: res1 = self.tag(sent) @@ -283,7 +288,7 @@ class TnT(TaggerI): return res def tag(self, data): - """ + ''' Tags a single sentence :param data: list of words @@ -298,9 +303,9 @@ class TnT(TaggerI): with the correct words in the input sequence returns a list of (word, tag) tuples - """ + ''' - current_state = [(["BOS", "BOS"], 0.0)] + current_state = [(['BOS', 'BOS'], 0.0)] sent = list(data) @@ -315,7 +320,7 @@ class TnT(TaggerI): return res def _tagword(self, sent, current_states): - """ + ''' :param sent : List of words remaining in the sentence :type sent : [word,] :param current_states : List of possible tag combinations for @@ -328,7 +333,7 @@ class TnT(TaggerI): Uses formula specified above to calculate the probability of a particular tag - """ + ''' # if this word marks the end of the sentance, # return the most probable tag @@ -381,7 +386,7 @@ class TnT(TaggerI): # if no unknown word tagger has been specified # then use the tag 'Unk' if self._unk is None: - tag = ("Unk", C) + tag = ('Unk', C) # otherwise apply the unknown word tagger else: @@ -415,7 +420,7 @@ class TnT(TaggerI): def basic_sent_chop(data, raw=True): - """ + ''' Basic method for tokenizing input into sentences for this tagger: @@ -437,11 +442,11 @@ def basic_sent_chop(data, raw=True): This is a simple method which enhances the performance of the TnT tagger. Better sentence tokenization will further enhance the results. 
- """ + ''' new_data = [] curr_sent = [] - sent_mark = [",", ".", "?", "!"] + sent_mark = [',', '.', '?', '!'] if raw: for word in data: @@ -469,16 +474,19 @@ def demo(): sents = list(brown.tagged_sents()) test = list(brown.sents()) + # create and train the tagger tagger = TnT() tagger.train(sents[200:1000]) + # tag some data tagged_data = tagger.tagdata(test[100:120]) + # print results for j in range(len(tagged_data)): s = tagged_data[j] t = sents[j + 100] for i in range(len(s)): - print(s[i], "--", t[i]) + print(s[i], '--', t[i]) print() @@ -499,11 +507,11 @@ def demo2(): t.unknown = 0 t.known = 0 - print("Capitalization off:") - print("Accuracy:", tacc) - print("Percentage known:", tp_kn) - print("Percentage unknown:", tp_un) - print("Accuracy over known words:", (tacc / tp_kn)) + print('Capitalization off:') + print('Accuracy:', tacc) + print('Percentage known:', tp_kn) + print('Percentage unknown:', tp_un) + print('Accuracy over known words:', (tacc / tp_kn)) sacc = s.evaluate(d[i * 100 : ((i + 1) * 100)]) sp_un = s.unknown / (s.known + s.unknown) @@ -511,11 +519,11 @@ def demo2(): s.unknown = 0 s.known = 0 - print("Capitalization on:") - print("Accuracy:", sacc) - print("Percentage known:", sp_kn) - print("Percentage unknown:", sp_un) - print("Accuracy over known words:", (sacc / sp_kn)) + print('Capitalization on:') + print('Accuracy:', sacc) + print('Percentage known:', sp_kn) + print('Percentage unknown:', sp_un) + print('Accuracy over known words:', (sacc / sp_kn)) def demo3(): @@ -570,7 +578,7 @@ def demo3(): tallacc += tacc sallacc += sacc - # print(i+1, (tacc / tp_kn), i+1, (sacc / tp_kn), i+1, tacc, i+1, sacc) + # print i+1, (tacc / tp_kn), i+1, (sacc / tp_kn), i+1, tacc, i+1, sacc print("brown: acc over words known:", 10 * tknacc) print(" : overall accuracy:", 10 * tallacc) diff --git a/nlp_resource_data/nltk/tag/util.py b/nlp_resource_data/nltk/tag/util.py index 9d2172e..2a397d0 100644 --- a/nlp_resource_data/nltk/tag/util.py +++ b/nlp_resource_data/nltk/tag/util.py @@ -1,13 +1,13 @@ # Natural Language Toolkit: Tagger Utilities # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # Steven Bird # URL: # For license information, see LICENSE.TXT -def str2tuple(s, sep="/"): +def str2tuple(s, sep='/'): """ Given the string representation of a tagged token, return the corresponding tuple representation. The rightmost occurrence of @@ -31,7 +31,7 @@ def str2tuple(s, sep="/"): return (s, None) -def tuple2str(tagged_token, sep="/"): +def tuple2str(tagged_token, sep='/'): """ Given the tuple representation of a tagged token, return the corresponding string representation. This representation is @@ -54,8 +54,8 @@ def tuple2str(tagged_token, sep="/"): if tag is None: return word else: - assert sep not in tag, "tag may not contain sep!" - return "%s%s%s" % (word, sep, tag) + assert sep not in tag, 'tag may not contain sep!' 
+ return '%s%s%s' % (word, sep, tag) def untag(tagged_sentence): diff --git a/nlp_resource_data/nltk/tbl/__init__.py b/nlp_resource_data/nltk/tbl/__init__.py index dca2b46..5298a5a 100644 --- a/nlp_resource_data/nltk/tbl/__init__.py +++ b/nlp_resource_data/nltk/tbl/__init__.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: Transformation-based learning # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Marcus Uneson # based on previous (nltk2) version by # Christopher Maloof, Edward Loper, Steven Bird diff --git a/nlp_resource_data/nltk/tbl/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/tbl/__pycache__/__init__.cpython-37.pyc index f6656e4e6c85496293b30b13d08929c55678e58f..80f7d82ad8df681afae8155dafff37f1c6619ca3 100644 GIT binary patch delta 31 lcmey#{Fa&9iI2sr=% delta 43 xcmaFM{F9m6iISrZZX5<$n7G>+}O`gLT2mtqW41E9q diff --git a/nlp_resource_data/nltk/tbl/__pycache__/api.cpython-37.pyc b/nlp_resource_data/nltk/tbl/__pycache__/api.cpython-37.pyc index 06de1f0c12599709615a94712e9ef1991b00ba71..9d6ef735b500ee3f31bd56882d65ab5af106f32b 100644 GIT binary patch delta 29 jcmbQq*vrW6#LLUY00gD%6DM+8u$k$X78mJH40QnjQ|ktq delta 41 vcmeBWoXN=T#LLUY00eSL0Ta0`gq-v<@^e%5vl1&a@(U7+vi0>Qdb$7rylD#E diff --git a/nlp_resource_data/nltk/tbl/__pycache__/demo.cpython-37.pyc b/nlp_resource_data/nltk/tbl/__pycache__/demo.cpython-37.pyc index fe124f33afee23620e916778ed3938d1c6806f61..ba75504929b8063709c4ff90542ee5e132c921bc 100644 GIT binary patch delta 2449 zcmah~OKcle6!jZ_VmtmMand*+=hJ+eN#cC^Nzyh-NzSw6&vZ!@ir@8cuL>F0A?<)g^iYqsz)KAuxHn57NKO`1>eNj}A= z`F?%?MWGF*4f2Eh5E>4d!+d5{`G~=U*}dVx*p+i0_3&dVr0R8JCCC>XeWg;!I{9M3 zjT=|XMXTbNdfqM-!Eu8;zm_kfsP;PRWlb?O1A?csla(?|iVu{z#tz(<6$Fo8-1puR zXT0t6QQS$z9S}#J+r~}1sDoJo9V(WouN1*HoPHG5UHBj&9(dndYC%p$W$;o*tbQh9Wny|lovfSf zVG8rHh!;l1Wb`N2BmRlbyz;^n=&Xb+tYsQd$QN>YwgPLWMGh%KPe4Iu3WKz~14ZqQsa ztcu~F+{xP}&{OLA9We3*H)Iwn$ReTO1J|nok_F5{w#ZF&Saz!0D9gZgZa4-UKm{#i z8|(F);1KTNAi)_jCAV4A4FUF0(Z(7AP2jUWg9+MB5sV{fiRW)woEhw8W5OQn+RF69 z!QWbUG#43CXJlroh(mMR}84$}&;TGJ_2f8_!OYDde}Ln>KJA8EEy^79X~)?uakO7n+Ar;QH|n zvHMkAYvRxG_9nWp`bpC8m`J9_`WCi)pl=z4lu3VQQB)P#^Z+|6-cMg)ws@5OdW`(- zjAQqGh30c+0Use)cqqP~NLNYU^_DBP$~3&LcgwpBAcqQH;tc16gC7>J7BAJZ7bV5JBc7%Q6!xswzwtWGzvINQY0@NXCr;9zG%6vXq@+|;O=(N3EmxoN^-*H_v~@}}BG!^73F_DUKqXVQ0$PBe zjo%DfAuVKuwXj&rX2gnWQL9O766=5&vlLCS;#%Bl)|x>Uq@im*EkVOH0NiM+^fm#<5bn*5owqGx?1Fyzyj|7V%2 zcFA#BiPcP_v}7|&cZV8cwL&Z-{l9T|CXiMN4L?2boIile)R zRW)_jfEws-*>TH@&atf^5tgpM6MdVIE!2Rf+ZSmDrg)CdYaZdI5@Rgkf@C z&{(bN6)KYSWD`O%ng1P2<}eY}uZ3T<8Fj0sqlH|385wcRhVsOCK}mO?1#>43;ESY4 zFXHEn1(0QnpI^3XtP^rOlY31}#zD)!8^0Xc0?a6;tG}J+NjI2uJzsKxYca=YFfO zJT6BORBjLK2@V6Rb?}Snju6{8%eMnRwcj6@7~0~sy~;n`eVat<#|Q5a74@p*NriEb zxQ+b-@_W%~Y`sO>s?IF&Bf~R;>(=ZXC_ULRD%(BSGoV@G9}Vv%S^n$r$K9f0c=K9p z9W?82@VY1ybhe!iRa>p)s@VHyD{-HY)I#u$lMx zKZVgXk&)l2St`-s0xvq-9M}Q`d>001eO!rHd*M>af}Spw)V5-g?ZYJT%@xB{WRsgL zj2E!;^F=U1eR001O2wku&mKh+cvsha?0ICL0MME$rn^#Dv{|+0FwrAnA#oQ>hSS1i zgc$^U6vbnxZJ6lGV%QkIP7LRjDF_LKR)lthZiEy<8es_b_puzZBMA7pVfbFGe;cra z7?xR!6^o_0!==-QjvRUXBzpwqSQo5AMYoJnsiA6Ho#Tb4Et|pza0W$A1t3hq68>XS Of+R@0Kg_=vf9^jFN#8L5 diff --git a/nlp_resource_data/nltk/tbl/__pycache__/erroranalysis.cpython-37.pyc b/nlp_resource_data/nltk/tbl/__pycache__/erroranalysis.cpython-37.pyc index 5db4cae6f71c8e2a8ef211512b57732170d07eb3..0d7de8d32097f59b19b6117875375be5c4653908 100644 GIT binary patch delta 251 zcmcb~)y&Q7#LLUY00gD%6XU;6av7r-85vTTQkYv9qL@-xf*CYf zUxL(XGT!1VD9X$$iBBudOD@UG&)aD6iAjJBC{+X^emUru78mJHc4Ky9RGr+*oT>^E 
v$@^PO$2m6lbd=Ez_>@v+V$^7K6s$023a$;!ZWhnwVI>-EYBbn>xrO`>BXN~> diff --git a/nlp_resource_data/nltk/tbl/__pycache__/template.cpython-37.pyc b/nlp_resource_data/nltk/tbl/__pycache__/template.cpython-37.pyc index 63936904593f50f224427d947c69b2f14b21d9ea..4a2ce0c5df81f8589b9bb927187c919edcf2d46b 100644 GIT binary patch delta 1885 zcmZuyOK%%h6!!Iu$B)?Vl)Mv4cqoub6X(H0O%p+7XDy0m5TKPLxe5W$HFYw@G9Oh+wW->;6*30^esrag?%Cq9-e9hD(uX=g2fI2y^ z=$Firlxbd{UpC7u!!@oPuxlA7?-V}HI0argz_VFJNzo~xq{OQSih0ga-^dt!e-e&W zgQe97+(1}ct)L;?Ffg)der^5g1{Zc(wd=6}yCHlownGP%E4JfUc(NOw9mhX>^?|Av z(GL(gTApjgFqbUluCwvOk8=O8WfB_-P|j&igu6UQb8g`9wlwIF3SH_=F06pN&Fy*! z+hOR;C-?I2E~|J4RU+q!bVKzlp@0|t_>DCr1x?MC2c_)LD*RTOA|Ym!t<9%w>y$kO zJu0p=dBEEdtbUSg_WRSrC`6d8$m2#9M(D)sGe$6m;!l$Y{Vy-d7Bk|zc)O;Sl}~>7m}MJ z6O%F}6&A?oj1R&lJAU`@%aO}$i825KNm`7;*rf@L;YmE3I&aI_v?2&24r0&9pTeRi z<*~^DiKqu_5$(P`d~r-?^AZAsODMsFsn@1T-Ah??rcyGQpOahTmzQLD^4JP4WNA~y zcLOALTS7{6Ul@(tp7GJ-#?v#vo^X(^&s_c=chYiXHfFdj08VP>_~!TnfW7cSn|r;>miU&G7kHZzz#FWWX z%682n#ge$u{Y!Lr97Rsa`5+9qABjCH3g6>^i4QP*Ym`bxRh5zK5F5y9?EU1y#k(_9 zMUYv}BbTzBl+@J|JlD}pb9+BgcmG$Z1kk!#}dDwyTYqDH(XP!cPM0pc`&U86FXEi)- zWBg9QcqRG0_Rb_3qzWSWe=B$s@2?~qb5l)v(LhTM28aw2A#0c>GDk!%Nt$oDDe;-o ze(tPoc#Fhu6CszU!1anbgcobc{ke&K;tV}4S@@4YyUem6iy|y#y#Wf@Q@!yGxNXQ0 zSER`jVd%xO@TMMkVz(yhUhQ}rf`)2lr7MOOP9d$BR151n*mt?FfdWI3+I9+|EOTMS a4;?gGr7aWF!YDJHm01D5WkqL&*?$2vv71u> delta 1757 zcmZuy-D@0G6yG~DyF0VlO*d__*~E~lu$pPo&9p?y&I0dCMf?{Yo zCd=K(d3mpJH|G^N^NOG8j)fZ2vruDk>m%JMdD?o;E1tzC8Taw*521X1HtfdVe@tp*g?~2_s;kgeNi2K?V`%`&`!$|jBeiHZnh0P9gsaJEd_p>b>lEu{zQB_ z`U^9}&9RpnR3Dfop7%Q1kd2oW~v!|Q&rB#QGh4VZa)x@Y9F&H5s$A2aj<8hhuY^(etAkx-+!7zw&o)#B2DHfO1O`!2-Tq?#*ZO5vBf>C)LRv|j z`k5a`DKx~$WPRuuw74|c7**j194?^K*ZbjQm94hTon_R+Zv|8cyAhGFT8_&Q4yvf=bMe zoFwZx7Qrfth;*Iy4gpoON1{05VUq2(llXlOnB$1}clP@^stqXa^9pB6vR#~GBk{Yv zWBny64e`g^y}etwa!Sa8K)P9K=T&_N7_df=Z=gT|w`o0S5y8wdJhyZz4OU=lwgpRM zLzRI`D6qcKE>IL%NMp#j2eb51M&e1v7sDiQG4@r|*_PD1LF$wb4;>w|RHg1z)1IFk zk@TYtxG6@P7pB!pw8N7+F6aAE@+TDP=$f-{vT@?Q` z-)WG8)REnjypUlH5$GkbI{f$l9hP2vm*@&=nklZ3kWXp+_CCx_Hc*1w10EmG4z zfr30nZcyMc++D^erC~8khGrP%HM3w=%&IwJYpk4>4Im?X2n*fmf`)6L8&_IBmNkpx zAjSE5uT@*89yygL$a>AJ6EtBv;Qf1nE?oukBnr}83S$p#m#MTVjaHSJtjx;#>GS^s D2!d~k diff --git a/nlp_resource_data/nltk/tbl/demo.py b/nlp_resource_data/nltk/tbl/demo.py index da30446..28642ae 100644 --- a/nlp_resource_data/nltk/tbl/demo.py +++ b/nlp_resource_data/nltk/tbl/demo.py @@ -1,13 +1,14 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: Transformation-based learning # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Marcus Uneson # based on previous (nltk2) version by # Christopher Maloof, Edward Loper, Steven Bird # URL: # For license information, see LICENSE.TXT +from __future__ import print_function, absolute_import, division import os import pickle @@ -248,7 +249,7 @@ def postag( baseline_tagger = UnigramTagger( baseline_data, backoff=baseline_backoff_tagger ) - with open(cache_baseline_tagger, "w") as print_rules: + with open(cache_baseline_tagger, 'w') as print_rules: pickle.dump(baseline_tagger, print_rules) print( "Trained baseline tagger, pickled it to {0}".format( @@ -316,17 +317,17 @@ def postag( # writing error analysis to file if error_output is not None: - with open(error_output, "w") as f: - f.write("Errors for Brill Tagger %r\n\n" % serialize_output) + with open(error_output, 'w') as f: + f.write('Errors for Brill Tagger %r\n\n' % serialize_output) f.write( - u"\n".join(error_list(gold_data, taggedtest)).encode("utf-8") + "\n" + u'\n'.join(error_list(gold_data, 
taggedtest)).encode('utf-8') + '\n' ) print("Wrote tagger errors including context to {0}".format(error_output)) # serializing the tagger to a pickle file and reloading (just to see it works) if serialize_output is not None: taggedtest = brill_tagger.tag_sents(testing_data) - with open(serialize_output, "w") as print_rules: + with open(serialize_output, 'w') as print_rules: pickle.dump(brill_tagger, print_rules) print("Wrote pickled tagger to {0}".format(serialize_output)) with open(serialize_output, "r") as print_rules: @@ -380,15 +381,15 @@ def _demo_prepare_data( def _demo_plot(learning_curve_output, teststats, trainstats=None, take=None): - testcurve = [teststats["initialerrors"]] - for rulescore in teststats["rulescores"]: + testcurve = [teststats['initialerrors']] + for rulescore in teststats['rulescores']: testcurve.append(testcurve[-1] - rulescore) - testcurve = [1 - x / teststats["tokencount"] for x in testcurve[:take]] + testcurve = [1 - x / teststats['tokencount'] for x in testcurve[:take]] - traincurve = [trainstats["initialerrors"]] - for rulescore in trainstats["rulescores"]: + traincurve = [trainstats['initialerrors']] + for rulescore in trainstats['rulescores']: traincurve.append(traincurve[-1] - rulescore) - traincurve = [1 - x / trainstats["tokencount"] for x in traincurve[:take]] + traincurve = [1 - x / trainstats['tokencount'] for x in traincurve[:take]] import matplotlib.pyplot as plt @@ -398,19 +399,19 @@ def _demo_plot(learning_curve_output, teststats, trainstats=None, take=None): plt.savefig(learning_curve_output) -NN_CD_TAGGER = RegexpTagger([(r"^-?[0-9]+(.[0-9]+)?$", "CD"), (r".*", "NN")]) +NN_CD_TAGGER = RegexpTagger([(r'^-?[0-9]+(.[0-9]+)?$', 'CD'), (r'.*', 'NN')]) REGEXP_TAGGER = RegexpTagger( [ - (r"^-?[0-9]+(.[0-9]+)?$", "CD"), # cardinal numbers - (r"(The|the|A|a|An|an)$", "AT"), # articles - (r".*able$", "JJ"), # adjectives - (r".*ness$", "NN"), # nouns formed from adjectives - (r".*ly$", "RB"), # adverbs - (r".*s$", "NNS"), # plural nouns - (r".*ing$", "VBG"), # gerunds - (r".*ed$", "VBD"), # past tense verbs - (r".*", "NN"), # nouns (default) + (r'^-?[0-9]+(.[0-9]+)?$', 'CD'), # cardinal numbers + (r'(The|the|A|a|An|an)$', 'AT'), # articles + (r'.*able$', 'JJ'), # adjectives + (r'.*ness$', 'NN'), # nouns formed from adjectives + (r'.*ly$', 'RB'), # adverbs + (r'.*s$', 'NNS'), # plural nouns + (r'.*ing$', 'VBG'), # gerunds + (r'.*ed$', 'VBD'), # past tense verbs + (r'.*', 'NN'), # nouns (default) ] ) @@ -419,5 +420,5 @@ def corpus_size(seqs): return (len(seqs), sum(len(x) for x in seqs)) -if __name__ == "__main__": +if __name__ == '__main__': demo_learning_curve() diff --git a/nlp_resource_data/nltk/tbl/erroranalysis.py b/nlp_resource_data/nltk/tbl/erroranalysis.py index 9c0881a..c25d33d 100644 --- a/nlp_resource_data/nltk/tbl/erroranalysis.py +++ b/nlp_resource_data/nltk/tbl/erroranalysis.py @@ -1,13 +1,16 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: Transformation-based learning # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Marcus Uneson # based on previous (nltk2) version by # Christopher Maloof, Edward Loper, Steven Bird # URL: # For license information, see LICENSE.TXT +from __future__ import print_function + + # returns a list of errors in string format @@ -21,21 +24,21 @@ def error_list(train_sents, test_sents): :param test_sents: The tagged corpus :type test_sents: list(tuple) """ - hdr = ("%25s | %s | %s\n" + "-" * 26 + "+" + "-" * 24 + "+" + "-" * 26) % ( - "left context", - 
"word/test->gold".center(22), - "right context", + hdr = ('%25s | %s | %s\n' + '-' * 26 + '+' + '-' * 24 + '+' + '-' * 26) % ( + 'left context', + 'word/test->gold'.center(22), + 'right context', ) errors = [hdr] for (train_sent, test_sent) in zip(train_sents, test_sents): for wordnum, (word, train_pos) in enumerate(train_sent): test_pos = test_sent[wordnum][1] if train_pos != test_pos: - left = " ".join("%s/%s" % w for w in train_sent[:wordnum]) - right = " ".join("%s/%s" % w for w in train_sent[wordnum + 1 :]) - mid = "%s/%s->%s" % (word, test_pos, train_pos) + left = ' '.join('%s/%s' % w for w in train_sent[:wordnum]) + right = ' '.join('%s/%s' % w for w in train_sent[wordnum + 1 :]) + mid = '%s/%s->%s' % (word, test_pos, train_pos) errors.append( - "%25s | %s | %s" % (left[-25:], mid.center(22), right[:25]) + '%25s | %s | %s' % (left[-25:], mid.center(22), right[:25]) ) return errors diff --git a/nlp_resource_data/nltk/tbl/feature.py b/nlp_resource_data/nltk/tbl/feature.py index 9a5bb00..d9c6715 100644 --- a/nlp_resource_data/nltk/tbl/feature.py +++ b/nlp_resource_data/nltk/tbl/feature.py @@ -1,17 +1,20 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: Transformation-based learning # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Marcus Uneson # based on previous (nltk2) version by # Christopher Maloof, Edward Loper, Steven Bird # URL: # For license information, see LICENSE.TXT +from __future__ import division, print_function, unicode_literals from abc import ABCMeta, abstractmethod +from six import add_metaclass -class Feature(metaclass=ABCMeta): +@add_metaclass(ABCMeta) +class Feature(object): """ An abstract base class for Features. A Feature is a combination of a specific property-computing method and a list of relative positions @@ -30,7 +33,7 @@ class Feature(metaclass=ABCMeta): """ - json_tag = "nltk.tbl.Feature" + json_tag = 'nltk.tbl.Feature' PROPERTY_NAME = None def __init__(self, positions, end=None): diff --git a/nlp_resource_data/nltk/tbl/rule.py b/nlp_resource_data/nltk/tbl/rule.py index 3c872f8..6d70954 100644 --- a/nlp_resource_data/nltk/tbl/rule.py +++ b/nlp_resource_data/nltk/tbl/rule.py @@ -1,22 +1,26 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: Transformation-based learning # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Marcus Uneson # based on previous (nltk2) version by # Christopher Maloof, Edward Loper, Steven Bird # URL: # For license information, see LICENSE.TXT +from __future__ import print_function from abc import ABCMeta, abstractmethod +from six import add_metaclass +from nltk.compat import python_2_unicode_compatible, unicode_repr from nltk import jsontags ###################################################################### # Tag Rules ###################################################################### -class TagRule(metaclass=ABCMeta): +@add_metaclass(ABCMeta) +class TagRule(object): """ An interface for tag transformations on a tagged corpus, as performed by tbl taggers. 
Each transformation finds all tokens @@ -92,6 +96,7 @@ class TagRule(metaclass=ABCMeta): raise TypeError("Rules must implement __hash__()") +@python_2_unicode_compatible @jsontags.register_tag class Rule(TagRule): """ @@ -112,7 +117,7 @@ class Rule(TagRule): """ - json_tag = "nltk.tbl.Rule" + json_tag = 'nltk.tbl.Rule' def __init__(self, templateid, original_tag, replacement_tag, conditions): """ @@ -137,19 +142,16 @@ class Rule(TagRule): def encode_json_obj(self): return { - "templateid": self.templateid, - "original": self.original_tag, - "replacement": self.replacement_tag, - "conditions": self._conditions, + 'templateid': self.templateid, + 'original': self.original_tag, + 'replacement': self.replacement_tag, + 'conditions': self._conditions, } @classmethod def decode_json_obj(cls, obj): return cls( - obj["templateid"], - obj["original"], - obj["replacement"], - tuple(tuple(feat) for feat in obj["conditions"]) + obj['templateid'], obj['original'], obj['replacement'], obj['conditions'] ) def applies(self, tokens, index): @@ -205,12 +207,12 @@ class Rule(TagRule): self.__repr = "{0}('{1}', {2}, {3}, [{4}])".format( self.__class__.__name__, self.templateid, - repr(self.original_tag), - repr(self.replacement_tag), + unicode_repr(self.original_tag), + unicode_repr(self.replacement_tag), # list(self._conditions) would be simpler but will not generate # the same Rule.__repr__ in python 2 and 3 and thus break some tests - ", ".join( - "({0},{1})".format(f, repr(v)) + ', '.join( + "({0},{1})".format(f, unicode_repr(v)) for (f, v) in self._conditions ), ) @@ -223,16 +225,16 @@ class Rule(TagRule): Return a compact, predicate-logic styled string representation of the given condition. """ - return "{0}:{1}@[{2}]".format( + return '{0}:{1}@[{2}]'.format( feature.PROPERTY_NAME, value, ",".join(str(w) for w in feature.positions), ) - conditions = " & ".join( + conditions = ' & '.join( [_condition_to_logic(f, v) for (f, v) in self._conditions] ) - s = "{0}->{1} if {2}".format( + s = '{0}->{1} if {2}'.format( self.original_tag, self.replacement_tag, conditions ) @@ -301,26 +303,26 @@ class Rule(TagRule): if len(positions) == 1: p = positions[0] if p == 0: - return "this word" + return 'this word' if p == -1: - return "the preceding word" + return 'the preceding word' elif p == 1: - return "the following word" + return 'the following word' elif p < 0: - return "word i-%d" % -p + return 'word i-%d' % -p elif p > 0: - return "word i+%d" % p + return 'word i+%d' % p else: # for complete compatibility with the wordy format of nltk2 mx = max(positions) mn = min(positions) if mx - mn == len(positions) - 1: - return "words i%+d...i%+d" % (mn, mx) + return 'words i%+d...i%+d' % (mn, mx) else: - return "words {%s}" % (",".join("i%+d" % d for d in positions),) + return 'words {%s}' % (",".join("i%+d" % d for d in positions),) - replacement = "%s -> %s" % (self.original_tag, self.replacement_tag) - conditions = (" if " if self._conditions else "") + ", and ".join( + replacement = '%s -> %s' % (self.original_tag, self.replacement_tag) + conditions = (' if ' if self._conditions else "") + ', and '.join( condition_to_str(f, v) for (f, v) in self._conditions ) return replacement + conditions diff --git a/nlp_resource_data/nltk/tbl/template.py b/nlp_resource_data/nltk/tbl/template.py index 06ddff0..b0556ed 100644 --- a/nlp_resource_data/nltk/tbl/template.py +++ b/nlp_resource_data/nltk/tbl/template.py @@ -1,20 +1,23 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: Transformation-based learning # -# Copyright (C) 
2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Marcus Uneson # based on previous (nltk2) version by # Christopher Maloof, Edward Loper, Steven Bird # URL: # For license information, see LICENSE.TXT +from __future__ import print_function from abc import ABCMeta, abstractmethod +from six import add_metaclass import itertools as it from nltk.tbl.feature import Feature from nltk.tbl.rule import Rule -class BrillTemplateI(metaclass=ABCMeta): +@add_metaclass(ABCMeta) +class BrillTemplateI(object): """ An interface for generating lists of transformational rules that apply at given sentence positions. ``BrillTemplateI`` is used by diff --git a/nlp_resource_data/nltk/test/__init__.py b/nlp_resource_data/nltk/test/__init__.py index 639b0b1..107774e 100644 --- a/nlp_resource_data/nltk/test/__init__.py +++ b/nlp_resource_data/nltk/test/__init__.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Unit Tests # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # URL: # For license information, see LICENSE.TXT diff --git a/nlp_resource_data/nltk/test/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/__init__.cpython-37.pyc index aa37dc50161681f702ec452f630a39399e70b85e..319c44058e6bf65d5716100570190b778e061a02 100644 GIT binary patch delta 31 lcmdnSyo#CIiIfy delta 43 xcmZ3*yp5ULiISrZZX5<$n7G>+}O-^9+0sz|h3wZzl diff --git a/nlp_resource_data/nltk/test/__pycache__/all.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/all.cpython-37.pyc index f4967baf4c379e5c35a19be1e4aad5f727af2978..ee3607da80a18c00c1ac7e6344a2335b281c7a4d 100644 GIT binary patch delta 34 ocmZ3($oIqPTS=cejsC01tS7bF&C>+5ayWxBu!0QJ8N ATmS$7 diff --git a/nlp_resource_data/nltk/test/__pycache__/childes_fixt.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/childes_fixt.cpython-37.pyc index 129e94957cf2324856a3f388187edb41476b252a..7c11dc197e6397785506762d4ab0eae97b02e649 100644 GIT binary patch delta 164 zcmaFN(#Oi{#LLUY00gD%6XVZLav7r-85vTTQkYv9qL@-xf*CYf zUxL(XGT!1(Oe)ULDJ@Bj&&(~zFDjX6QOQ)qIC1k%M&`-6jF!^OKxI)Zc{wH7w-{4b zG8BRA_~oEqT3n<*`7EO+BkN>wCV3&|TU_z+X{9BlMXB-eMNB|7lg*iwxj~F#5W&L? E0O-Xi6951J delta 121 zcmeBUeayn^#LLUY00eSL0rATw@=6Nv068fPDU2-)Q4A?e!3>(r6JsiwUNTI)zLSw@ z@7;+h-7#SH-m{OQq7^0X`Sb`Ze zSzm$_Yck&APfRM#&nYcQjnB+2$S*3HXc5j-#5i%HB_s31(@NsZKvhL7Kw>3B5j&9j z<)~j;T%oQQd*Q6A78`-6a!0u bO+X?zfD*SjY;yBcN^?@}K+1|i1P==U3EU}_ delta 167 zcmX@ZypWmCiI zCMI5iiE|<(PP+5m`R=>BtLb!tK>X;PeZu<{fu z;qeFonqjuOR?q#3*KxPQpp9|`(p;A?)q_SR7-63ZADYdl|$GWUG<(;r}E8T{MS+pO` z*7eTi`k_s%a9*<>9gsE{gp(-7W(?ju8?+O~*4}v|7&GDWVlVLW9@VYam3AxrGb1Q% Q$7enJJT%&HPl6Nj3%ulK=l}o! 
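An aside, not part of the patch: the nlp_resource_data/nltk/tbl/rule.py and nlp_resource_data/nltk/tbl/template.py hunks above show the two idioms this change applies throughout the tree to restore Python 2/3 dual support, ``six.add_metaclass`` in place of the Python-3-only ``metaclass=`` class keyword, and ``nltk.compat.python_2_unicode_compatible`` together with ``nltk.compat.unicode_repr`` so that ``__str__``/``__repr__`` can be written once in unicode. The sketch below shows the pattern on a hypothetical rule class; it assumes the NLTK 3.4 line (where ``nltk.compat`` still ships these helpers) and an installed ``six``::

    from abc import ABCMeta, abstractmethod

    from six import add_metaclass
    from nltk.compat import python_2_unicode_compatible, unicode_repr


    @add_metaclass(ABCMeta)          # replaces `class RuleI(metaclass=ABCMeta)`
    class RuleI(object):
        @abstractmethod
        def applies(self, tokens, index):
            """Return True if the rule applies at tokens[index]."""


    @python_2_unicode_compatible     # lets __str__/__repr__ below return unicode text
    class DemoRule(RuleI):           # hypothetical class, for illustration only
        def __init__(self, original_tag, replacement_tag):
            self.original_tag = original_tag
            self.replacement_tag = replacement_tag

        def applies(self, tokens, index):
            return tokens[index][1] == self.original_tag

        def __str__(self):
            return u'{0}->{1}'.format(self.original_tag, self.replacement_tag)

        def __repr__(self):
            # unicode_repr keeps the repr identical under Python 2 and 3 (no u'' prefix)
            return 'DemoRule({0}, {1})'.format(
                unicode_repr(self.original_tag), unicode_repr(self.replacement_tag)
            )

The same two decorators recur in most of the source hunks that follow in this patch.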
literal 0 HcmV?d00001 diff --git a/nlp_resource_data/nltk/test/__pycache__/corpus_fixt.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/corpus_fixt.cpython-37.pyc index 7bed2971696e47f3e796e366ea0e1ff2631f29b2..4da5b4e88334e64704acd3f510650740878b0003 100644 GIT binary patch delta 154 zcmcc0_?wZ}iIav7r-89{8O9Hw06C}tp=Iha9{ z(rFF|T1CWsm_ zMRDillw|8A=NA=}78fxAg;z2ZF#{av7r-85vTTQkYv9qL@-xf*CYf zUxL(XGT!1(Oe)ULDJ@Bj&&(~zFDjX6kz9*$X>pPM(r6JxTOUNTHPna;>G z*^p6^k$JK=qox`YP+bu-khsN|yON=Z9mx9Sr=O9Zo2s9cSecPukXV$huQ$1m(c7C1 zC|Aq}Bp4WZ7(^b diff --git a/nlp_resource_data/nltk/test/__pycache__/doctest_nose_plugin.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/doctest_nose_plugin.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4e9a51887e6df762960e8daaf6c7a3edfa51d7c9 GIT binary patch literal 5254 zcmbVQO?TAR8NOH2Xf&D+8yk!vBvDDI84~O^X__ObhZi~a~L+b){Fz^c!CHRIrfp5|0@baiz<-}`<%@1yy! zUbh*pe)${k?}r)tC%sIr5-R6$r+-Beuz*Lb%U#Z?u0@)wsi*Gh>S?%!dY0UhdYZ0@ zryiBNmTPq@Zbi*ABD-64tEyg#YTde9S9LRLbf?^@?zB7IJ>VYTEHpitcMk1#P(^z+JdD82N2B$nZpyo&Coo|i8sEg52@ z9wqB}6>lr;C|vf_Ft1;@diDAp@Am4I@+WuS5%cX|p%$Xp&UR}1x#5%5s& zly)_!NDK6*%q<5-P{Pv+Oo&|04|%uab}I?O>wVel%jJ!*wHb=R;gk38WWQ~G*1!GW zP2lC3P18h2(1`t7_H^H+>(D`;s6$Tc6q6vilr!N`O#9;ez?@zmK#a@_Tsxs zDJGuo`K?WVJxrJ4NNz5Xo|b|_UtXM~q1RJdUF>b=GuWYFVT8NQKV5=CZ1~oh@UowNaHcV8{XUFkb#P3I8)Pp;}!5On1T(>sb zsncz5wPVK*g4Ed$1kPGt_C@HlyS+q6M<$LRMGkqf(?Wo!i?&+iibQ*gPFf-D_tfCE zM7*zhX`6-D#y;Y_u@4g#oWk;s-0Gxi|O>4uC5t>*$bSC4SWX+j~x&|AkN+_V-)b|$N!xi#n z@@+C?`UUg~OkscEVjzJl9fVtl?P%&KSI<*M{YxrQREA^@^NA-*i^l<(y?@ znsJy7?d^GaAhYGn4wJJxEHKvjjxj9VVeu~vCJ%1f;$NH%tDFtyYT~LSt%&1bE0K5pP33aj*O1tG_|bOc_}jh9U9Jz zg!Of4SkLO6V>pMDFK2Z~_=L6A&V_Jb4T@;8q35}Job(3AmXm%IIB_B;4t*kh-#L*wCr_l!fwR)y zQqEq4KD7gglIU?5kmH|7^U`{hJWK^l%$kOHjdm~y7jlbsQ8hDa$6=hfb{fil&!b3j z>oW93kUWXiD`w7Prdz^@+dbE!&Wq@DZS2;Mr1#hta5rF#O0p}><9P+iHjGmMq#$h1 ztuPMKCvCZrYe||LJs&meA;P2(fpTLD?MW|;^KubiK-KjS$y|hfke6B!QV&V34xHvz z+M_JDB?0Aap;pn;?KC(29@Yp54+{hU9+pW7i1yr6IWNttR}?s05+V`8BKb-|_^`j8 zmpA;>mr^KA2|{PLK91=dzDPsiRw0u&jw{y+xBPAo5hp4bs+5GyLP<;HGviT6cM~NE z0@CJ`$|d3iO?jQR(1MLz$7pwId^~Tg$+5vmPq~h>ig#$ZQUalU&cod7DY2?J8Z9-XBMp7i{B_oqp+@P-)=WO5S=!)DD2jx59<&$#+tzzfMBYIZhg>tIjQR&7bgAuV;*8W(3gLnF)XMaWYzI9ZeMg1XqiM`Fkn1@G*rRCz0G07u>|klz`WPd=DB!KH2me?U8Y_U`cT3h6g`wZIJ2+(ZQv(#h>N8)#QZ*_cG-(`wS6J z%CYCxv8*CMCL8?fN2z4-OyQR2P?@aKEQ!llD7V{bJ5Hq^w?gGSVgaq{Be|+8zN7#ZwAo?gQ$UBR@^YR?p2s8-Qf$64$T}9Jb#>JdO*#Tn+_!%Z)P!BX5M$}dX4Tlrc z!?nDiI7)$MmZxYefhl#2U!W)D&wD=n9A74X7r0ZxY6jq`fh7-)zvu}2L$*Z{(?uT^ zL|v8izX2W;UsCj0L0q6 z0sb@_Sl_ih!fXYwl~u(rp7A%I@i)FDtbN8$KjUWyM{dWPF?hlxm?y;f^Wor~vF#>J zVD!Xse22cLoJ63u8E!8<_M?92^!&DX-}!v#H0K57ioAmSxYk|=%@-fw-K_`X?^6{o zf{gA?kJ~Qo5s{PBJmJJ+=9G%J(Yim=ue9SJ6uG&ko+_)0C7Rn9FL_;k`GKxBn+n>^56z9kr#eluO#sx5j_QSp$ z_gf!_+r>x|-y~gMjz2(^9JyH>AGjdJm@B!Jy?U-gb9qUr?t4@~+`}=paHr=`FukEN zeGR3po7_ZRpHUg#=Eqcae?{f_8RUKwX~IOG3G6tiQSKkSJT~FJe7|RyW>pYKh?l51 zh9WO}#G5=%c+^DMO{`Ku=%hedNtf5h7}g4-2!j1#ozj=^cVDQ4Q@K4p1l(b@see!8?>2u9i#D*xpJ_FMHj4{- zZyqJSS$udxX6jL&-Qt&2qrZ#9X(|+>f~!#Xl?QA`w32>8`EX z3y>%Db`R|vICjWX5WXoUtC*-eE5db9iBNcje^68v|7IMbfrNzOCW_ol9(KZ(R7z7v hdC{43>r9bu&e6$IvL^Y=av7r-85vTTQkYv9qL@-xf*CYf zUxL(XGT!1(Oe)ULDJ@Bj&&(~zFDjX6k<3)YIB{;MI5SW|6n|b$Nw!`=Vo`CbUT$Je zNfFCrPDW`ZkX#W9NC9U`Y5`cfXeC1tJ5c18w|;4Hk^bZ$#*i3RAitOgNH8$+F!C@l z0ZBf_B2FORPm}o;SA2Y0X-R2OYJ7YV6Hp8+0X74P-~dY8;;_lhPbtkwwF4(r6JwH@UNTHP7%I*L zl#Ale%PGm$D@ZIVPSwjz%qb~io~+9#tp}1TVg@PTOi3*OOBbzVC}IPO{0i64$j?pH z&q}P!$Op<~>+4O(L zFF|TG8E^3?CKc!Bl$NB%XXX~<7nMx3$Yv^HoVYZDk$Ey7qa-8CWDQ15DQ2MJA{HQV 
zi!pa4LlFm%^~*`Ww75urayg@S0xOVTEC3`J7av7r-85vTTQkYv9qL@-xf*CYf zUxL(XGT!1(Oe)ULDJ@Bj&&(~zFDjX6kzAv3X>pPMcF!C_+FflQ*G4cT+6Jrs`Nq(Blx47ct q(@IN9i&Ep`iPO2S9Suu#(r6JxTOUNTHPnJvu( zl#3G1%PGm$%gjqlElSNxPSwjzOinFgp6t)4sRoiSVg?eo7;{%L6tM$YzXJ6$@^e%5 zvl1&a@(U7+vi0>QPh#|AWSRVsQPP+bsKQT^u?R$il_L=xAagiua`RJ4b5iYCfV|H@ G%m4sB=qZE% diff --git a/nlp_resource_data/nltk/test/__pycache__/portuguese_en_fixt.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/portuguese_en_fixt.cpython-37.pyc index f063665344998c5d4f812967f6c9d7e2117e1278..e888c661a6379ac9e690694c7be7757aea70017c 100644 GIT binary patch delta 441 zcmYk0yGq1B6oxaIT-G(aVrL^vVH3e!(N-)h1RGrt5z`FW%&1|LOqdH^P!`m}(%@70 z95y~f-oRI|@=O$QiZlQDIp_P&JAU?udxJsWfoH;oexPQ!aa~LX|5+w*(?G|o{ z6WnQBc%`5Cpgrb4IZ42RVSF=o{ch5)~W6y g{v25Y(}S6Axy$)iY)sqR`8j6SqErAS@*y?C-?Bn&ng9R* delta 242 zcmeBRJ=|Csu39ft4|)FoBfS0+rP;rZ6`%HZi6!0F|*!JQr@J$rQzsmtUM(#0=D;$#jb? zH$SB`2f|&+P{axp{S~C2k)NBYpOsjdkzbHll&!Bfc`KuTI7qIT4M;FB@-Q(m7O?|a tewxft+<7@A*?P(OMFpkBMNB|JungEJBm$)I7KaVQIy;cEVi3W@3;^3}F>(L^ diff --git a/nlp_resource_data/nltk/test/__pycache__/probability_fixt.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/probability_fixt.cpython-37.pyc index e7dacd8e5f859be5dd3f800d9fc4e8005e14d17b..b8f95606f905ab90f871460ff2e79a0b5c521d24 100644 GIT binary patch delta 195 zcmZ3^e2JOYiIav7r-85vTTQkYv9qL@-xf*CYf zUxL(XGT!1(Oe)ULDJ@Bj&&(~zFDjX65zbV^IB}v4BlE;_O5)5wRYfd7VkJWnJCOS2 zqF-8Eq(51h(U*~Paw4O!8z)fIPm}o;SA2Y0X-R2OYJ7YV6G#_G0;~s#-~fu<;;_lh QPbtkwwF4(r6Jx@eUNTJFYQxAh z@spA=6Hq}BGmu!xP{anLeg)`f&{8xSwwu-3^H_Rj zkdYkuuyBk7njaAFG3WdQZaL>P{~+fmmz+{PUIEkU>iVj_sVaTl@4S!A#_dMKA^7Fi z2mhZBjovW>LO9_Nk_Z??lu=Nah8okVQ)ha08qB~`3(d%4R>jdnJF2l-;+-1GzB&N^E4}>?k_m*%^SW}Ii6=$b} zt%}u?b2H>&`!(L=^d0RH)))Qb_mJwMGXezbyv5sZiRkZ;DNF&!*bse{`$kcFh`$zn zzQ8*psv>-mFY#r*!n-3w^j8QwFO2;SabB$MU*Nr8m5%fg8JziB={=xjGkB3EVV((p z98D6LmEJ^->v874%;RA;PU0eh%{8XSVRlg3z8?kS*!KreIy=cQ6KQsLeE7%j(1hck zu;AfFM)qNc{M`Kojv%84nzWC|iFO22G6Nv5>^-7zrk~=>kT)xA&asste@ahkN)}FV zQrI0h(!|=OH3{?4I8LR3h}8nlf2;!1R@wEo6g*B+;RmmSaTvS^g{-NBk>(R2rBi&X zZJFCw@>E_a-e}jBJxp%kPL)Hz1shu679X|8*Hxr!`2H{q(sU@o(D!8gx#BPV6(q%7Zi_}=T};u{#+@{thvCItkOktS-)33 znmbY^sIZDyQ4j|s)LUsN*NP)^JyO$t*6-9)>Xr6(l<+(hH`LteZ8R;s1+`qY=sI}R kqYiZ`#chKJ4sEqeuxlQeVA4hEXePKC)u~2-g07kLf6yQR2mk;8 delta 914 zcmYjPL2DC16rMLb+1(_Y%_dEgCat!oVxXj6)D~(5sc3sB1%>K>EZfeICfVJjv)j@_ zrH8afL0P;v7cW9Do;>?6_6G?51M%d{wy67P?+ZD0-fqvO?)FTsR@WfKYvxoaxu#mFHL~u zSZ<70Al5|CIw8t8cfkCDUheNT@=m`#HR-Z zaSccXE6~~-6yn$DIbua-T_9$0GS+CF*T;x9c;jkjB6(7>$w~|^aTC!NZ$&@2(*;@0 zGMkkz5N~auF^oa#l1<)H;3Gw>$wr;Gn8PYvtU9d9rdW+lvwAnjTMb0#IPTx!bG*^N z%Np;c9|kW`d*-K_XmSlpPD(;Q9;Bx023{|8-LJo(4Cm_-K550M4;$#M^A66?6+VM8 zngF0fa~H*$SW>;1-1OpHte>GVzA}^@hNK2;w@pFhcT!{IA9Z_SMDreFJ6WAp1ZUwe z;;wh>^?Yy7=fY6jTy!+z0t++wtaYj{mJfyjU+#OS2g8vk2Fsl!)mEyOWX{VKWj_X3 z;3W6RtH-LHmRxt=_o8T@`@ZXnvg~E44Z>Enqmn68Q?5vz&m=_YeIs=7FFT|C&aZWO z$WKOM{edE5nFtcI$!hKpwvsQot#TWu#1q|Pk(XF|K6#=q-d8j_t;}_9jd$2^Kju*! 
z2~A IFc7TcKi$K}Q2+n{ diff --git a/nlp_resource_data/nltk/test/__pycache__/segmentation_fixt.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/segmentation_fixt.cpython-37.pyc index 41dbea1d087729da317fd3feac041982ff4c957b..d703bab62078f5eb68379abece3115f072d23416 100644 GIT binary patch delta 210 zcmZ37;+h-7#SH-m{OQq7^0X`Sb`Ze zSzm$_Yck&APfRM#&nYcQjnB+2$S*3HXc5j-#5i%HEhF>9^Gf2(KvhL7Kw>3B5j&9j z<*Hv=T%oQQd*Q6A78`-6a!0u bO+X?zfD*SjY;yBcN^?@}K+1|i1P==UD^MxP delta 167 zcmcb@ypoyEiI_F<5lYVJ& zk^aQv-XW|&ZZQjxU|{57D&hpvewxg;xZ>l}N=r(MQsd)`n1Etn39wcqf&(aVi^C>2 RKczG$)efYr7)0$J*KTSq34c3K3umk08aoFVMrYm-iIav7r-89{8O9Hw06C}tp=Iha9{ z(rFF|T1CWsm_ zMRDillw|8A=NA=}78fxAg;z2ZF#{av7r-85vSoQ3VxBlrT!V)(XfoYm%gs+I%}Ff+iLYcRVgXXWeDpK&b5r%R5-T(E3lfX6_4Ov+ s^Kk}A6@v%{MjoajHXyCZ=%>jDW`Xq~5v)MjTO2kJv+Y2dJ_9iW0IrWEbpQYW diff --git a/nlp_resource_data/nltk/test/all.py b/nlp_resource_data/nltk/test/all.py index 5844a39..c48e52a 100644 --- a/nlp_resource_data/nltk/test/all.py +++ b/nlp_resource_data/nltk/test/all.py @@ -12,10 +12,10 @@ import os.path def additional_tests(): - # print("here-000000000000000") - # print("-----", glob(os.path.join(os.path.dirname(__file__), '*.doctest'))) + # print "here-000000000000000" + # print "-----", glob(os.path.join(os.path.dirname(__file__), '*.doctest')) dir = os.path.dirname(__file__) - paths = glob(os.path.join(dir, "*.doctest")) + paths = glob(os.path.join(dir, '*.doctest')) files = [os.path.basename(path) for path in paths] return unittest.TestSuite([doctest.DocFileSuite(file) for file in files]) diff --git a/nlp_resource_data/nltk/test/bnc.doctest b/nlp_resource_data/nltk/test/bnc.doctest index 4b27cde..e16f8a1 100644 --- a/nlp_resource_data/nltk/test/bnc.doctest +++ b/nlp_resource_data/nltk/test/bnc.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT >>> import os.path diff --git a/nlp_resource_data/nltk/test/ccg.doctest b/nlp_resource_data/nltk/test/ccg.doctest index acc29d5..cc0ad49 100644 --- a/nlp_resource_data/nltk/test/ccg.doctest +++ b/nlp_resource_data/nltk/test/ccg.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT ============================== @@ -282,7 +282,7 @@ Unicode words are supported. Lexicons for the tests: - >>> lex = lexicon.parseLexicon(''' + >>> lex = lexicon.parseLexicon(u''' ... :- S, N, NP, PP ... ... AdjI :: N\\N diff --git a/nlp_resource_data/nltk/test/ccg_semantics.doctest b/nlp_resource_data/nltk/test/ccg_semantics.doctest index 450e78e..ce62733 100644 --- a/nlp_resource_data/nltk/test/ccg_semantics.doctest +++ b/nlp_resource_data/nltk/test/ccg_semantics.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT ============================================== diff --git a/nlp_resource_data/nltk/test/chat80.doctest b/nlp_resource_data/nltk/test/chat80.doctest index 50d0c42..9efe693 100644 --- a/nlp_resource_data/nltk/test/chat80.doctest +++ b/nlp_resource_data/nltk/test/chat80.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. 
For license information, see LICENSE.TXT ======= diff --git a/nlp_resource_data/nltk/test/childes_fixt.py b/nlp_resource_data/nltk/test/childes_fixt.py index 312449b..04701fb 100644 --- a/nlp_resource_data/nltk/test/childes_fixt.py +++ b/nlp_resource_data/nltk/test/childes_fixt.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import def setup_module(module): @@ -6,7 +7,7 @@ def setup_module(module): import nltk.data try: - nltk.data.find("corpora/childes/data-xml/Eng-USA-MOR/") + nltk.data.find('corpora/childes/data-xml/Eng-USA-MOR/') except LookupError as e: print(e) raise SkipTest( diff --git a/nlp_resource_data/nltk/test/chunk.doctest b/nlp_resource_data/nltk/test/chunk.doctest index ff4f157..6fd2ad7 100644 --- a/nlp_resource_data/nltk/test/chunk.doctest +++ b/nlp_resource_data/nltk/test/chunk.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT ========== diff --git a/nlp_resource_data/nltk/test/classify.doctest b/nlp_resource_data/nltk/test/classify.doctest index 26d14e6..d208084 100644 --- a/nlp_resource_data/nltk/test/classify.doctest +++ b/nlp_resource_data/nltk/test/classify.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT ============= @@ -49,7 +49,6 @@ haven't done this yet for all tests). ... (dict(a=0,b=1,c=0), 'x'), ... (dict(a=0,b=0,c=0), 'x'), ... (dict(a=0,b=1,c=1), 'y'), - ... (dict(a=None,b=1,c=0), 'x'), ... ] >>> test = [ ... (dict(a=1,b=0,c=1)), # unseen @@ -67,24 +66,24 @@ Test the Naive Bayes classifier: ['y', 'x', 'y', 'x'] >>> for pdist in classifier.prob_classify_many(test): ... print('%.4f %.4f' % (pdist.prob('x'), pdist.prob('y'))) - 0.2500 0.7500 - 0.5833 0.4167 - 0.3571 0.6429 - 0.7000 0.3000 + 0.3203 0.6797 + 0.5857 0.4143 + 0.3792 0.6208 + 0.6470 0.3530 >>> classifier.show_most_informative_features() Most Informative Features - c = 0 x : y = 2.3 : 1.0 - c = 1 y : x = 1.8 : 1.0 - a = 1 y : x = 1.7 : 1.0 - a = 0 x : y = 1.0 : 1.0 - b = 0 x : y = 1.0 : 1.0 - b = 1 x : y = 1.0 : 1.0 + c = 0 x : y = 2.0 : 1.0 + c = 1 y : x = 1.5 : 1.0 + a = 1 y : x = 1.4 : 1.0 + b = 0 x : y = 1.2 : 1.0 + a = 0 x : y = 1.2 : 1.0 + b = 1 y : x = 1.1 : 1.0 -Test the Decision Tree classifier (without None): +Test the Decision Tree classifier: >>> classifier = nltk.classify.DecisionTreeClassifier.train( - ... train[:-1], entropy_cutoff=0, - ... support_cutoff=0) + ... train, entropy_cutoff=0, + ... support_cutoff=0) >>> sorted(classifier.labels()) ['x', 'y'] >>> print(classifier) @@ -100,23 +99,6 @@ Test the Decision Tree classifier (without None): Traceback (most recent call last): . . . NotImplementedError - - -Test the Decision Tree classifier (with None): - - >>> classifier = nltk.classify.DecisionTreeClassifier.train( - ... train, entropy_cutoff=0, - ... support_cutoff=0) - >>> sorted(classifier.labels()) - ['x', 'y'] - >>> print(classifier) - c=0? .................................................. x - a=0? ................................................ x - a=1? ................................................ y - a=None? ............................................. x - c=1? .................................................. y - - Test SklearnClassifier, which requires the scikit-learn package. 
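An aside, not part of the patch: the classify.doctest hunk above reverts the expected Naive Bayes numbers and drops the ``a=None`` training row, because the 3.4 classifiers estimate slightly different probabilities. For readers following along, this is roughly the API the doctest exercises; the feature dicts below are made up, and the printed numbers will differ between NLTK releases, which is exactly why the doctest's expected output changes here::

    import nltk

    # Toy (feature-dict, label) pairs, in the same shape as the doctest's data.
    train = [
        (dict(a=1, b=1, c=1), 'y'),
        (dict(a=1, b=1, c=0), 'y'),
        (dict(a=0, b=1, c=1), 'x'),
        (dict(a=0, b=0, c=0), 'x'),
        (dict(a=0, b=1, c=1), 'y'),
        (dict(a=0, b=1, c=0), 'x'),
    ]
    test = [dict(a=1, b=0, c=1), dict(a=1, b=0, c=0)]   # unseen combinations

    nb = nltk.classify.NaiveBayesClassifier.train(train)
    print(sorted(nb.labels()))                  # ['x', 'y']
    print(nb.classify_many(test))               # most likely label per featureset
    for pdist in nb.prob_classify_many(test):
        print('%.4f %.4f' % (pdist.prob('x'), pdist.prob('y')))
    nb.show_most_informative_features()

    # The decision tree trainer takes the same data; the hunk above now trains it
    # on the full training list, since the a=None row is gone.
    dt = nltk.classify.DecisionTreeClassifier.train(
        train, entropy_cutoff=0, support_cutoff=0
    )
    print(dt)                                   # prints the learned tree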
diff --git a/nlp_resource_data/nltk/test/classify_fixt.py b/nlp_resource_data/nltk/test/classify_fixt.py index b9d1496..dce0704 100644 --- a/nlp_resource_data/nltk/test/classify_fixt.py +++ b/nlp_resource_data/nltk/test/classify_fixt.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import # most of classify.doctest requires numpy diff --git a/nlp_resource_data/nltk/test/collections.doctest b/nlp_resource_data/nltk/test/collections.doctest index 241913c..6a67511 100644 --- a/nlp_resource_data/nltk/test/collections.doctest +++ b/nlp_resource_data/nltk/test/collections.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT =========== diff --git a/nlp_resource_data/nltk/test/collocations.doctest b/nlp_resource_data/nltk/test/collocations.doctest index b1bb33a..b4af859 100644 --- a/nlp_resource_data/nltk/test/collocations.doctest +++ b/nlp_resource_data/nltk/test/collocations.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT ============== @@ -16,14 +16,13 @@ measured using Pointwise Mutual Information. >>> from nltk.collocations import * >>> bigram_measures = nltk.collocations.BigramAssocMeasures() >>> trigram_measures = nltk.collocations.TrigramAssocMeasures() - >>> fourgram_measures = nltk.collocations.QuadgramAssocMeasures() >>> finder = BigramCollocationFinder.from_words( ... nltk.corpus.genesis.words('english-web.txt')) >>> finder.nbest(bigram_measures.pmi, 10) # doctest: +NORMALIZE_WHITESPACE - [('Allon', 'Bacuth'), ('Ashteroth', 'Karnaim'), ('Ben', 'Ammi'), - ('En', 'Mishpat'), ('Jegar', 'Sahadutha'), ('Salt', 'Sea'), - ('Whoever', 'sheds'), ('appoint', 'overseers'), ('aromatic', 'resin'), - ('cutting', 'instrument')] + [(u'Allon', u'Bacuth'), (u'Ashteroth', u'Karnaim'), (u'Ben', u'Ammi'), + (u'En', u'Mishpat'), (u'Jegar', u'Sahadutha'), (u'Salt', u'Sea'), + (u'Whoever', u'sheds'), (u'appoint', u'overseers'), (u'aromatic', u'resin'), + (u'cutting', u'instrument')] While these words are highly collocated, the expressions are also very infrequent. 
Therefore it is useful to apply filters, such as ignoring all @@ -31,10 +30,10 @@ bigrams which occur less than three times in the corpus: >>> finder.apply_freq_filter(3) >>> finder.nbest(bigram_measures.pmi, 10) # doctest: +NORMALIZE_WHITESPACE - [('Beer', 'Lahai'), ('Lahai', 'Roi'), ('gray', 'hairs'), - ('Most', 'High'), ('ewe', 'lambs'), ('many', 'colors'), - ('burnt', 'offering'), ('Paddan', 'Aram'), ('east', 'wind'), - ('living', 'creature')] + [(u'Beer', u'Lahai'), (u'Lahai', u'Roi'), (u'gray', u'hairs'), + (u'Most', u'High'), (u'ewe', u'lambs'), (u'many', u'colors'), + (u'burnt', u'offering'), (u'Paddan', u'Aram'), (u'east', u'wind'), + (u'living', u'creature')] We may similarly find collocations among tagged words: @@ -64,10 +63,10 @@ Or spanning intervening words: >>> ignored_words = nltk.corpus.stopwords.words('english') >>> finder.apply_word_filter(lambda w: len(w) < 3 or w.lower() in ignored_words) >>> finder.nbest(bigram_measures.likelihood_ratio, 10) # doctest: +NORMALIZE_WHITESPACE - [('chief', 'chief'), ('became', 'father'), ('years', 'became'), - ('hundred', 'years'), ('lived', 'became'), ('king', 'king'), - ('lived', 'years'), ('became', 'became'), ('chief', 'chiefs'), - ('hundred', 'became')] + [(u'chief', u'chief'), (u'became', u'father'), (u'years', u'became'), + (u'hundred', u'years'), (u'lived', u'became'), (u'king', u'king'), + (u'lived', u'years'), (u'became', u'became'), (u'chief', u'chiefs'), + (u'hundred', u'became')] Finders ~~~~~~~ @@ -127,12 +126,6 @@ A closer look at the finder's ngram frequencies: ((',', 'do', 'not'), 1), (('I', 'am', '!'), 1), (('Sam', 'I', '!'), 1), (('Sam', 'I', 'am'), 1)] -A similar interface is provided for fourgrams: - - >>> finder_4grams = QuadgramCollocationFinder.from_words(tokens) - >>> scored_4grams = finder_4grams.score_ngrams(fourgram_measures.raw_freq) - >>> set(fourgram for fourgram, score in scored_4grams) == set(nltk.ngrams(tokens, n=4)) - True Filtering candidates ~~~~~~~~~~~~~~~~~~~~ diff --git a/nlp_resource_data/nltk/test/compat.doctest b/nlp_resource_data/nltk/test/compat.doctest new file mode 100644 index 0000000..1d668f3 --- /dev/null +++ b/nlp_resource_data/nltk/test/compat.doctest @@ -0,0 +1,134 @@ + +========================================= +NLTK Python 2.x - 3.x Compatibility Layer +========================================= + +NLTK comes with a Python 2.x/3.x compatibility layer, nltk.compat +(which is loosely based on `six `_):: + + >>> from nltk import compat + >>> compat.PY3 + False + >>> # and so on + +@python_2_unicode_compatible +---------------------------- + +Under Python 2.x ``__str__`` and ``__repr__`` methods must +return bytestrings. + +``@python_2_unicode_compatible`` decorator allows writing these methods +in a way compatible with Python 3.x: + +1) wrap a class with this decorator, +2) define ``__str__`` and ``__repr__`` methods returning unicode text + (that's what they must return under Python 3.x), + +and they would be fixed under Python 2.x to return byte strings:: + + >>> from nltk.compat import python_2_unicode_compatible + + >>> @python_2_unicode_compatible + ... class Foo(object): + ... def __str__(self): + ... return u'__str__ is called' + ... def __repr__(self): + ... 
return u'__repr__ is called' + + >>> foo = Foo() + >>> foo.__str__().__class__ + + >>> foo.__repr__().__class__ + + >>> print(foo) + __str__ is called + >>> foo + __repr__ is called + +Original versions of ``__str__`` and ``__repr__`` are available as +``__unicode__`` and ``unicode_repr``:: + + >>> foo.__unicode__().__class__ + + >>> foo.unicode_repr().__class__ + + >>> unicode(foo) + u'__str__ is called' + >>> foo.unicode_repr() + u'__repr__ is called' + +There is no need to wrap a subclass with ``@python_2_unicode_compatible`` +if it doesn't override ``__str__`` and ``__repr__``:: + + >>> class Bar(Foo): + ... pass + >>> bar = Bar() + >>> bar.__str__().__class__ + + +However, if a subclass overrides ``__str__`` or ``__repr__``, +wrap it again:: + + >>> class BadBaz(Foo): + ... def __str__(self): + ... return u'Baz.__str__' + >>> baz = BadBaz() + >>> baz.__str__().__class__ # this is incorrect! + + + >>> @python_2_unicode_compatible + ... class GoodBaz(Foo): + ... def __str__(self): + ... return u'Baz.__str__' + >>> baz = GoodBaz() + >>> baz.__str__().__class__ + + >>> baz.__unicode__().__class__ + + +Applying ``@python_2_unicode_compatible`` to a subclass +shouldn't break methods that was not overridden:: + + >>> baz.__repr__().__class__ + + >>> baz.unicode_repr().__class__ + + +unicode_repr +------------ + +Under Python 3.x ``repr(unicode_string)`` doesn't have a leading "u" letter. + +``nltk.compat.unicode_repr`` function may be used instead of ``repr`` and +``"%r" % obj`` to make the output more consistent under Python 2.x and 3.x:: + + >>> from nltk.compat import unicode_repr + >>> print(repr(u"test")) + u'test' + >>> print(unicode_repr(u"test")) + 'test' + +It may be also used to get an original unescaped repr (as unicode) +of objects which class was fixed by ``@python_2_unicode_compatible`` +decorator:: + + >>> @python_2_unicode_compatible + ... class Foo(object): + ... def __repr__(self): + ... return u'' + + >>> foo = Foo() + >>> repr(foo) + '' + >>> unicode_repr(foo) + u'' + +For other objects it returns the same value as ``repr``:: + + >>> unicode_repr(5) + '5' + +It may be a good idea to use ``unicode_repr`` instead of ``%r`` +string formatting specifier inside ``__repr__`` or ``__str__`` +methods of classes fixed by ``@python_2_unicode_compatible`` +to make the output consistent between Python 2.x and 3.x. diff --git a/nlp_resource_data/nltk/test/compat_fixt.py b/nlp_resource_data/nltk/test/compat_fixt.py new file mode 100644 index 0000000..5878d9b --- /dev/null +++ b/nlp_resource_data/nltk/test/compat_fixt.py @@ -0,0 +1,10 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import +from nltk.compat import PY3 + + +def setup_module(module): + from nose import SkipTest + + if PY3: + raise SkipTest("compat.doctest is for Python 2.x") diff --git a/nlp_resource_data/nltk/test/corpus.doctest b/nlp_resource_data/nltk/test/corpus.doctest index 73b8fd7..3fa0ae6 100644 --- a/nlp_resource_data/nltk/test/corpus.doctest +++ b/nlp_resource_data/nltk/test/corpus.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT ================ @@ -94,7 +94,7 @@ If the reader methods are called without any arguments, they will typically load all documents in the corpus. 
>>> len(inaugural.words()) - 149797 + 145735 If a corpus contains a README file, it can be accessed with a ``readme()`` method: @@ -109,7 +109,7 @@ Here are the first few words from each of NLTK's plaintext corpora: >>> nltk.corpus.abc.words() ['PM', 'denies', 'knowledge', 'of', 'AWB', ...] >>> nltk.corpus.genesis.words() - ['In', 'the', 'beginning', 'God', 'created', ...] + [u'In', u'the', u'beginning', u'God', u'created', ...] >>> nltk.corpus.gutenberg.words(fileids='austen-emma.txt') ['[', 'Emma', 'by', 'Jane', 'Austen', '1816', ...] >>> nltk.corpus.inaugural.words() @@ -199,7 +199,7 @@ CoNLL 2002 Corpus includes named entity chunks. (NP the/DT Exchequer/NNP) ...) >>> print(conll2002.sents()) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE - [['Sao', 'Paulo', '(', 'Brasil', ')', ',', ...], ['-'], ...] + [[u'Sao', u'Paulo', u'(', u'Brasil', u')', u',', ...], [u'-'], ...] >>> for tree in conll2002.chunked_sents()[:2]: ... print(tree) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE (S @@ -387,8 +387,8 @@ examples illustrate the use of the wordlist corpora: >>> stopwords.fileids() # doctest: +ELLIPSIS ['arabic', 'azerbaijani', 'danish', 'dutch', 'english', 'finnish', 'french', ...] - >>> sorted(stopwords.words('portuguese')) # doctest: +ELLIPSIS - ['a', 'ao', 'aos', 'aquela', 'aquelas', 'aquele', 'aqueles', ...] + >>> stopwords.words('portuguese') # doctest: +ELLIPSIS + ['de', 'a', 'o', 'que', 'e', 'do', 'da', 'em', 'um', 'para', ...] >>> names.fileids() ['female.txt', 'male.txt'] >>> names.words('male.txt') # doctest: +ELLIPSIS @@ -595,7 +595,8 @@ We can compute stats for specific product features: >>> n_reviews = len([(feat,score) for (feat,score) in product_reviews_1.features('Canon_G3.txt') if feat=='picture']) >>> tot = sum([int(score) for (feat,score) in product_reviews_1.features('Canon_G3.txt') if feat=='picture']) - >>> mean = tot / n_reviews + >>> # We use float for backward compatibility with division in Python2.7 + >>> mean = tot/float(n_reviews) >>> print(n_reviews, tot, mean) 15 24 1.6 @@ -1215,7 +1216,7 @@ definitions of these data access methods wherever possible. At a high level, corpora can be divided into three basic types: -- A *token corpus* contains information about specific occurrences of +- A *token corpus* contains information about specific occurences of language use (or linguistic tokens), such as dialogues or written texts. Examples of token corpora are collections of written text and collections of speech. @@ -1336,9 +1337,9 @@ corpora, and returns a flat list of word strings: >>> nltk.corpus.treebank.words() ['Pierre', 'Vinken', ',', '61', 'years', 'old', ...] >>> nltk.corpus.conll2002.words() - ['Sao', 'Paulo', '(', 'Brasil', ')', ',', '23', ...] + [u'Sao', u'Paulo', u'(', u'Brasil', u')', u',', u'23', ...] >>> nltk.corpus.genesis.words() - ['In', 'the', 'beginning', 'God', 'created', ...] + [u'In', u'the', u'beginning', u'God', u'created', ...] On the other hand, the `tagged_words()` method is only supported by corpora that include part-of-speech annotations: @@ -1348,7 +1349,7 @@ corpora that include part-of-speech annotations: >>> nltk.corpus.treebank.tagged_words() [('Pierre', 'NNP'), ('Vinken', 'NNP'), ...] >>> nltk.corpus.conll2002.tagged_words() - [('Sao', 'NC'), ('Paulo', 'VMI'), ('(', 'Fpa'), ...] + [(u'Sao', u'NC'), (u'Paulo', u'VMI'), (u'(', u'Fpa'), ...] >>> nltk.corpus.genesis.tagged_words() Traceback (most recent call last): ... @@ -2017,20 +2018,20 @@ supplied by a read-only stream. 
Note that all of the read operations return ``unicode`` objects (not ``str`` objects). >>> reader.read() # read the entire file. - 'This is a test file.\nIt is encoded in ascii.\n' + u'This is a test file.\nIt is encoded in ascii.\n' >>> reader.seek(0) # rewind to the start. >>> reader.read(5) # read at most 5 bytes. - 'This ' + u'This ' >>> reader.readline() # read to the end of the line. - 'is a test file.\n' + u'is a test file.\n' >>> reader.seek(0) # rewind to the start. >>> for line in reader: ... print(repr(line)) # iterate over lines - 'This is a test file.\n' - 'It is encoded in ascii.\n' + u'This is a test file.\n' + u'It is encoded in ascii.\n' >>> reader.seek(0) # rewind to the start. >>> reader.readlines() # read a list of line strings - ['This is a test file.\n', 'It is encoded in ascii.\n'] + [u'This is a test file.\n', u'It is encoded in ascii.\n'] >>> reader.close() Size argument to ``read()`` @@ -2046,7 +2047,7 @@ characters than the ``size`` argument: ... """.decode('ascii').encode('utf-16')) >>> reader = SeekableUnicodeStreamReader(stream, 'utf-16') >>> reader.read(10) - 'This ' + u'This ' If a read block ends in the middle of the byte string encoding a single character, then that byte string is stored in an internal @@ -2059,7 +2060,7 @@ string, which could be mistaken for indicating the end of the file. >>> reader.seek(0) # rewind to the start. >>> reader.read(1) # we actually need to read 4 bytes - 'T' + u'T' >>> int(reader.tell()) 4 @@ -2072,11 +2073,11 @@ bytes from the stream: >>> reader.seek(0) # rewind to the start. >>> reader.readline() # stores extra text in a buffer - 'This is a test file.\n' + u'This is a test file.\n' >>> print(reader.linebuffer) # examine the buffer contents - ['It is encoded i'] + [u'It is encoded i'] >>> reader.read(0) # returns the contents of the buffer - 'It is encoded i' + u'It is encoded i' >>> print(reader.linebuffer) # examine the buffer contents None @@ -2095,14 +2096,14 @@ returned by ``tell``. ... """.decode('ascii').encode('utf-16')) >>> reader = SeekableUnicodeStreamReader(stream, 'utf-16') >>> reader.read(20) - 'This is a ' + u'This is a ' >>> pos = reader.tell(); print(pos) 22 >>> reader.read(20) - 'test file.' + u'test file.' >>> reader.seek(pos) # rewind to the position from tell. >>> reader.read(20) - 'test file.' + u'test file.' The ``seek()`` and ``tell()`` methods work property even when ``readline()`` is used. @@ -2113,14 +2114,14 @@ The ``seek()`` and ``tell()`` methods work property even when ... """.decode('ascii').encode('utf-16')) >>> reader = SeekableUnicodeStreamReader(stream, 'utf-16') >>> reader.readline() - 'This is a test file.\n' + u'This is a test file.\n' >>> pos = reader.tell(); print(pos) 44 >>> reader.readline() - 'It is encoded in utf-16.\n' + u'It is encoded in utf-16.\n' >>> reader.seek(pos) # rewind to the position from tell. 
>>> reader.readline() - 'It is encoded in utf-16.\n' + u'It is encoded in utf-16.\n' Squashed Bugs diff --git a/nlp_resource_data/nltk/test/corpus_fixt.py b/nlp_resource_data/nltk/test/corpus_fixt.py index 17b011b..ce0cd83 100644 --- a/nlp_resource_data/nltk/test/corpus_fixt.py +++ b/nlp_resource_data/nltk/test/corpus_fixt.py @@ -1,3 +1,4 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import from nltk.corpus import teardown_module diff --git a/nlp_resource_data/nltk/test/crubadan.doctest b/nlp_resource_data/nltk/test/crubadan.doctest index 2894a41..011af25 100644 --- a/nlp_resource_data/nltk/test/crubadan.doctest +++ b/nlp_resource_data/nltk/test/crubadan.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT Crubadan Corpus Reader diff --git a/nlp_resource_data/nltk/test/data.doctest b/nlp_resource_data/nltk/test/data.doctest index 1fcfb29..184c512 100644 --- a/nlp_resource_data/nltk/test/data.doctest +++ b/nlp_resource_data/nltk/test/data.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT ========================================= @@ -20,6 +20,7 @@ takes as its first argument a URL specifying what file should be loaded. The ``nltk:`` protocol loads files from the NLTK data distribution: + >>> from __future__ import print_function >>> tokenizer = nltk.data.load('nltk:tokenizers/punkt/english.pickle') >>> tokenizer.tokenize('Hello. This is a test. It works!') ['Hello.', 'This is a test.', 'It works!'] @@ -336,7 +337,7 @@ This is mainly intended for internal use. The test simply tests that reading and writing work as intended and does not test how much improvement buffering provides. - >>> from io import StringIO + >>> from nltk.compat import StringIO >>> test = nltk.data.BufferedGzipFile('testbuf.gz', 'wb', size=2**10) >>> ans = [] >>> for i in range(10000): diff --git a/nlp_resource_data/nltk/test/dependency.doctest b/nlp_resource_data/nltk/test/dependency.doctest index 854e11a..31590c4 100755 --- a/nlp_resource_data/nltk/test/dependency.doctest +++ b/nlp_resource_data/nltk/test/dependency.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT =================== diff --git a/nlp_resource_data/nltk/test/discourse.doctest b/nlp_resource_data/nltk/test/discourse.doctest index a5dabe8..df18fde 100644 --- a/nlp_resource_data/nltk/test/discourse.doctest +++ b/nlp_resource_data/nltk/test/discourse.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. 
For license information, see LICENSE.TXT ================== diff --git a/nlp_resource_data/nltk/test/discourse_fixt.py b/nlp_resource_data/nltk/test/discourse_fixt.py index 9a10215..d3ab46f 100644 --- a/nlp_resource_data/nltk/test/discourse_fixt.py +++ b/nlp_resource_data/nltk/test/discourse_fixt.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import # FIXME: the entire discourse.doctest is skipped if Prover9/Mace4 is @@ -9,6 +10,6 @@ def setup_module(module): try: m = Mace() - m._find_binary("mace4") + m._find_binary('mace4') except LookupError: raise SkipTest("Mace4/Prover9 is not available so discourse.doctest is skipped") diff --git a/nlp_resource_data/nltk/test/doctest_nose_plugin.py b/nlp_resource_data/nltk/test/doctest_nose_plugin.py new file mode 100644 index 0000000..d77210c --- /dev/null +++ b/nlp_resource_data/nltk/test/doctest_nose_plugin.py @@ -0,0 +1,164 @@ +# -*- coding: utf-8 -*- +from __future__ import print_function +import re +import sys +import os +import codecs +import doctest +from nose.util import tolist, anyp +from nose.plugins.base import Plugin +from nose.suite import ContextList +from nose.plugins.doctests import Doctest, log, DocFileCase + +ALLOW_UNICODE = doctest.register_optionflag('ALLOW_UNICODE') + + +class _UnicodeOutputChecker(doctest.OutputChecker): + _literal_re = re.compile(r"(\W|^)[uU]([rR]?[\'\"])", re.UNICODE) + + def _remove_u_prefixes(self, txt): + return re.sub(self._literal_re, r'\1\2', txt) + + def check_output(self, want, got, optionflags): + res = doctest.OutputChecker.check_output(self, want, got, optionflags) + if res: + return True + if not (optionflags & ALLOW_UNICODE): + return False + + # ALLOW_UNICODE is active and want != got + cleaned_want = self._remove_u_prefixes(want) + cleaned_got = self._remove_u_prefixes(got) + res = doctest.OutputChecker.check_output( + self, cleaned_want, cleaned_got, optionflags + ) + return res + + +_checker = _UnicodeOutputChecker() + + +class DoctestPluginHelper(object): + """ + This mixin adds print_function future import to all test cases. + + It also adds support for: + '#doctest +ALLOW_UNICODE' option that + makes DocTestCase think u'foo' == 'foo'. 
+ + '#doctest doctestencoding=utf-8' option that + changes the encoding of doctest files + """ + + OPTION_BY_NAME = ('doctestencoding',) + + def loadTestsFromFileUnicode(self, filename): + if self.extension and anyp(filename.endswith, self.extension): + name = os.path.basename(filename) + dh = codecs.open(filename, 'r', self.options.get('doctestencoding')) + try: + doc = dh.read() + finally: + dh.close() + + fixture_context = None + globs = {'__file__': filename} + if self.fixtures: + base, ext = os.path.splitext(name) + dirname = os.path.dirname(filename) + sys.path.append(dirname) + fixt_mod = base + self.fixtures + try: + fixture_context = __import__(fixt_mod, globals(), locals(), ["nop"]) + except ImportError as e: + log.debug("Could not import %s: %s (%s)", fixt_mod, e, sys.path) + log.debug("Fixture module %s resolved to %s", fixt_mod, fixture_context) + if hasattr(fixture_context, 'globs'): + globs = fixture_context.globs(globs) + parser = doctest.DocTestParser() + test = parser.get_doctest( + doc, globs=globs, name=name, filename=filename, lineno=0 + ) + if test.examples: + case = DocFileCase( + test, + optionflags=self.optionflags, + setUp=getattr(fixture_context, 'setup_test', None), + tearDown=getattr(fixture_context, 'teardown_test', None), + result_var=self.doctest_result_var, + ) + if fixture_context: + yield ContextList((case,), context=fixture_context) + else: + yield case + else: + yield False # no tests to load + + def loadTestsFromFile(self, filename): + + cases = self.loadTestsFromFileUnicode(filename) + + for case in cases: + if isinstance(case, ContextList): + yield ContextList([self._patchTestCase(c) for c in case], case.context) + else: + yield self._patchTestCase(case) + + def loadTestsFromModule(self, module): + """Load doctests from the module. 
+ """ + for suite in super(DoctestPluginHelper, self).loadTestsFromModule(module): + cases = [self._patchTestCase(case) for case in suite._get_tests()] + yield self.suiteClass(cases, context=module, can_split=False) + + def _patchTestCase(self, case): + if case: + case._dt_test.globs['print_function'] = print_function + case._dt_checker = _checker + return case + + def configure(self, options, config): + # it is overriden in order to fix doctest options discovery + + Plugin.configure(self, options, config) + self.doctest_result_var = options.doctest_result_var + self.doctest_tests = options.doctest_tests + self.extension = tolist(options.doctestExtension) + self.fixtures = options.doctestFixtures + self.finder = doctest.DocTestFinder() + + # super(DoctestPluginHelper, self).configure(options, config) + self.optionflags = 0 + self.options = {} + + if options.doctestOptions: + stroptions = ",".join(options.doctestOptions).split(',') + for stroption in stroptions: + try: + if stroption.startswith('+'): + self.optionflags |= doctest.OPTIONFLAGS_BY_NAME[stroption[1:]] + continue + elif stroption.startswith('-'): + self.optionflags &= ~doctest.OPTIONFLAGS_BY_NAME[stroption[1:]] + continue + try: + key, value = stroption.split('=') + except ValueError: + pass + else: + if not key in self.OPTION_BY_NAME: + raise ValueError() + self.options[key] = value + continue + except (AttributeError, ValueError, KeyError): + raise ValueError("Unknown doctest option {}".format(stroption)) + else: + raise ValueError( + "Doctest option is not a flag or a key/value pair: {} ".format( + stroption + ) + ) + + +class DoctestFix(DoctestPluginHelper, Doctest): + pass diff --git a/nlp_resource_data/nltk/test/drt.doctest b/nlp_resource_data/nltk/test/drt.doctest index a0cd1f3..6163052 100644 --- a/nlp_resource_data/nltk/test/drt.doctest +++ b/nlp_resource_data/nltk/test/drt.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT ================================ diff --git a/nlp_resource_data/nltk/test/featgram.doctest b/nlp_resource_data/nltk/test/featgram.doctest index b866978..a1775f8 100644 --- a/nlp_resource_data/nltk/test/featgram.doctest +++ b/nlp_resource_data/nltk/test/featgram.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT ========================= @@ -9,6 +9,7 @@ Grammars can be parsed from strings. + >>> from __future__ import print_function >>> import nltk >>> from nltk import grammar, parse >>> g = """ diff --git a/nlp_resource_data/nltk/test/featstruct.doctest b/nlp_resource_data/nltk/test/featstruct.doctest index 0c14435..8c35dad 100644 --- a/nlp_resource_data/nltk/test/featstruct.doctest +++ b/nlp_resource_data/nltk/test/featstruct.doctest @@ -1,9 +1,10 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT ================================== Feature Structures & Unification ================================== + >>> from __future__ import print_function >>> from nltk.featstruct import FeatStruct >>> from nltk.sem.logic import Variable, VariableExpression, Expression diff --git a/nlp_resource_data/nltk/test/framenet.doctest b/nlp_resource_data/nltk/test/framenet.doctest index d1ecc80..6de3a41 100644 --- a/nlp_resource_data/nltk/test/framenet.doctest +++ b/nlp_resource_data/nltk/test/framenet.doctest @@ -1,4 +1,4 @@ -.. 
Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT ======== diff --git a/nlp_resource_data/nltk/test/generate.doctest b/nlp_resource_data/nltk/test/generate.doctest index 2c7f3d8..4453518 100644 --- a/nlp_resource_data/nltk/test/generate.doctest +++ b/nlp_resource_data/nltk/test/generate.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT =============================================== diff --git a/nlp_resource_data/nltk/test/gensim.doctest b/nlp_resource_data/nltk/test/gensim.doctest index 386e3e0..2e27597 100644 --- a/nlp_resource_data/nltk/test/gensim.doctest +++ b/nlp_resource_data/nltk/test/gensim.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT ======================================= @@ -60,7 +60,7 @@ Each word is represented in the space of 300 dimensions: Finding the top n words that are similar to a target word is simple. The result is the list of n words with the score. >>> model.most_similar(positive=['university'], topn = 3) - [('universities', 0.70039...), ('faculty', 0.67809...), ('undergraduate', 0.65870...)] + [(u'universities', 0.70039...), (u'faculty', 0.67809...), (u'undergraduate', 0.65870...)] Finding a word that is not in a list is also supported, although, implementing this by yourself is simple. @@ -71,10 +71,10 @@ Mikolov et al. (2013) figured out that word embedding captures much of syntactic the vector 'King - Man + Woman' is close to 'Queen' and 'Germany - Berlin + Paris' is close to 'France'. >>> model.most_similar(positive=['woman','king'], negative=['man'], topn = 1) - [('queen', 0.71181...)] + [(u'queen', 0.71181...)] >>> model.most_similar(positive=['Paris','Germany'], negative=['Berlin'], topn = 1) - [('France', 0.78840...)] + [(u'France', 0.78840...)] We can visualize the word embeddings using t-SNE (http://lvdmaaten.github.io/tsne/). For this demonstration, we visualize the first 1000 words. diff --git a/nlp_resource_data/nltk/test/gensim_fixt.py b/nlp_resource_data/nltk/test/gensim_fixt.py index 2de144c..b1a6d2e 100644 --- a/nlp_resource_data/nltk/test/gensim_fixt.py +++ b/nlp_resource_data/nltk/test/gensim_fixt.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import def setup_module(module): diff --git a/nlp_resource_data/nltk/test/gluesemantics.doctest b/nlp_resource_data/nltk/test/gluesemantics.doctest index 08b96e3..7bf29a0 100644 --- a/nlp_resource_data/nltk/test/gluesemantics.doctest +++ b/nlp_resource_data/nltk/test/gluesemantics.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT ============================================================================== diff --git a/nlp_resource_data/nltk/test/gluesemantics_malt.doctest b/nlp_resource_data/nltk/test/gluesemantics_malt.doctest index a76e96f..1329794 100644 --- a/nlp_resource_data/nltk/test/gluesemantics_malt.doctest +++ b/nlp_resource_data/nltk/test/gluesemantics_malt.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT .. 
see also: gluesemantics.doctest diff --git a/nlp_resource_data/nltk/test/gluesemantics_malt_fixt.py b/nlp_resource_data/nltk/test/gluesemantics_malt_fixt.py index 1a7fee3..70e149a 100644 --- a/nlp_resource_data/nltk/test/gluesemantics_malt_fixt.py +++ b/nlp_resource_data/nltk/test/gluesemantics_malt_fixt.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import def setup_module(module): @@ -6,6 +7,6 @@ def setup_module(module): from nltk.parse.malt import MaltParser try: - depparser = MaltParser("maltparser-1.7.2") + depparser = MaltParser('maltparser-1.7.2') except LookupError: raise SkipTest("MaltParser is not available") diff --git a/nlp_resource_data/nltk/test/grammar.doctest b/nlp_resource_data/nltk/test/grammar.doctest index c604069..7cae9d9 100644 --- a/nlp_resource_data/nltk/test/grammar.doctest +++ b/nlp_resource_data/nltk/test/grammar.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT =============== diff --git a/nlp_resource_data/nltk/test/grammartestsuites.doctest b/nlp_resource_data/nltk/test/grammartestsuites.doctest index 2eab462..4221537 100644 --- a/nlp_resource_data/nltk/test/grammartestsuites.doctest +++ b/nlp_resource_data/nltk/test/grammartestsuites.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT ========================== diff --git a/nlp_resource_data/nltk/test/index.doctest b/nlp_resource_data/nltk/test/index.doctest index 31b46c7..7ce8167 100644 --- a/nlp_resource_data/nltk/test/index.doctest +++ b/nlp_resource_data/nltk/test/index.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT .. _align howto: align.html diff --git a/nlp_resource_data/nltk/test/inference.doctest b/nlp_resource_data/nltk/test/inference.doctest index 5bf1501..c2a41a3 100644 --- a/nlp_resource_data/nltk/test/inference.doctest +++ b/nlp_resource_data/nltk/test/inference.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT ==================================== diff --git a/nlp_resource_data/nltk/test/inference_fixt.py b/nlp_resource_data/nltk/test/inference_fixt.py index 5103cd9..3fe9d03 100644 --- a/nlp_resource_data/nltk/test/inference_fixt.py +++ b/nlp_resource_data/nltk/test/inference_fixt.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import def setup_module(module): @@ -7,7 +8,7 @@ def setup_module(module): try: m = Mace() - m._find_binary("mace4") + m._find_binary('mace4') except LookupError: raise SkipTest( "Mace4/Prover9 is not available so inference.doctest was skipped" diff --git a/nlp_resource_data/nltk/test/internals.doctest b/nlp_resource_data/nltk/test/internals.doctest index f906203..74c2bd9 100644 --- a/nlp_resource_data/nltk/test/internals.doctest +++ b/nlp_resource_data/nltk/test/internals.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT ========================================== diff --git a/nlp_resource_data/nltk/test/japanese.doctest b/nlp_resource_data/nltk/test/japanese.doctest index f82af81..181b080 100644 --- a/nlp_resource_data/nltk/test/japanese.doctest +++ b/nlp_resource_data/nltk/test/japanese.doctest @@ -1,4 +1,4 @@ -.. 
Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT ============================ diff --git a/nlp_resource_data/nltk/test/lm.doctest b/nlp_resource_data/nltk/test/lm.doctest index c2a97c8..f3bde33 100644 --- a/nlp_resource_data/nltk/test/lm.doctest +++ b/nlp_resource_data/nltk/test/lm.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT .. -*- coding: utf-8 -*- diff --git a/nlp_resource_data/nltk/test/logic.doctest b/nlp_resource_data/nltk/test/logic.doctest index 45c6429..ab27009 100644 --- a/nlp_resource_data/nltk/test/logic.doctest +++ b/nlp_resource_data/nltk/test/logic.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT ======================= diff --git a/nlp_resource_data/nltk/test/meteor.doctest b/nlp_resource_data/nltk/test/meteor.doctest deleted file mode 100644 index 7544d25..0000000 --- a/nlp_resource_data/nltk/test/meteor.doctest +++ /dev/null @@ -1,45 +0,0 @@ -.. Copyright (C) 2001-2020 NLTK Project -.. For license information, see LICENSE.TXT - -.. -*- coding: utf-8 -*- - -============= -METEOR tests -============= - -No Allignment test ------------------- - - >>> from nltk.translate import meteor - -If the candidate has no alignment to any of the references, the METEOR score is 0. - - >>> round(meteor( - ... ['The candidate has no alignment to any of the references'], - ... 'John loves Mary' - ... ),4) - 0.0 - -Tests based on wikipedia examples ---------------------------------- - -Testing on `wikipedia examples `_ - - >>> same_res = round(meteor( - ... ['The cat sat on the mat'], - ... 'The cat sat on the mat' - ... ),4) - >>> abs(same_res - 0.9977) < 1e-2 - True - - >>> meteor( - ... ['The cat sat on the mat'], - ... 'on the mat sat the cat' - ... ) - 0.5 - - >>> round(meteor( - ... ['The cat sat on the mat'], - ... 'The cat was sat on the mat' - ... ),4) - 0.9654 diff --git a/nlp_resource_data/nltk/test/metrics.doctest b/nlp_resource_data/nltk/test/metrics.doctest index 5ff9877..139a888 100644 --- a/nlp_resource_data/nltk/test/metrics.doctest +++ b/nlp_resource_data/nltk/test/metrics.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT ======= @@ -8,6 +8,7 @@ Metrics The `nltk.metrics` package provides a variety of *evaluation measures* which can be used for a wide variety of NLP tasks. 
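An aside, not part of the patch: the metrics.doctest hunk below drops the ``edit_distance_align`` and quadgram association examples and keeps the core measures. A rough, self-contained illustration of those core calls, with indicative outputs in the comments::

    from nltk.metrics import accuracy, edit_distance, precision, recall

    print(edit_distance("rain", "shine"))              # Levenshtein distance: 3

    # accuracy() compares two equally long sequences position by position.
    print(accuracy(['x', 'y', 'x'], ['x', 'x', 'x']))  # 2 of 3 positions match: 0.6666...

    # precision()/recall() operate on sets of retrieved vs. relevant items.
    reference = {'item1', 'item2', 'item3', 'item4'}
    retrieved = {'item2', 'item4', 'item5'}
    print(precision(reference, retrieved))             # 2/3 of retrieved items are relevant
    print(recall(reference, retrieved))                # 2/4 relevant items were retrieved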
+ >>> from __future__ import print_function >>> from nltk.metrics import * ------------------ @@ -51,14 +52,6 @@ String edit distance (Levenshtein): >>> edit_distance("rain", "shine") 3 - >>> edit_distance_align("shine", "shine") - [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)] - >>> edit_distance_align("rain", "brainy") - [(0, 0), (1, 1), (1, 2), (2, 3), (3, 4), (4, 5), (4, 6)] - >>> edit_distance_align("", "brainy") - [(0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6)] - >>> edit_distance_align("", "") - [(0, 0)] Other distance measures: @@ -275,18 +268,3 @@ For trigrams, we have to provide more count information: True >>> tam.jaccard(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.jaccard(n_w1_w2_w3, pair_counts, uni_counts, N) True - - -For fourgrams, we have to provide more count information: - - >>> n_w1_w2_w3_w4 = 5 - >>> n_w1_w2, n_w1_w3, n_w2_w3 = 35, 60, 40 - >>> n_w1_w2_w3, n_w2_w3_w4 = 20, 10 - >>> pair_counts = (n_w1_w2, n_w1_w3, n_w2_w3) - >>> triplet_counts = (n_w1_w2_w3, n_w2_w3_w4) - >>> n_w1, n_w2, n_w3, n_w4 = 100, 200, 300, 400 - >>> uni_counts = (n_w1, n_w2, n_w3, n_w4) - >>> N = 14307668 - >>> qam = QuadgramAssocMeasures - >>> qam.raw_freq(n_w1_w2_w3_w4, pair_counts, triplet_counts, uni_counts, N) == 1. * n_w1_w2_w3_w4 / N - True diff --git a/nlp_resource_data/nltk/test/misc.doctest b/nlp_resource_data/nltk/test/misc.doctest index d72e0b3..71343b3 100644 --- a/nlp_resource_data/nltk/test/misc.doctest +++ b/nlp_resource_data/nltk/test/misc.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT -------------------------------------------------------------------------------- diff --git a/nlp_resource_data/nltk/test/nonmonotonic.doctest b/nlp_resource_data/nltk/test/nonmonotonic.doctest index ea17c60..be761b3 100644 --- a/nlp_resource_data/nltk/test/nonmonotonic.doctest +++ b/nlp_resource_data/nltk/test/nonmonotonic.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT ====================== diff --git a/nlp_resource_data/nltk/test/nonmonotonic_fixt.py b/nlp_resource_data/nltk/test/nonmonotonic_fixt.py index e6bdffa..0c38381 100644 --- a/nlp_resource_data/nltk/test/nonmonotonic_fixt.py +++ b/nlp_resource_data/nltk/test/nonmonotonic_fixt.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import def setup_module(module): @@ -7,7 +8,7 @@ def setup_module(module): try: m = Mace() - m._find_binary("mace4") + m._find_binary('mace4') except LookupError: raise SkipTest( "Mace4/Prover9 is not available so nonmonotonic.doctest was skipped" diff --git a/nlp_resource_data/nltk/test/parse.doctest b/nlp_resource_data/nltk/test/parse.doctest index c84b469..b7c0ee1 100644 --- a/nlp_resource_data/nltk/test/parse.doctest +++ b/nlp_resource_data/nltk/test/parse.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT ========= diff --git a/nlp_resource_data/nltk/test/portuguese_en.doctest b/nlp_resource_data/nltk/test/portuguese_en.doctest index 84cee4a..87051c9 100644 --- a/nlp_resource_data/nltk/test/portuguese_en.doctest +++ b/nlp_resource_data/nltk/test/portuguese_en.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. 
For license information, see LICENSE.TXT ================================== @@ -140,8 +140,8 @@ We just saw a ``for`` loop above. Another useful control structure is a >>> [w for w in psent1 if w.endswith('a')] ['da', 'gl\xf3ria', 'era', 'a', 'coisa', 'humana', 'a', 'sua', 'genu\xedna'] >>> [w for w in ptext4 if len(w) > 15] - ['norte-irlandeses', 'pan-nacionalismo', 'predominatemente', 'primeiro-ministro', - 'primeiro-ministro', 'irlandesa-americana', 'responsabilidades', 'significativamente'] + [u'norte-irlandeses', u'pan-nacionalismo', u'predominatemente', u'primeiro-ministro', + u'primeiro-ministro', u'irlandesa-americana', u'responsabilidades', u'significativamente'] We can examine the relative frequency of words in a text, using ``FreqDist``: @@ -151,19 +151,19 @@ We can examine the relative frequency of words in a text, using ``FreqDist``: >>> fd1['olhos'] 137 >>> fd1.max() - ',' + u',' >>> fd1.samples()[:100] - [',', '.', 'a', 'que', 'de', 'e', '-', 'o', ';', 'me', 'um', 'n\xe3o', - '\x97', 'se', 'do', 'da', 'uma', 'com', 'os', '\xe9', 'era', 'as', 'eu', - 'lhe', 'ao', 'em', 'para', 'mas', '...', '!', '\xe0', 'na', 'mais', '?', - 'no', 'como', 'por', 'N\xe3o', 'dos', 'o', 'ele', ':', 'Virg\xedlia', - 'me', 'disse', 'minha', 'das', 'O', '/', 'A', 'CAP\xcdTULO', 'muito', - 'depois', 'coisa', 'foi', 'sem', 'olhos', 'ela', 'nos', 'tinha', 'nem', - 'E', 'outro', 'vida', 'nada', 'tempo', 'menos', 'outra', 'casa', 'homem', - 'porque', 'quando', 'mim', 'mesmo', 'ser', 'pouco', 'estava', 'dia', - 't\xe3o', 'tudo', 'Mas', 'at\xe9', 'D', 'ainda', 's\xf3', 'alguma', - 'la', 'vez', 'anos', 'h\xe1', 'Era', 'pai', 'esse', 'lo', 'dizer', 'assim', - 'ent\xe3o', 'dizia', 'aos', 'Borba'] + [u',', u'.', u'a', u'que', u'de', u'e', u'-', u'o', u';', u'me', u'um', u'n\xe3o', + u'\x97', u'se', u'do', u'da', u'uma', u'com', u'os', u'\xe9', u'era', u'as', u'eu', + u'lhe', u'ao', u'em', u'para', u'mas', u'...', u'!', u'\xe0', u'na', u'mais', u'?', + u'no', u'como', u'por', u'N\xe3o', u'dos', u'ou', u'ele', u':', u'Virg\xedlia', + u'meu', u'disse', u'minha', u'das', u'O', u'/', u'A', u'CAP\xcdTULO', u'muito', + u'depois', u'coisa', u'foi', u'sem', u'olhos', u'ela', u'nos', u'tinha', u'nem', + u'E', u'outro', u'vida', u'nada', u'tempo', u'menos', u'outra', u'casa', u'homem', + u'porque', u'quando', u'mim', u'mesmo', u'ser', u'pouco', u'estava', u'dia', + u't\xe3o', u'tudo', u'Mas', u'at\xe9', u'D', u'ainda', u's\xf3', u'alguma', + u'la', u'vez', u'anos', u'h\xe1', u'Era', u'pai', u'esse', u'lo', u'dizer', u'assim', + u'ent\xe3o', u'dizia', u'aos', u'Borba'] --------------- Reading Corpora @@ -244,7 +244,7 @@ We can access this corpus as a sequence of words or tagged words as follows: [['Jersei', 'atinge', 'm\xe9dia', 'de', 'Cr$', '1,4', 'milh\xe3o', 'em', 'a', 'venda', 'de', 'a', 'Pinhal', 'em', 'S\xe3o', 'Paulo'], ['Programe', 'sua', 'viagem', 'a', 'a', 'Exposi\xe7\xe3o', 'Nacional', - 'do', 'Zeb', ',', 'que', 'come\xe7a', 'dia', '25'], ...] + 'do', 'Zebu', ',', 'que', 'come\xe7a', 'dia', '25'], ...] >>> nltk.corpus.mac_morpho.tagged_words() [('Jersei', 'N'), ('atinge', 'V'), ('m\xe9dia', 'N'), ...] @@ -258,7 +258,7 @@ We can also access it in sentence chunks. 
('Paulo', 'NPROP')], [('Programe', 'V'), ('sua', 'PROADJ'), ('viagem', 'N'), ('a', 'PREP|+'), ('a', 'ART'), ('Exposi\xe7\xe3o', 'NPROP'), ('Nacional', 'NPROP'), - ('do', 'NPROP'), ('Zeb', 'NPROP'), (',', ','), ('que', 'PRO-KS-REL'), + ('do', 'NPROP'), ('Zebu', 'NPROP'), (',', ','), ('que', 'PRO-KS-REL'), ('come\xe7a', 'V'), ('dia', 'N'), ('25', 'N|AP')], ...] This data can be used to train taggers (examples below for the Floresta treebank). diff --git a/nlp_resource_data/nltk/test/portuguese_en_fixt.py b/nlp_resource_data/nltk/test/portuguese_en_fixt.py index f417bc6..afbd59e 100644 --- a/nlp_resource_data/nltk/test/portuguese_en_fixt.py +++ b/nlp_resource_data/nltk/test/portuguese_en_fixt.py @@ -1,4 +1,7 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import +from nltk.compat import PY3 + from nltk.corpus import teardown_module @@ -8,3 +11,8 @@ def setup_module(module): raise SkipTest( "portuguese_en.doctest imports nltk.examples.pt which doesn't exist!" ) + + if not PY3: + raise SkipTest( + "portuguese_en.doctest was skipped because non-ascii doctests are not supported under Python 2.x" + ) diff --git a/nlp_resource_data/nltk/test/probability.doctest b/nlp_resource_data/nltk/test/probability.doctest index ea36fe3..3cb582e 100644 --- a/nlp_resource_data/nltk/test/probability.doctest +++ b/nlp_resource_data/nltk/test/probability.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT =========== diff --git a/nlp_resource_data/nltk/test/probability_fixt.py b/nlp_resource_data/nltk/test/probability_fixt.py index fc786c9..680dab6 100644 --- a/nlp_resource_data/nltk/test/probability_fixt.py +++ b/nlp_resource_data/nltk/test/probability_fixt.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import # probability.doctest uses HMM which requires numpy; diff --git a/nlp_resource_data/nltk/test/propbank.doctest b/nlp_resource_data/nltk/test/propbank.doctest index d3e8a68..9bec607 100644 --- a/nlp_resource_data/nltk/test/propbank.doctest +++ b/nlp_resource_data/nltk/test/propbank.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT ======== diff --git a/nlp_resource_data/nltk/test/relextract.doctest b/nlp_resource_data/nltk/test/relextract.doctest index 6df3c1c..085fa90 100644 --- a/nlp_resource_data/nltk/test/relextract.doctest +++ b/nlp_resource_data/nltk/test/relextract.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT ====================== @@ -65,24 +65,24 @@ this case, the strings are also POS tagged. >>> from nltk.corpus import conll2002 >>> for doc in conll2002.chunked_sents('ned.train')[27]: ... 
print(doc) - ('Het', 'Art') + (u'Het', u'Art') (ORG Hof/N van/Prep Cassatie/N) - ('verbrak', 'V') - ('het', 'Art') - ('arrest', 'N') - ('zodat', 'Conj') - ('het', 'Pron') - ('moest', 'V') - ('worden', 'V') - ('overgedaan', 'V') - ('door', 'Prep') - ('het', 'Art') - ('hof', 'N') - ('van', 'Prep') - ('beroep', 'N') - ('van', 'Prep') + (u'verbrak', u'V') + (u'het', u'Art') + (u'arrest', u'N') + (u'zodat', u'Conj') + (u'het', u'Pron') + (u'moest', u'V') + (u'worden', u'V') + (u'overgedaan', u'V') + (u'door', u'Prep') + (u'het', u'Art') + (u'hof', u'N') + (u'van', u'Prep') + (u'beroep', u'N') + (u'van', u'Prep') (LOC Antwerpen/N) - ('.', 'Punc') + (u'.', u'Punc') Relation Extraction ~~~~~~~~~~~~~~~~~~~ @@ -234,16 +234,16 @@ presented as something that looks more like a clause in a logical language. ... for rel in relextract.extract_rels('ORG', 'LOC', doc, corpus='conll2002', pattern = DE)] >>> for r in rels[:10]: ... print(relextract.clause(r, relsym='DE')) # doctest: +NORMALIZE_WHITESPACE - DE('tribunal_supremo', 'victoria') - DE('museo_de_arte', 'alcorc\xf3n') - DE('museo_de_bellas_artes', 'a_coru\xf1a') - DE('siria', 'l\xedbano') - DE('uni\xf3n_europea', 'pek\xedn') - DE('ej\xe9rcito', 'rogberi') - DE('juzgado_de_instrucci\xf3n_n\xfamero_1', 'san_sebasti\xe1n') - DE('psoe', 'villanueva_de_la_serena') - DE('ej\xe9rcito', 'l\xedbano') - DE('juzgado_de_lo_penal_n\xfamero_2', 'ceuta') + DE(u'tribunal_supremo', u'victoria') + DE(u'museo_de_arte', u'alcorc\xf3n') + DE(u'museo_de_bellas_artes', u'a_coru\xf1a') + DE(u'siria', u'l\xedbano') + DE(u'uni\xf3n_europea', u'pek\xedn') + DE(u'ej\xe9rcito', u'rogberi') + DE(u'juzgado_de_instrucci\xf3n_n\xfamero_1', u'san_sebasti\xe1n') + DE(u'psoe', u'villanueva_de_la_serena') + DE(u'ej\xe9rcito', u'l\xedbano') + DE(u'juzgado_de_lo_penal_n\xfamero_2', u'ceuta') >>> vnv = """ ... ( ... is/V| @@ -258,6 +258,6 @@ presented as something that looks more like a clause in a logical language. >>> for doc in conll2002.chunked_sents('ned.train'): ... for r in relextract.extract_rels('PER', 'ORG', doc, corpus='conll2002', pattern=VAN): ... print(relextract.clause(r, relsym="VAN")) - VAN("cornet_d'elzius", 'buitenlandse_handel') - VAN('johan_rottiers', 'kardinaal_van_roey_instituut') - VAN('annie_lennox', 'eurythmics') + VAN(u"cornet_d'elzius", u'buitenlandse_handel') + VAN(u'johan_rottiers', u'kardinaal_van_roey_instituut') + VAN(u'annie_lennox', u'eurythmics') diff --git a/nlp_resource_data/nltk/test/resolution.doctest b/nlp_resource_data/nltk/test/resolution.doctest index fc31db4..318efcd 100644 --- a/nlp_resource_data/nltk/test/resolution.doctest +++ b/nlp_resource_data/nltk/test/resolution.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. 
For license information, see LICENSE.TXT ========================= diff --git a/nlp_resource_data/nltk/test/runtests.py b/nlp_resource_data/nltk/test/runtests.py index 9dc06ec..8f40cc6 100644 --- a/nlp_resource_data/nltk/test/runtests.py +++ b/nlp_resource_data/nltk/test/runtests.py @@ -1,5 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +from __future__ import absolute_import, print_function import sys import os import nose @@ -7,13 +8,15 @@ from nose.plugins.manager import PluginManager from nose.plugins.doctests import Doctest from nose.plugins import builtin -NLTK_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) +NLTK_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')) sys.path.insert(0, NLTK_ROOT) -NLTK_TEST_DIR = os.path.join(NLTK_ROOT, "nltk") +NLTK_TEST_DIR = os.path.join(NLTK_ROOT, 'nltk') -if __name__ == "__main__": +if __name__ == '__main__': # there shouldn't be import from NLTK for coverage to work properly + from doctest_nose_plugin import DoctestFix + try: # Import RedNose plugin for colored test output from rednose import RedNose @@ -30,7 +33,9 @@ if __name__ == "__main__": def loadPlugins(self): for plug in builtin.plugins: - self.addPlugin(plug()) + if plug != Doctest: + self.addPlugin(plug()) + self.addPlugin(DoctestFix()) if rednose_available: self.addPlugin(RedNose()) @@ -50,25 +55,26 @@ if __name__ == "__main__": if not args: args = [NLTK_TEST_DIR] - if all(arg.startswith("-") for arg in args): + if all(arg.startswith('-') for arg in args): # only extra options were passed args += [NLTK_TEST_DIR] # Activate RedNose and hide skipped test messages from output if rednose_available: - args += ["--rednose", "--hide-skips"] + args += ['--rednose', '--hide-skips'] arguments = [ - "--exclude=", # why is this needed? + '--exclude=', # why is this needed? # '--with-xunit', # '--xunit-file=$WORKSPACE/nosetests.xml', # '--nocapture', - "--with-doctest", + '--with-doctest', # '--doctest-tests', # '--debug=nose,nose.importer,nose.inspector,nose.plugins,nose.result,nose.selector', - "--doctest-extension=.doctest", - "--doctest-fixtures=_fixt", - "--doctest-options=+ELLIPSIS,+NORMALIZE_WHITESPACE,+IGNORE_EXCEPTION_DETAIL", + '--doctest-extension=.doctest', + '--doctest-fixtures=_fixt', + '--doctest-options=+ELLIPSIS,+NORMALIZE_WHITESPACE,+IGNORE_EXCEPTION_DETAIL,+ALLOW_UNICODE,' + 'doctestencoding=utf-8', # '--verbosity=3', ] + args diff --git a/nlp_resource_data/nltk/test/segmentation_fixt.py b/nlp_resource_data/nltk/test/segmentation_fixt.py index 82918ba..bb8a7cf 100644 --- a/nlp_resource_data/nltk/test/segmentation_fixt.py +++ b/nlp_resource_data/nltk/test/segmentation_fixt.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import # skip segmentation.doctest if numpy is not available diff --git a/nlp_resource_data/nltk/test/semantics.doctest b/nlp_resource_data/nltk/test/semantics.doctest index 32c0f84..f1a1f3c 100644 --- a/nlp_resource_data/nltk/test/semantics.doctest +++ b/nlp_resource_data/nltk/test/semantics.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. 
For license information, see LICENSE.TXT ========= diff --git a/nlp_resource_data/nltk/test/semantics_fixt.py b/nlp_resource_data/nltk/test/semantics_fixt.py index 8d67144..135180d 100644 --- a/nlp_resource_data/nltk/test/semantics_fixt.py +++ b/nlp_resource_data/nltk/test/semantics_fixt.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import # reset the variables counter before running tests def setup_module(module): diff --git a/nlp_resource_data/nltk/test/sentiment.doctest b/nlp_resource_data/nltk/test/sentiment.doctest index 36e5b20..359e165 100644 --- a/nlp_resource_data/nltk/test/sentiment.doctest +++ b/nlp_resource_data/nltk/test/sentiment.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT =================== diff --git a/nlp_resource_data/nltk/test/sentiwordnet.doctest b/nlp_resource_data/nltk/test/sentiwordnet.doctest index 70f25ee..46126bb 100644 --- a/nlp_resource_data/nltk/test/sentiwordnet.doctest +++ b/nlp_resource_data/nltk/test/sentiwordnet.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT ====================== diff --git a/nlp_resource_data/nltk/test/simple.doctest b/nlp_resource_data/nltk/test/simple.doctest index 48fdcd3..5636163 100644 --- a/nlp_resource_data/nltk/test/simple.doctest +++ b/nlp_resource_data/nltk/test/simple.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT ================= @@ -8,7 +8,8 @@ EasyInstall Tests This file contains some simple tests that will be run by EasyInstall in order to test the installation when NLTK-Data is absent. - + >>> from __future__ import print_function + ------------ Tokenization ------------ diff --git a/nlp_resource_data/nltk/test/stem.doctest b/nlp_resource_data/nltk/test/stem.doctest index b80104d..2cf9857 100644 --- a/nlp_resource_data/nltk/test/stem.doctest +++ b/nlp_resource_data/nltk/test/stem.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT ========== @@ -11,6 +11,7 @@ Overview Stemmers remove morphological affixes from words, leaving only the word stem. + >>> from __future__ import print_function >>> from nltk.stem import * Unit tests for the Porter stemmer diff --git a/nlp_resource_data/nltk/test/tag.doctest b/nlp_resource_data/nltk/test/tag.doctest index 7103b41..2248cba 100644 --- a/nlp_resource_data/nltk/test/tag.doctest +++ b/nlp_resource_data/nltk/test/tag.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT Regression Tests diff --git a/nlp_resource_data/nltk/test/tokenize.doctest b/nlp_resource_data/nltk/test/tokenize.doctest index a3a7dfa..f99e22a 100644 --- a/nlp_resource_data/nltk/test/tokenize.doctest +++ b/nlp_resource_data/nltk/test/tokenize.doctest @@ -1,6 +1,7 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT + >>> from __future__ import print_function >>> from nltk.tokenize import * Regression Tests: Treebank Tokenizer @@ -42,12 +43,12 @@ Some test strings. 
Testing improvement made to the TreebankWordTokenizer - >>> sx1 = '\xabNow that I can do.\xbb' - >>> expected = ['\xab', 'Now', 'that', 'I', 'can', 'do', '.', '\xbb'] + >>> sx1 = u'\xabNow that I can do.\xbb' + >>> expected = [u'\xab', u'Now', u'that', u'I', u'can', u'do', u'.', u'\xbb'] >>> word_tokenize(sx1) == expected True - >>> sx2 = 'The unicode 201C and 201D \u201cLEFT(RIGHT) DOUBLE QUOTATION MARK\u201d is also OPEN_PUNCT and CLOSE_PUNCT.' - >>> expected = ['The', 'unicode', '201C', 'and', '201D', '\u201c', 'LEFT', '(', 'RIGHT', ')', 'DOUBLE', 'QUOTATION', 'MARK', '\u201d', 'is', 'also', 'OPEN_PUNCT', 'and', 'CLOSE_PUNCT', '.'] + >>> sx2 = u'The unicode 201C and 201D \u201cLEFT(RIGHT) DOUBLE QUOTATION MARK\u201d is also OPEN_PUNCT and CLOSE_PUNCT.' + >>> expected = [u'The', u'unicode', u'201C', u'and', u'201D', u'\u201c', u'LEFT', u'(', u'RIGHT', u')', u'DOUBLE', u'QUOTATION', u'MARK', u'\u201d', u'is', u'also', u'OPEN_PUNCT', u'and', u'CLOSE_PUNCT', u'.'] >>> word_tokenize(sx2) == expected True @@ -175,7 +176,7 @@ It should not hang on long sequences of the same punctuation character. >>> tknzr = TweetTokenizer() >>> s10 = "Photo: Aujourd'hui sur http://t.co/0gebOFDUzn Projet... http://t.co/bKfIUbydz2.............................. http://fb.me/3b6uXpz0L" >>> tknzr.tokenize(s10) - ['Photo', ':', "Aujourd'hui", 'sur', 'http://t.co/0gebOFDUzn', 'Projet', '...', 'http://t.co/bKfIUbydz2', '...', 'http://fb.me/3b6uXpz0L'] + [u'Photo', u':', u"Aujourd'hui", u'sur', u'http://t.co/0gebOFDUzn', u'Projet', u'...', u'http://t.co/bKfIUbydz2', u'...', u'http://fb.me/3b6uXpz0L'] Regression Tests: PunktSentenceTokenizer diff --git a/nlp_resource_data/nltk/test/toolbox.doctest b/nlp_resource_data/nltk/test/toolbox.doctest index 1e430ad..1abf684 100644 --- a/nlp_resource_data/nltk/test/toolbox.doctest +++ b/nlp_resource_data/nltk/test/toolbox.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT =============================== diff --git a/nlp_resource_data/nltk/test/translate.doctest b/nlp_resource_data/nltk/test/translate.doctest index 87966fb..6a1bb70 100644 --- a/nlp_resource_data/nltk/test/translate.doctest +++ b/nlp_resource_data/nltk/test/translate.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT .. -*- coding: utf-8 -*- diff --git a/nlp_resource_data/nltk/test/translate_fixt.py b/nlp_resource_data/nltk/test/translate_fixt.py index 17b011b..ce0cd83 100644 --- a/nlp_resource_data/nltk/test/translate_fixt.py +++ b/nlp_resource_data/nltk/test/translate_fixt.py @@ -1,3 +1,4 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import from nltk.corpus import teardown_module diff --git a/nlp_resource_data/nltk/test/tree.doctest b/nlp_resource_data/nltk/test/tree.doctest index 9389417..a4b93ed 100644 --- a/nlp_resource_data/nltk/test/tree.doctest +++ b/nlp_resource_data/nltk/test/tree.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT =============================== diff --git a/nlp_resource_data/nltk/test/treeprettyprinter.doctest b/nlp_resource_data/nltk/test/treeprettyprinter.doctest index 3c129c7..8302c2c 100644 --- a/nlp_resource_data/nltk/test/treeprettyprinter.doctest +++ b/nlp_resource_data/nltk/test/treeprettyprinter.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. 
Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT ======================================================== diff --git a/nlp_resource_data/nltk/test/treetransforms.doctest b/nlp_resource_data/nltk/test/treetransforms.doctest index 973c27d..e44e504 100644 --- a/nlp_resource_data/nltk/test/treetransforms.doctest +++ b/nlp_resource_data/nltk/test/treetransforms.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT -------------------------------------------
[GIT binary patch data omitted: the remaining hunks replace byte-compiled caches under nlp_resource_data/nltk/test/unit/__pycache__/ and nlp_resource_data/nltk/test/unit/lm/__pycache__/, namely updated *.cpython-37.pyc files, a new test_2x_compat.cpython-37.pyc, and deleted caches for test_json_serialization, test_nombank, and test_pl196x.]
diff --git
a/nlp_resource_data/nltk/test/unit/lm/test_counter.py b/nlp_resource_data/nltk/test/unit/lm/test_counter.py index f7182cf..31fab79 100644 --- a/nlp_resource_data/nltk/test/unit/lm/test_counter.py +++ b/nlp_resource_data/nltk/test/unit/lm/test_counter.py @@ -1,12 +1,15 @@ +# -*- coding: utf-8 -*- # Natural Language Toolkit: Language Model Unit Tests # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Ilia Kurenkov # URL: # For license information, see LICENSE.TXT import unittest +import six + from nltk import FreqDist from nltk.lm import NgramCounter from nltk.util import everygrams @@ -51,8 +54,8 @@ class NgramCounterTests(unittest.TestCase): bigrams = self.trigram_counter[2] trigrams = self.trigram_counter[3] - self.assertCountEqual(expected_bigram_contexts, bigrams.conditions()) - self.assertCountEqual(expected_trigram_contexts, trigrams.conditions()) + six.assertCountEqual(self, expected_bigram_contexts, bigrams.conditions()) + six.assertCountEqual(self, expected_trigram_contexts, trigrams.conditions()) def test_bigram_counts_seen_ngrams(self): b_given_a_count = 1 @@ -102,7 +105,7 @@ class NgramCounterTrainingTests(unittest.TestCase): self.assertFalse(bool(counter[3])) self.assertFalse(bool(counter[2])) - self.assertCountEqual(words, counter[1].keys()) + six.assertCountEqual(self, words, counter[1].keys()) def test_train_on_illegal_sentences(self): str_sent = ["Check", "this", "out", "!"] @@ -127,6 +130,6 @@ class NgramCounterTrainingTests(unittest.TestCase): bigram_contexts = [("a",), ("c",)] trigram_contexts = [("e", "f")] - self.assertCountEqual(unigrams, counter[1].keys()) - self.assertCountEqual(bigram_contexts, counter[2].keys()) - self.assertCountEqual(trigram_contexts, counter[3].keys()) + six.assertCountEqual(self, unigrams, counter[1].keys()) + six.assertCountEqual(self, bigram_contexts, counter[2].keys()) + six.assertCountEqual(self, trigram_contexts, counter[3].keys()) diff --git a/nlp_resource_data/nltk/test/unit/lm/test_models.py b/nlp_resource_data/nltk/test/unit/lm/test_models.py index f39619e..f19edd4 100644 --- a/nlp_resource_data/nltk/test/unit/lm/test_models.py +++ b/nlp_resource_data/nltk/test/unit/lm/test_models.py @@ -1,14 +1,18 @@ +# -*- coding: utf-8 -*- # Natural Language Toolkit: Language Model Unit Tests # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Ilia Kurenkov # URL: # For license information, see LICENSE.TXT +from __future__ import division import math +import sys import unittest +from six import add_metaclass from nltk.lm import ( Vocabulary, @@ -53,11 +57,19 @@ class ParametrizeTestsMeta(type): dct["test_score_{0}".format(i)] = cls.add_score_test( word, context, expected_score ) - return super().__new__(cls, name, bases, dct) + return super(ParametrizeTestsMeta, cls).__new__(cls, name, bases, dct) @classmethod def add_score_test(cls, word, context, expected_score): - message = "word='{word}', context={context}" + if sys.version_info > (3, 5): + message = "word='{word}', context={context}" + else: + # Python 2 doesn't report the mismatched values if we pass a custom + # message, so we have to report them manually. 
+ message = ( + "{score} != {expected_score} within 4 places, " + "word='{word}', context={context}" + ) def test_method(self): score = self.model.score(word, context) @@ -76,8 +88,9 @@ class ParametrizeTestsMeta(type): return test -class MleBigramTests(unittest.TestCase, metaclass=ParametrizeTestsMeta): - """Unit tests for MLE ngram model.""" +@add_metaclass(ParametrizeTestsMeta) +class MleBigramTests(unittest.TestCase): + """unit tests for MLENgramModel class""" score_tests = [ ("d", ["c"], 1), @@ -155,7 +168,8 @@ class MleBigramTests(unittest.TestCase, metaclass=ParametrizeTestsMeta): self.assertAlmostEqual(perplexity, self.model.perplexity(text), places=4) -class MleTrigramTests(unittest.TestCase, metaclass=ParametrizeTestsMeta): +@add_metaclass(ParametrizeTestsMeta) +class MleTrigramTests(unittest.TestCase): """MLE trigram model tests""" score_tests = [ @@ -179,8 +193,9 @@ class MleTrigramTests(unittest.TestCase, metaclass=ParametrizeTestsMeta): self.model.fit(training_text) -class LidstoneBigramTests(unittest.TestCase, metaclass=ParametrizeTestsMeta): - """Unit tests for Lidstone class""" +@add_metaclass(ParametrizeTestsMeta) +class LidstoneBigramTests(unittest.TestCase): + """unit tests for Lidstone class""" score_tests = [ # count(d | c) = 1 @@ -237,7 +252,8 @@ class LidstoneBigramTests(unittest.TestCase, metaclass=ParametrizeTestsMeta): self.assertAlmostEqual(perplexity, self.model.perplexity(text), places=4) -class LidstoneTrigramTests(unittest.TestCase, metaclass=ParametrizeTestsMeta): +@add_metaclass(ParametrizeTestsMeta) +class LidstoneTrigramTests(unittest.TestCase): score_tests = [ # Logic behind this is the same as for bigram model ("d", ["c"], 1.1 / 1.8), @@ -254,8 +270,9 @@ class LidstoneTrigramTests(unittest.TestCase, metaclass=ParametrizeTestsMeta): self.model.fit(training_text) -class LaplaceBigramTests(unittest.TestCase, metaclass=ParametrizeTestsMeta): - """Unit tests for Laplace class""" +@add_metaclass(ParametrizeTestsMeta) +class LaplaceBigramTests(unittest.TestCase): + """unit tests for Laplace class""" score_tests = [ # basic sanity-check: @@ -314,7 +331,8 @@ class LaplaceBigramTests(unittest.TestCase, metaclass=ParametrizeTestsMeta): self.assertAlmostEqual(perplexity, self.model.perplexity(text), places=4) -class WittenBellInterpolatedTrigramTests(unittest.TestCase, metaclass=ParametrizeTestsMeta): +@add_metaclass(ParametrizeTestsMeta) +class WittenBellInterpolatedTrigramTests(unittest.TestCase): def setUp(self): vocab, training_text = _prepare_test_data(3) self.model = WittenBellInterpolated(3, vocabulary=vocab) @@ -339,13 +357,11 @@ class WittenBellInterpolatedTrigramTests(unittest.TestCase, metaclass=Parametriz # gamma(['a', 'b']) = 0.0667 # mle("c", ["a", "b"]) = 1 ("c", ["a", "b"], (1 - 0.0667) + 0.0667 * ((1 - 0.1111) * 0.5 + 0.1111 / 18)), - # The ngram 'z b c' was not seen, so we should simply revert to - # the score of the ngram 'b c'. See issue #2332. - ("c", ["z", "b"], ((1 - 0.1111) * 0.5 + 0.1111 / 18)), ] -class KneserNeyInterpolatedTrigramTests(unittest.TestCase, metaclass=ParametrizeTestsMeta): +@add_metaclass(ParametrizeTestsMeta) +class KneserNeyInterpolatedTrigramTests(unittest.TestCase): def setUp(self): vocab, training_text = _prepare_test_data(3) self.model = KneserNeyInterpolated(3, vocabulary=vocab) @@ -370,14 +386,11 @@ class KneserNeyInterpolatedTrigramTests(unittest.TestCase, metaclass=Parametrize # gamma(['a', 'b']) = 0.1 * 1 # normalizer = total number of trigrams with prefix "ab" = 1 => we can ignore it! 
("c", ["a", "b"], 0.9 + 0.1 * ((0.9 + 0.2 * (1 / 8)) / 2)), - # The ngram 'z b c' was not seen, so we should simply revert to - # the score of the ngram 'b c'. See issue #2332. - ("c", ["z", "b"], ((0.9 + 0.2 * (1 / 8)) / 2)), ] class NgramModelTextGenerationTests(unittest.TestCase): - """Using MLE model, generate some text.""" + """Using MLE estimator, generate some text.""" def setUp(self): vocab, training_text = _prepare_test_data(3) @@ -399,14 +412,10 @@ class NgramModelTextGenerationTests(unittest.TestCase): self.model.generate(text_seed=("a", ""), random_seed=2), "a" ) - def test_generate_cycle(self): - # Add a cycle to the model: bd -> b, db -> d - more_training_text = [list(padded_everygrams(self.model.order, list("bdbdbd")))] - self.model.fit(more_training_text) - # Test that we can escape the cycle + def test_generate_no_seed_unigrams(self): self.assertEqual( - self.model.generate(7, text_seed=("b", "d"), random_seed=5), - ["b", "d", "b", "d", "b", "d", ""], + self.model.generate(5, random_seed=3), + ["", "", "", "", ""], ) def test_generate_with_text_seed(self): diff --git a/nlp_resource_data/nltk/test/unit/lm/test_preprocessing.py b/nlp_resource_data/nltk/test/unit/lm/test_preprocessing.py index c298552..02a8af5 100644 --- a/nlp_resource_data/nltk/test/unit/lm/test_preprocessing.py +++ b/nlp_resource_data/nltk/test/unit/lm/test_preprocessing.py @@ -1,6 +1,7 @@ +# -*- coding: utf-8 -*- # Natural Language Toolkit: Language Model Unit Tests # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Ilia Kurenkov # URL: # For license information, see LICENSE.TXT diff --git a/nlp_resource_data/nltk/test/unit/lm/test_vocabulary.py b/nlp_resource_data/nltk/test/unit/lm/test_vocabulary.py index db82eb5..dd78b42 100644 --- a/nlp_resource_data/nltk/test/unit/lm/test_vocabulary.py +++ b/nlp_resource_data/nltk/test/unit/lm/test_vocabulary.py @@ -1,6 +1,7 @@ +# -*- coding: utf-8 -*- # Natural Language Toolkit: Language Model Unit Tests # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Ilia Kurenkov # URL: # For license information, see LICENSE.TXT @@ -8,6 +9,7 @@ import unittest from collections import Counter +import six from nltk.lm import Vocabulary @@ -59,8 +61,8 @@ class NgramModelVocabularyTests(unittest.TestCase): vocab_counts = ["a", "b", "c", "d", "e", "f", "g", "w", "z"] vocab_items = ["a", "b", "d", "e", ""] - self.assertCountEqual(vocab_counts, list(self.vocab.counts.keys())) - self.assertCountEqual(vocab_items, list(self.vocab)) + six.assertCountEqual(self, vocab_counts, list(self.vocab.counts.keys())) + six.assertCountEqual(self, vocab_items, list(self.vocab)) def test_update_empty_vocab(self): empty = Vocabulary(unk_cutoff=2) @@ -123,7 +125,8 @@ class NgramModelVocabularyTests(unittest.TestCase): def test_str(self): self.assertEqual( - str(self.vocab), "" + str(self.vocab), + (""), ) def test_creation_with_counter(self): diff --git a/nlp_resource_data/nltk/test/unit/test_2x_compat.py b/nlp_resource_data/nltk/test/unit/test_2x_compat.py new file mode 100644 index 0000000..f078373 --- /dev/null +++ b/nlp_resource_data/nltk/test/unit/test_2x_compat.py @@ -0,0 +1,65 @@ +# -*- coding: utf-8 -*- +""" +Unit tests for nltk.compat. +See also nltk/test/compat.doctest. 
+""" +from __future__ import absolute_import, unicode_literals +import unittest + +from nltk.text import Text +from nltk.compat import PY3, python_2_unicode_compatible + + +def setup_module(module): + from nose import SkipTest + + if PY3: + raise SkipTest("test_2x_compat is for testing nltk.compat under Python 2.x") + + +class TestTextTransliteration(unittest.TestCase): + txt = Text(["São", "Tomé", "and", "Príncipe"]) + + def test_repr(self): + self.assertEqual(repr(self.txt), br"") + + def test_str(self): + self.assertEqual(str(self.txt), b"") + + +class TestFraction(unittest.TestCase): + def test_unnoramlize_fraction(self): + from fractions import Fraction as NativePythonFraction + from nltk.compat import Fraction as NLTKFraction + + # The native fraction should throw a TypeError in Python < 3.5 + with self.assertRaises(TypeError): + NativePythonFraction(0, 1000, _normalize=False) + + # Using nltk.compat.Fraction in Python < 3.5 + compat_frac = NLTKFraction(0, 1000, _normalize=False) + # The numerator and denominator does not change. + assert compat_frac.numerator == 0 + assert compat_frac.denominator == 1000 + # The floating point value remains normalized. + assert float(compat_frac) == 0.0 + + # Checks that the division is not divided by + # # by greatest common divisor (gcd). + six_twelve = NLTKFraction(6, 12, _normalize=False) + assert six_twelve.numerator == 6 + assert six_twelve.denominator == 12 + + one_two = NLTKFraction(1, 2, _normalize=False) + assert one_two.numerator == 1 + assert one_two.denominator == 2 + + # Checks against the native fraction. + six_twelve_original = NativePythonFraction(6, 12) + # Checks that rational values of one_two and six_twelve is the same. + assert float(one_two) == float(six_twelve) == float(six_twelve_original) + + # Checks that the fraction does get normalized, even when + # _normalize == False when numerator is using native + # fractions.Fraction.from_float + assert NLTKFraction(3.142, _normalize=False) == NativePythonFraction(3.142) diff --git a/nlp_resource_data/nltk/test/unit/test_aline.py b/nlp_resource_data/nltk/test/unit/test_aline.py index f63d211..72b92c7 100644 --- a/nlp_resource_data/nltk/test/unit/test_aline.py +++ b/nlp_resource_data/nltk/test/unit/test_aline.py @@ -3,6 +3,7 @@ Unit tests for nltk.metrics.aline """ +from __future__ import unicode_literals import unittest diff --git a/nlp_resource_data/nltk/test/unit/test_cfd_mutation.py b/nlp_resource_data/nltk/test/unit/test_cfd_mutation.py deleted file mode 100644 index 7e21d7e..0000000 --- a/nlp_resource_data/nltk/test/unit/test_cfd_mutation.py +++ /dev/null @@ -1,39 +0,0 @@ -import unittest -from nltk import ConditionalFreqDist, tokenize - -class TestEmptyCondFreq(unittest.TestCase): - def test_tabulate(self): - empty = ConditionalFreqDist() - self.assertEqual(empty.conditions(),[]) - try: - empty.tabulate(conditions="BUG") # nonexistent keys shouldn't be added - except: - pass - self.assertEqual(empty.conditions(), []) - - - def test_plot(self): - empty = ConditionalFreqDist() - self.assertEqual(empty.conditions(),[]) - try: - empty.plot(conditions=["BUG"]) # nonexistent keys shouldn't be added - except: - pass - self.assertEqual(empty.conditions(),[]) - - def test_increment(self): - # make sure that we can still mutate cfd normally - text = "cow cat mouse cat tiger" - cfd = ConditionalFreqDist() - - # create cfd with word length as condition - for word in tokenize.word_tokenize(text): - condition = len(word) - cfd[condition][word] += 1 - - self.assertEqual(cfd.conditions(), 
[3,5]) - - # incrementing previously unseen key is still possible - cfd[2]['hi'] += 1 - self.assertEqual(set(cfd.conditions()),set([3,5,2])) # new condition added - self.assertEqual(cfd[2]['hi'], 1) # key's frequency incremented from 0 (unseen) to 1 diff --git a/nlp_resource_data/nltk/test/unit/test_cfg2chomsky.py b/nlp_resource_data/nltk/test/unit/test_cfg2chomsky.py deleted file mode 100644 index 686861e..0000000 --- a/nlp_resource_data/nltk/test/unit/test_cfg2chomsky.py +++ /dev/null @@ -1,49 +0,0 @@ -# -*- coding: utf-8 -*- -import unittest -import nltk -from nltk.grammar import CFG - - -class ChomskyNormalFormForCFGTest(unittest.TestCase): - def test_simple(self): - grammar = CFG.fromstring( - """ - S -> NP VP - PP -> P NP - NP -> Det N | NP PP P - VP -> V NP | VP PP - VP -> Det - Det -> 'a' | 'the' - N -> 'dog' | 'cat' - V -> 'chased' | 'sat' - P -> 'on' | 'in' - """ - ) - self.assertFalse(grammar.is_flexible_chomsky_normal_form()) - self.assertFalse(grammar.is_chomsky_normal_form()) - grammar = grammar.chomsky_normal_form(flexible=True) - self.assertTrue(grammar.is_flexible_chomsky_normal_form()) - self.assertFalse(grammar.is_chomsky_normal_form()) - - grammar2 = CFG.fromstring( - """ - S -> NP VP - NP -> VP N P - VP -> P - N -> 'dog' | 'cat' - P -> 'on' | 'in' - """ - ) - self.assertFalse(grammar2.is_flexible_chomsky_normal_form()) - self.assertFalse(grammar2.is_chomsky_normal_form()) - grammar2 = grammar2.chomsky_normal_form() - self.assertTrue(grammar2.is_flexible_chomsky_normal_form()) - self.assertTrue(grammar2.is_chomsky_normal_form()) - - def test_complex(self): - grammar = nltk.data.load('grammars/large_grammars/atis.cfg') - self.assertFalse(grammar.is_flexible_chomsky_normal_form()) - self.assertFalse(grammar.is_chomsky_normal_form()) - grammar = grammar.chomsky_normal_form(flexible=True) - self.assertTrue(grammar.is_flexible_chomsky_normal_form()) - self.assertFalse(grammar.is_chomsky_normal_form()) diff --git a/nlp_resource_data/nltk/test/unit/test_chunk.py b/nlp_resource_data/nltk/test/unit/test_chunk.py index 7d61518..8c40dfc 100644 --- a/nlp_resource_data/nltk/test/unit/test_chunk.py +++ b/nlp_resource_data/nltk/test/unit/test_chunk.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import, unicode_literals import unittest from nltk import RegexpParser diff --git a/nlp_resource_data/nltk/test/unit/test_classify.py b/nlp_resource_data/nltk/test/unit/test_classify.py index 4dae5d1..e9128d2 100644 --- a/nlp_resource_data/nltk/test/unit/test_classify.py +++ b/nlp_resource_data/nltk/test/unit/test_classify.py @@ -2,6 +2,7 @@ """ Unit tests for nltk.classify. 
See also: nltk/test/classify.doctest """ +from __future__ import absolute_import from nose import SkipTest from nltk import classify diff --git a/nlp_resource_data/nltk/test/unit/test_collocations.py b/nlp_resource_data/nltk/test/unit/test_collocations.py index 8949411..8e3535f 100644 --- a/nlp_resource_data/nltk/test/unit/test_collocations.py +++ b/nlp_resource_data/nltk/test/unit/test_collocations.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import, unicode_literals import unittest from nltk.collocations import BigramCollocationFinder diff --git a/nlp_resource_data/nltk/test/unit/test_concordance.py b/nlp_resource_data/nltk/test/unit/test_concordance.py index 83e407b..81ac47b 100644 --- a/nlp_resource_data/nltk/test/unit/test_concordance.py +++ b/nlp_resource_data/nltk/test/unit/test_concordance.py @@ -1,15 +1,20 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import, unicode_literals import unittest import contextlib import sys -from io import StringIO from nose import with_setup from nltk.corpus import gutenberg from nltk.text import Text +try: + from StringIO import StringIO +except ImportError as e: + from io import StringIO + @contextlib.contextmanager def stdout_redirect(where): diff --git a/nlp_resource_data/nltk/test/unit/test_corenlp.py b/nlp_resource_data/nltk/test/unit/test_corenlp.py index 966ecc6..fed13e3 100644 --- a/nlp_resource_data/nltk/test/unit/test_corenlp.py +++ b/nlp_resource_data/nltk/test/unit/test_corenlp.py @@ -7,8 +7,11 @@ Mock test for Stanford CoreNLP wrappers. import sys from itertools import chain from unittest import TestCase, SkipTest -from unittest.mock import MagicMock +try: + from unittest.mock import MagicMock +except ImportError: + raise SkipTest('unittest.mock no supported in Python2') from nltk.tree import Tree from nltk.parse import corenlp @@ -1085,7 +1088,7 @@ class TestParserAPI(TestCase): corenlp_parser.api_call.assert_called_once_with( "The quick brown fox jumps over the lazy dog", - properties={'ssplit.eolonly': 'true'}, + properties={'ssplit.ssplit.eolonly': 'true'}, ) self.assertEqual(expected_output, parsed_data) @@ -1411,6 +1414,6 @@ class TestParserAPI(TestCase): corenlp_parser.api_call.assert_called_once_with( "The quick brown fox jumps over the lazy dog", - properties={'ssplit.eolonly': 'true'}, + properties={'ssplit.ssplit.eolonly': 'true'}, ) self.assertEqual(expected_output, parsed_data.tree()) diff --git a/nlp_resource_data/nltk/test/unit/test_corpora.py b/nlp_resource_data/nltk/test/unit/test_corpora.py index 8b105b8..bce083b 100644 --- a/nlp_resource_data/nltk/test/unit/test_corpora.py +++ b/nlp_resource_data/nltk/test/unit/test_corpora.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import, unicode_literals import unittest from nltk.corpus import ( @@ -12,6 +13,7 @@ from nltk.corpus import ( udhr, ) # mwa_ppdb +from nltk.compat import python_2_unicode_compatible from nltk.tree import Tree from nltk.test.unit.utils import skipIf diff --git a/nlp_resource_data/nltk/test/unit/test_corpus_views.py b/nlp_resource_data/nltk/test/unit/test_corpus_views.py index 29d8a3c..222385a 100644 --- a/nlp_resource_data/nltk/test/unit/test_corpus_views.py +++ b/nlp_resource_data/nltk/test/unit/test_corpus_views.py @@ -2,6 +2,7 @@ """ Corpus View Regression Tests """ +from __future__ import absolute_import, unicode_literals import unittest import nltk.data from nltk.corpus.reader.util import ( diff --git a/nlp_resource_data/nltk/test/unit/test_disagreement.py 
b/nlp_resource_data/nltk/test/unit/test_disagreement.py index 6a88868..3054868 100644 --- a/nlp_resource_data/nltk/test/unit/test_disagreement.py +++ b/nlp_resource_data/nltk/test/unit/test_disagreement.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import, unicode_literals import unittest from nltk.metrics.agreement import AnnotationTask diff --git a/nlp_resource_data/nltk/test/unit/test_freqdist.py b/nlp_resource_data/nltk/test/unit/test_freqdist.py deleted file mode 100644 index a73fd02..0000000 --- a/nlp_resource_data/nltk/test/unit/test_freqdist.py +++ /dev/null @@ -1,16 +0,0 @@ -import unittest -import nltk - - -class TestFreqDist(unittest.TestCase): - - def test_iterating_returns_an_iterator_ordered_by_frequency(self): - - samples = ['one', 'two', 'two'] - - distribution = nltk.FreqDist(samples) - - most_frequent, less_frequent = [entry for entry in distribution] - - self.assertEqual(most_frequent, 'two') - self.assertEqual(less_frequent, 'one') diff --git a/nlp_resource_data/nltk/test/unit/test_hmm.py b/nlp_resource_data/nltk/test/unit/test_hmm.py index b9770ca..d211bc2 100644 --- a/nlp_resource_data/nltk/test/unit/test_hmm.py +++ b/nlp_resource_data/nltk/test/unit/test_hmm.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import, unicode_literals from nltk.tag import hmm diff --git a/nlp_resource_data/nltk/test/unit/test_json2csv_corpus.py b/nlp_resource_data/nltk/test/unit/test_json2csv_corpus.py index 6714d9c..ac61a65 100644 --- a/nlp_resource_data/nltk/test/unit/test_json2csv_corpus.py +++ b/nlp_resource_data/nltk/test/unit/test_json2csv_corpus.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: Twitter client # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Lorenzo Rubio # URL: # For license information, see LICENSE.TXT @@ -14,8 +14,10 @@ package. 
import os import unittest -from tempfile import TemporaryDirectory +from six.moves import zip + +from nltk.compat import TemporaryDirectory from nltk.corpus import twitter_samples from nltk.twitter.common import json2csv, json2csv_entities diff --git a/nlp_resource_data/nltk/test/unit/test_json_serialization.py b/nlp_resource_data/nltk/test/unit/test_json_serialization.py deleted file mode 100644 index 4667fbf..0000000 --- a/nlp_resource_data/nltk/test/unit/test_json_serialization.py +++ /dev/null @@ -1,87 +0,0 @@ -import unittest - -from nltk.corpus import brown -from nltk.jsontags import JSONTaggedDecoder, JSONTaggedEncoder -from nltk.tag import DefaultTagger, RegexpTagger, AffixTagger -from nltk.tag import UnigramTagger, BigramTagger, TrigramTagger, NgramTagger -from nltk.tag import PerceptronTagger -from nltk.tag import BrillTaggerTrainer, BrillTagger -from nltk.tag.brill import nltkdemo18 - - -class TestJSONSerialization(unittest.TestCase): - def setUp(self): - self.corpus = brown.tagged_sents()[:35] - self.decoder = JSONTaggedDecoder() - self.encoder = JSONTaggedEncoder() - self.default_tagger = DefaultTagger("NN") - - def test_default_tagger(self): - encoded = self.encoder.encode(self.default_tagger) - decoded = self.decoder.decode(encoded) - - self.assertEqual(repr(self.default_tagger), repr(decoded)) - self.assertEqual(self.default_tagger._tag, decoded._tag) - - def test_regexp_tagger(self): - tagger = RegexpTagger([(r".*", "NN")], backoff=self.default_tagger) - - encoded = self.encoder.encode(tagger) - decoded = self.decoder.decode(encoded) - - self.assertEqual(repr(tagger), repr(decoded)) - self.assertEqual(repr(tagger.backoff), repr(decoded.backoff)) - self.assertEqual(tagger._regexps, decoded._regexps) - - def test_affix_tagger(self): - tagger = AffixTagger(self.corpus, backoff=self.default_tagger) - - encoded = self.encoder.encode(tagger) - decoded = self.decoder.decode(encoded) - - self.assertEqual(repr(tagger), repr(decoded)) - self.assertEqual(repr(tagger.backoff), repr(decoded.backoff)) - self.assertEqual(tagger._affix_length, decoded._affix_length) - self.assertEqual(tagger._min_word_length, decoded._min_word_length) - self.assertEqual(tagger._context_to_tag, decoded._context_to_tag) - - def test_ngram_taggers(self): - unitagger = UnigramTagger(self.corpus, backoff=self.default_tagger) - bitagger = BigramTagger(self.corpus, backoff=unitagger) - tritagger = TrigramTagger(self.corpus, backoff=bitagger) - ntagger = NgramTagger(4, self.corpus, backoff=tritagger) - - encoded = self.encoder.encode(ntagger) - decoded = self.decoder.decode(encoded) - - self.assertEqual(repr(ntagger), repr(decoded)) - self.assertEqual(repr(tritagger), repr(decoded.backoff)) - self.assertEqual(repr(bitagger), repr(decoded.backoff.backoff)) - self.assertEqual(repr(unitagger), repr(decoded.backoff.backoff.backoff)) - self.assertEqual(repr(self.default_tagger), - repr(decoded.backoff.backoff.backoff.backoff)) - - def test_perceptron_tagger(self): - tagger = PerceptronTagger(load=False) - tagger.train(self.corpus) - - encoded = self.encoder.encode(tagger) - decoded = self.decoder.decode(encoded) - - self.assertEqual(tagger.model.weights, decoded.model.weights) - self.assertEqual(tagger.tagdict, decoded.tagdict) - self.assertEqual(tagger.classes, decoded.classes) - - def test_brill_tagger(self): - trainer = BrillTaggerTrainer(self.default_tagger, nltkdemo18(), - deterministic=True) - tagger = trainer.train(self.corpus, max_rules=30) - - encoded = self.encoder.encode(tagger) - decoded = 
self.decoder.decode(encoded) - - self.assertEqual(repr(tagger._initial_tagger), - repr(decoded._initial_tagger)) - self.assertEqual(tagger._rules, decoded._rules) - self.assertEqual(tagger._training_stats, decoded._training_stats) - diff --git a/nlp_resource_data/nltk/test/unit/test_naivebayes.py b/nlp_resource_data/nltk/test/unit/test_naivebayes.py index ac9ff9b..37e4411 100644 --- a/nlp_resource_data/nltk/test/unit/test_naivebayes.py +++ b/nlp_resource_data/nltk/test/unit/test_naivebayes.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import print_function, unicode_literals import unittest diff --git a/nlp_resource_data/nltk/test/unit/test_nombank.py b/nlp_resource_data/nltk/test/unit/test_nombank.py deleted file mode 100644 index 8f2d9d8..0000000 --- a/nlp_resource_data/nltk/test/unit/test_nombank.py +++ /dev/null @@ -1,27 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Unit tests for nltk.corpus.nombank -""" - -import unittest - -from nltk.corpus import nombank -# Load the nombank once. -nombank.nouns() - -class NombankDemo(unittest.TestCase): - def test_numbers(self): - # No. of instances. - self.assertEqual(len(nombank.instances()), 114574) - # No. of rolesets - self.assertEqual(len(nombank.rolesets()), 5577) - # No. of nouns. - self.assertEqual(len(nombank.nouns()), 4704) - - - def test_instance(self): - self.assertEqual(nombank.instances()[0].roleset, 'perc-sign.01') - - def test_framefiles_fileids(self): - self.assertEqual(len(nombank.fileids()), 4705) - self.assertTrue(all(fileid.endswith('.xml') for fileid in nombank.fileids())) diff --git a/nlp_resource_data/nltk/test/unit/test_pl196x.py b/nlp_resource_data/nltk/test/unit/test_pl196x.py deleted file mode 100644 index d90d94c..0000000 --- a/nlp_resource_data/nltk/test/unit/test_pl196x.py +++ /dev/null @@ -1,14 +0,0 @@ -import unittest - -import nltk -from nltk.corpus.reader import pl196x - - -class TestCorpusViews(unittest.TestCase): - - def test_corpus_reader(self): - pl196x_dir = nltk.data.find('corpora/pl196x') - pl = pl196x.Pl196xCorpusReader(pl196x_dir, r'.*\.xml', - textids='textids.txt', - cat_file='cats.txt') - pl.tagged_words(fileids=pl.fileids(), categories='cats.txt') diff --git a/nlp_resource_data/nltk/test/unit/test_pos_tag.py b/nlp_resource_data/nltk/test/unit/test_pos_tag.py index 0aced19..a0aa1d0 100644 --- a/nlp_resource_data/nltk/test/unit/test_pos_tag.py +++ b/nlp_resource_data/nltk/test/unit/test_pos_tag.py @@ -3,6 +3,7 @@ Tests for nltk.pos_tag """ +from __future__ import unicode_literals import unittest diff --git a/nlp_resource_data/nltk/test/unit/test_rte_classify.py b/nlp_resource_data/nltk/test/unit/test_rte_classify.py index 3ba2d06..b26298c 100644 --- a/nlp_resource_data/nltk/test/unit/test_rte_classify.py +++ b/nlp_resource_data/nltk/test/unit/test_rte_classify.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import print_function, unicode_literals import unittest diff --git a/nlp_resource_data/nltk/test/unit/test_seekable_unicode_stream_reader.py b/nlp_resource_data/nltk/test/unit/test_seekable_unicode_stream_reader.py index c5d1583..a54c559 100644 --- a/nlp_resource_data/nltk/test/unit/test_seekable_unicode_stream_reader.py +++ b/nlp_resource_data/nltk/test/unit/test_seekable_unicode_stream_reader.py @@ -3,6 +3,7 @@ The following test performs a random series of reads, seeks, and tells, and checks that the results are consistent. 
""" +from __future__ import absolute_import, unicode_literals import random import functools from io import BytesIO diff --git a/nlp_resource_data/nltk/test/unit/test_senna.py b/nlp_resource_data/nltk/test/unit/test_senna.py index be5fed0..8701225 100644 --- a/nlp_resource_data/nltk/test/unit/test_senna.py +++ b/nlp_resource_data/nltk/test/unit/test_senna.py @@ -3,6 +3,7 @@ Unit tests for Senna """ +from __future__ import unicode_literals from os import environ, path, sep import logging diff --git a/nlp_resource_data/nltk/test/unit/test_stem.py b/nlp_resource_data/nltk/test/unit/test_stem.py index 52a0d66..67677df 100644 --- a/nlp_resource_data/nltk/test/unit/test_stem.py +++ b/nlp_resource_data/nltk/test/unit/test_stem.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import print_function, unicode_literals import os import unittest @@ -40,8 +41,11 @@ class SnowballTest(unittest.TestCase): assert ar_stemmer.stem("الكلمات") == "كلم" def test_russian(self): + # Russian words both consisting of Cyrillic + # and Roman letters can be stemmed. stemmer_russian = SnowballStemmer("russian") assert stemmer_russian.stem("авантненькая") == "авантненьк" + assert stemmer_russian.stem("avenantnen'kai^a") == "avenantnen'k" def test_german(self): stemmer_german = SnowballStemmer("german") diff --git a/nlp_resource_data/nltk/test/unit/test_tag.py b/nlp_resource_data/nltk/test/unit/test_tag.py index b460854..c382074 100644 --- a/nlp_resource_data/nltk/test/unit/test_tag.py +++ b/nlp_resource_data/nltk/test/unit/test_tag.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import, unicode_literals def test_basic(): diff --git a/nlp_resource_data/nltk/test/unit/test_tgrep.py b/nlp_resource_data/nltk/test/unit/test_tgrep.py index f46b4ce..17b2c4a 100644 --- a/nlp_resource_data/nltk/test/unit/test_tgrep.py +++ b/nlp_resource_data/nltk/test/unit/test_tgrep.py @@ -3,7 +3,7 @@ # # Natural Language Toolkit: TGrep search # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Will Roberts # URL: # For license information, see LICENSE.TXT @@ -12,9 +12,12 @@ Unit tests for nltk.tgrep. ''' +from __future__ import absolute_import, print_function, unicode_literals import unittest +from six import b + from nltk.tree import ParentedTree from nltk import tgrep @@ -63,7 +66,7 @@ class TestSequenceFunctions(unittest.TestCase): Test that tokenization handles bytes and strs the same way. ''' self.assertEqual( - tgrep.tgrep_tokenize(b'A .. (B !< C . D) | ![<< (E , F) $ G]'), + tgrep.tgrep_tokenize(b('A .. (B !< C . D) | ![<< (E , F) $ G]')), tgrep.tgrep_tokenize('A .. (B !< C . 
D) | ![<< (E , F) $ G]'), ) @@ -266,15 +269,15 @@ class TestSequenceFunctions(unittest.TestCase): '(S (NP (DT the) (JJ big) (NN dog)) ' '(VP bit) (NP (DT a) (NN cat)))' ) self.assertEqual( - list(tgrep.tgrep_positions(b'NN', [tree])), - list(tgrep.tgrep_positions(b'NN', [tree])), + list(tgrep.tgrep_positions(b('NN'), [tree])), + list(tgrep.tgrep_positions('NN', [tree])), ) self.assertEqual( - list(tgrep.tgrep_nodes(b'NN', [tree])), + list(tgrep.tgrep_nodes(b('NN'), [tree])), list(tgrep.tgrep_nodes('NN', [tree])), ) self.assertEqual( - list(tgrep.tgrep_positions(b'NN|JJ', [tree])), + list(tgrep.tgrep_positions(b('NN|JJ'), [tree])), list(tgrep.tgrep_positions('NN|JJ', [tree])), ) diff --git a/nlp_resource_data/nltk/test/unit/test_tokenize.py b/nlp_resource_data/nltk/test/unit/test_tokenize.py index f3b80c5..fa0c286 100644 --- a/nlp_resource_data/nltk/test/unit/test_tokenize.py +++ b/nlp_resource_data/nltk/test/unit/test_tokenize.py @@ -4,20 +4,15 @@ Unit tests for nltk.tokenize. See also nltk/test/tokenize.doctest """ +from __future__ import unicode_literals +import os import unittest from nose import SkipTest -from nose.tools import assert_equal -from nltk.tokenize import ( - punkt, - word_tokenize, - TweetTokenizer, - StanfordSegmenter, - TreebankWordTokenizer, - SyllableTokenizer, -) +from nltk.tokenize import word_tokenize +from nltk.tokenize import TweetTokenizer, StanfordSegmenter, TreebankWordTokenizer class TestTokenize(unittest.TestCase): @@ -42,14 +37,6 @@ class TestTokenize(unittest.TestCase): 'français', ] self.assertEqual(tokens, expected) - - def test_sonority_sequencing_syllable_tokenizer(self): - """ - Test SyllableTokenizer tokenizer. - """ - tokenizer = SyllableTokenizer() - tokens = tokenizer.tokenize('justification') - self.assertEqual(tokens, ['jus', 'ti', 'fi', 'ca', 'tion']) def test_stanford_segmenter_arabic(self): """ @@ -108,25 +95,6 @@ class TestTokenize(unittest.TestCase): expected = ['(', '393', ')', "928 -3010"] result = tokenizer.tokenize(test2) self.assertEqual(result, expected) - - def test_pad_asterisk(self): - """ - Test padding of asterisk for word tokenization. - """ - text = "This is a, *weird sentence with *asterisks in it." - expected = ['This', 'is', 'a', ',', '*', 'weird', 'sentence', - 'with', '*', 'asterisks', 'in', 'it', '.'] - self.assertEqual(word_tokenize(text), expected) - - def test_pad_dotdot(self): - """ - Test padding of dotdot* for word tokenization. - """ - text = "Why did dotdot.. not get tokenized but dotdotdot... did? How about manydots....." 
- expected = ['Why', 'did', 'dotdot', '..', 'not', 'get', - 'tokenized', 'but', 'dotdotdot', '...', 'did', '?', - 'How', 'about', 'manydots', '.....'] - self.assertEqual(word_tokenize(text), expected) def test_remove_handle(self): """ @@ -378,6 +346,7 @@ class TestTokenize(unittest.TestCase): result = list(tokenizer.span_tokenize(test3)) self.assertEqual(result, expected) + def test_word_tokenize(self): """ Test word_tokenize function @@ -391,35 +360,3 @@ class TestTokenize(unittest.TestCase): sentence = "'v' 're'" expected = ["'", 'v', "'", "'re", "'"] self.assertEqual(word_tokenize(sentence), expected) - - def test_punkt_pair_iter(self): - - test_cases = [ - ('12', [('1', '2'), ('2', None)]), - ('123', [('1', '2'), ('2', '3'), ('3', None)]), - ('1234', [('1', '2'), ('2', '3'), ('3', '4'), ('4', None)]), - ] - - for (test_input, expected_output) in test_cases: - actual_output = [x for x in punkt._pair_iter(test_input)] - - assert_equal(actual_output, expected_output) - - def test_punkt_pair_iter_handles_stop_iteration_exception(self): - # test input to trigger StopIteration from next() - it = iter([]) - # call method under test and produce a generator - gen = punkt._pair_iter(it) - # unpack generator, ensure that no error is raised - list(gen) - - def test_punkt_tokenize_words_handles_stop_iteration_exception(self): - obj = punkt.PunktBaseClass() - - class TestPunktTokenizeWordsMock: - def word_tokenize(self, s): - return iter([]) - - obj._lang_vars = TestPunktTokenizeWordsMock() - # unpack generator, ensure that no error is raised - list(obj._tokenize_words('test')) diff --git a/nlp_resource_data/nltk/test/unit/test_wordnet.py b/nlp_resource_data/nltk/test/unit/test_wordnet.py index 08fd14a..f2191d3 100644 --- a/nlp_resource_data/nltk/test/unit/test_wordnet.py +++ b/nlp_resource_data/nltk/test/unit/test_wordnet.py @@ -4,8 +4,8 @@ Unit tests for nltk.corpus.wordnet See also nltk/test/wordnet.doctest """ +from __future__ import unicode_literals -import collections import os import unittest @@ -195,26 +195,3 @@ class WordnNetDemo(unittest.TestCase): self.assertAlmostEqual( S('dog.n.01').lin_similarity(S('cat.n.01'), semcor_ic), 0.8863, places=3 ) - - def test_omw_lemma_no_trailing_underscore(self): - expected = sorted([ - u'popolna_sprememba_v_mišljenju', - u'popoln_obrat', - u'preobrat', - u'preobrat_v_mišljenju' - ]) - self.assertEqual(sorted(S('about-face.n.02').lemma_names(lang='slv')), expected) - - def test_iterable_type_for_all_lemma_names(self): - # Duck-test for iterables. 
- # See https://stackoverflow.com/a/36230057/610569 - cat_lemmas = wn.all_lemma_names(lang='cat') - eng_lemmas = wn.all_lemma_names(lang='eng') - - self.assertTrue(hasattr(eng_lemmas, '__iter__')) - self.assertTrue(hasattr(eng_lemmas, '__next__') or hasattr(eng_lemmas, 'next')) - self.assertTrue(eng_lemmas.__iter__() is eng_lemmas) - - self.assertTrue(hasattr(cat_lemmas, '__iter__')) - self.assertTrue(hasattr(cat_lemmas, '__next__') or hasattr(eng_lemmas, 'next')) - self.assertTrue(cat_lemmas.__iter__() is cat_lemmas) diff --git a/nlp_resource_data/nltk/test/unit/translate/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/translate/__pycache__/__init__.cpython-37.pyc index 8a4115d4e3ff606d1e3f9980f2bde0f8fd2a7a5d..48b64c749f3cf5d2f4c6443685d923859f8e9e75 100644 GIT binary patch delta 29 jcmZ3-xQLP4iIzAiI2!-osLJR;4-VH+l diff --git a/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_gdfa.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_gdfa.cpython-37.pyc index e9f61e1a8e72765eb1bef24268d37c6d096723be..c973f8af75b81930e55fc01a83915f3bf13be138 100644 GIT binary patch delta 90 zcmeB^YLeo1;^pOH0D@BXi5t0#xfpLvuIJL@Rsf3p^3g9XF4Et8glj&ds1i`12qcO` QC{C{6NfW>je8-~z0Ekr*`~Uy| delta 130 zcmZpY>XYJj;^pOH00OzBfQ{V6T#QkZ>$&vA<$)rV*&DtL3Xh)aWDcQ6Av>0cncY^ diff --git a/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm1.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm1.cpython-37.pyc index 77558d27c0b092aa3a39456dbd76d970adcdb0c5..dc66b4dbcf94ea3af8b4a9d56d1fe30a7f5fea60 100644 GIT binary patch delta 32 mcmZ21G+l_>iISrZZX5<$n7G>+}ZLVON&jtYMrwlFt diff --git a/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm2.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm2.cpython-37.pyc index 13b6a54ff058206b871378f93a2948144e5c528f..02014afee5c50b0bcfaa603da822d4c45b80c30f 100644 GIT binary patch delta 32 mcmbO!(JR62#LLUY00gD%6E|{CVq){rFD)+8-@KkFixU8J9S8;h delta 44 ycmeB`m?^>S#LLUY00eSL0UNm|F$qQJXXNLm>SrZZX5<$n7G>+}ZC=cj#R&lI<_t;z diff --git a/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm3.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm3.cpython-37.pyc index b3c6f66712ef35cdbf36d973b6e39b36aeae47c6..316900a5bf22d3ea2e2c61218898dd2bdf8c3cce 100644 GIT binary patch delta 32 mcmdldy-AweiInSrZZX5<$n7G>+}ZO&zO;$ytEd4r$` QBcuJ~v)mGs?+Tp)0QkNVf&c&j diff --git a/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm_model.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm_model.cpython-37.pyc index 2a75f204778e3b32ffeefd786c536c952fe236ab..fba69a86c71ac707eb76b514bda384bae2ffdac8 100644 GIT binary patch delta 32 mcmaFib;pa_iItWiI$kF?vo`64zi1oa`-r7RZ*A&|wUo>?U!M(Q~teWDBE| z56A$25D@?(fkSVbsA`V0(fQZD&x`Gmn5tBbE TiUUbbr5+%uD=0pBt&%zbz&k+x delta 372 zcmX@%dc~F7iISrZZX5<$n7G>+}ZFXW76=aN>93k?L z*A1w$$Q?*%G8cJFP8Pi&?gJ7LgNR6g*eW2xck)L`$;ruLo7g;n+*@LkCyI;lL6k8S zft3XSRR~WG65qw>F_~9FhcO7u_m$Yk=&_kcvV~FJ8)S|ji0}syfgmCTM1+Efu*r&o z5}S`p&0`Xe0P)3vgeD8b=tz)2)aDvlcSa$EBr{ZUvYevC=C^WYj1piukkfCmB&8OY z0KEiuS>$AEh1ROtDck8f0_~h=>Id@gO1r # URL: # For license information, see LICENSE.TXT diff --git a/nlp_resource_data/nltk/test/unit/utils.py b/nlp_resource_data/nltk/test/unit/utils.py index 8bd7346..0489b16 100644 --- a/nlp_resource_data/nltk/test/unit/utils.py +++ b/nlp_resource_data/nltk/test/unit/utils.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import from unittest import 
TestCase from functools import wraps from nose.plugins.skip import SkipTest diff --git a/nlp_resource_data/nltk/test/util.doctest b/nlp_resource_data/nltk/test/util.doctest index f2360ff..7ba6af1 100644 --- a/nlp_resource_data/nltk/test/util.doctest +++ b/nlp_resource_data/nltk/test/util.doctest @@ -1,10 +1,11 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT ================= Utility functions ================= + >>> from __future__ import print_function >>> from nltk.util import * >>> from nltk.tree import Tree diff --git a/nlp_resource_data/nltk/test/wordnet.doctest b/nlp_resource_data/nltk/test/wordnet.doctest index 54c5975..409504d 100644 --- a/nlp_resource_data/nltk/test/wordnet.doctest +++ b/nlp_resource_data/nltk/test/wordnet.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT ================= @@ -6,6 +6,7 @@ WordNet Interface ================= WordNet is just another NLTK corpus reader, and can be imported like this: + >>> from __future__ import print_function, unicode_literals >>> from nltk.corpus import wordnet For more compact code, we recommend: @@ -52,31 +53,31 @@ WordNet, using ISO-639 language codes. 'nob', 'pol', 'por', 'qcn', 'slv', 'spa', 'swe', 'tha', 'zsm'] >>> wn.synsets(b'\xe7\x8a\xac'.decode('utf-8'), lang='jpn') [Synset('dog.n.01'), Synset('spy.n.01')] - + wn.synset('spy.n.01').lemma_names('jpn') # doctest: +NORMALIZE_WHITESPACE ['\u3044\u306c', '\u307e\u308f\u3057\u8005', '\u30b9\u30d1\u30a4', '\u56de\u3057\u8005', '\u56de\u8005', '\u5bc6\u5075', '\u5de5\u4f5c\u54e1', '\u5efb\u3057\u8005', '\u5efb\u8005', '\u63a2', '\u63a2\u308a', '\u72ac', '\u79d8\u5bc6\u635c\u67fb\u54e1', '\u8adc\u5831\u54e1', '\u8adc\u8005', '\u9593\u8005', '\u9593\u8adc', '\u96a0\u5bc6'] - + >>> wn.synset('dog.n.01').lemma_names('ita') ['cane', 'Canis_familiaris'] >>> wn.lemmas('cane', lang='ita') # doctest: +NORMALIZE_WHITESPACE - [Lemma('dog.n.01.cane'), Lemma('cramp.n.02.cane'), Lemma('hammer.n.01.cane'), Lemma('bad_person.n.01.cane'), + [Lemma('dog.n.01.cane'), Lemma('cramp.n.02.cane'), Lemma('hammer.n.01.cane'), Lemma('bad_person.n.01.cane'), Lemma('incompetent.n.01.cane')] >>> sorted(wn.synset('dog.n.01').lemmas('dan')) # doctest: +NORMALIZE_WHITESPACE [Lemma('dog.n.01.hund'), Lemma('dog.n.01.k\xf8ter'), Lemma('dog.n.01.vovhund'), Lemma('dog.n.01.vovse')] - + sorted(wn.synset('dog.n.01').lemmas('por')) [Lemma('dog.n.01.cachorra'), Lemma('dog.n.01.cachorro'), Lemma('dog.n.01.cadela'), Lemma('dog.n.01.c\xe3o')] - + >>> dog_lemma = wn.lemma(b'dog.n.01.c\xc3\xa3o'.decode('utf-8'), lang='por') >>> dog_lemma Lemma('dog.n.01.c\xe3o') >>> dog_lemma.lang() 'por' - >>> len(list(wordnet.all_lemma_names(pos='n', lang='jpn'))) + >>> len(wordnet.all_lemma_names(pos='n', lang='jpn')) 64797 ------- @@ -430,7 +431,7 @@ Compute transitive closures of synsets Synset('leonberg.n.01'), Synset('mexican_hairless.n.01'), Synset('newfoundland.n.01'), Synset('pooch.n.01'), Synset('poodle.n.01'), ...] 
>>> list(dog.closure(hyper)) # doctest: +NORMALIZE_WHITESPACE - [Synset('canine.n.02'), Synset('domestic_animal.n.01'), Synset('carnivore.n.01'), Synset('animal.n.01'), + [Synset('canine.n.02'), Synset('domestic_animal.n.01'), Synset('carnivore.n.01'), Synset('animal.n.01'), Synset('placental.n.01'), Synset('organism.n.01'), Synset('mammal.n.01'), Synset('living_thing.n.01'), Synset('vertebrate.n.01'), Synset('whole.n.02'), Synset('chordate.n.01'), Synset('object.n.01'), Synset('physical_entity.n.01'), Synset('entity.n.01')] diff --git a/nlp_resource_data/nltk/test/wordnet_fixt.py b/nlp_resource_data/nltk/test/wordnet_fixt.py index 09ba27c..1412c0d 100644 --- a/nlp_resource_data/nltk/test/wordnet_fixt.py +++ b/nlp_resource_data/nltk/test/wordnet_fixt.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import def teardown_module(module=None): diff --git a/nlp_resource_data/nltk/test/wordnet_lch.doctest b/nlp_resource_data/nltk/test/wordnet_lch.doctest index d92b5a1..c2536b4 100644 --- a/nlp_resource_data/nltk/test/wordnet_lch.doctest +++ b/nlp_resource_data/nltk/test/wordnet_lch.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT =============================== diff --git a/nlp_resource_data/nltk/test/wsd.doctest b/nlp_resource_data/nltk/test/wsd.doctest index 28cf0e9..b4d8f90 100644 --- a/nlp_resource_data/nltk/test/wsd.doctest +++ b/nlp_resource_data/nltk/test/wsd.doctest @@ -1,4 +1,4 @@ -.. Copyright (C) 2001-2020 NLTK Project +.. Copyright (C) 2001-2019 NLTK Project .. For license information, see LICENSE.TXT .. -*- coding: utf-8 -*- diff --git a/nlp_resource_data/nltk/text.py b/nlp_resource_data/nltk/text.py index fc2731f..0fa9c3c 100644 --- a/nlp_resource_data/nltk/text.py +++ b/nlp_resource_data/nltk/text.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Texts # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Steven Bird # Edward Loper # URL: @@ -13,25 +13,25 @@ Functionality includes: concordancing, collocation discovery, regular expression search over tokenized strings, and distributional similarity. 
""" +from __future__ import print_function, division, unicode_literals, absolute_import from math import log from collections import defaultdict, Counter, namedtuple from functools import reduce import re -import sys -from nltk.lm import MLE -from nltk.lm.preprocessing import padded_everygram_pipeline +from six import text_type + from nltk.probability import FreqDist from nltk.probability import ConditionalFreqDist as CFD from nltk.util import tokenwrap, LazyConcatenation from nltk.metrics import f_measure, BigramAssocMeasures from nltk.collocations import BigramCollocationFinder -from nltk.tokenize import sent_tokenize +from nltk.compat import python_2_unicode_compatible ConcordanceLine = namedtuple( - "ConcordanceLine", - ["left", "query", "right", "offset", "left_print", "right_print", "line"], + 'ConcordanceLine', + ['left', 'query', 'right', 'offset', 'left_print', 'right_print', 'line'], ) @@ -46,8 +46,8 @@ class ContextIndex(object): @staticmethod def _default_context(tokens, i): """One left token and one right token, normalized to lowercase""" - left = tokens[i - 1].lower() if i != 0 else "*START*" - right = tokens[i + 1].lower() if i != len(tokens) - 1 else "*END*" + left = tokens[i - 1].lower() if i != 0 else '*START*' + right = tokens[i + 1].lower() if i != len(tokens) - 1 else '*END*' return (left, right) def __init__(self, tokens, context_func=None, filter=None, key=lambda x: x): @@ -126,7 +126,7 @@ class ContextIndex(object): return fd - +@python_2_unicode_compatible class ConcordanceIndex(object): """ An index that can be used to look up the offset locations at which @@ -178,7 +178,7 @@ class ConcordanceIndex(object): return self._offsets[word] def __repr__(self): - return "" % ( + return '' % ( len(self._tokens), len(self._offsets), ) @@ -200,10 +200,10 @@ class ConcordanceIndex(object): left_context = self._tokens[max(0, i - context) : i] right_context = self._tokens[i + 1 : i + context] # Create the pretty lines with the query_word in the middle. - left_print = " ".join(left_context)[-half_width:] - right_print = " ".join(right_context)[:half_width] + left_print = ' '.join(left_context)[-half_width:] + right_print = ' '.join(right_context)[:half_width] # The WYSIWYG line of the concordance. 
-            line_print = " ".join([left_print, query_word, right_print])
+            line_print = ' '.join([left_print, query_word, right_print])
             # Create the ConcordanceLine
             concordance_line = ConcordanceLine(
                 left_context,
@@ -252,7 +252,7 @@ class TokenSearcher(object):
     """
 
     def __init__(self, tokens):
-        self._raw = "".join("<" + w + ">" for w in tokens)
+        self._raw = ''.join('<' + w + '>' for w in tokens)
 
     def findall(self, regexp):
         """
@@ -279,25 +279,25 @@ class TokenSearcher(object):
         :type regexp: str
         """
         # preprocess the regular expression
-        regexp = re.sub(r"\s", "", regexp)
-        regexp = re.sub(r"<", "(?:<(?:", regexp)
-        regexp = re.sub(r">", ")>)", regexp)
-        regexp = re.sub(r"(?<!\\)\.", "[^>]", regexp)
+        regexp = re.sub(r'\s', '', regexp)
+        regexp = re.sub(r'<', '(?:<(?:', regexp)
+        regexp = re.sub(r'>', ')>)', regexp)
+        regexp = re.sub(r'(?<!\\)\.', '[^>]', regexp)
 
         # perform the search
         hits = re.findall(regexp, self._raw)
 
         # Sanity check
         for h in hits:
-            if not h.startswith("<") and h.endswith(">"):
-                raise ValueError("Bad regexp for TokenSearcher.findall")
+            if not h.startswith('<') and h.endswith('>'):
+                raise ValueError('Bad regexp for TokenSearcher.findall')
 
         # postprocess the output
-        hits = [h[1:-1].split("><") for h in hits]
+        hits = [h[1:-1].split('><') for h in hits]
         return hits
 
-
+@python_2_unicode_compatible
 class Text(object):
     """
     A wrapper around a sequence of simple (string) tokens, which is
@@ -337,11 +337,11 @@ class Text(object):
         if name:
             self.name = name
-        elif "]" in tokens[:20]:
-            end = tokens[:20].index("]")
-            self.name = " ".join(str(tok) for tok in tokens[1:end])
+        elif ']' in tokens[:20]:
+            end = tokens[:20].index(']')
+            self.name = " ".join(text_type(tok) for tok in tokens[1:end])
         else:
-            self.name = " ".join(str(tok) for tok in tokens[:8]) + "..."
+            self.name = " ".join(text_type(tok) for tok in tokens[:8]) + "..."
 
     # ////////////////////////////////////////////////////////////
     # Support item & slice access
     # ////////////////////////////////////////////////////////////
@@ -371,7 +371,7 @@ class Text(object):
 
         :seealso: ``ConcordanceIndex``
         """
-        if "_concordance_index" not in self.__dict__:
+        if '_concordance_index' not in self.__dict__:
             self._concordance_index = ConcordanceIndex(
                 self.tokens, key=lambda s: s.lower()
             )
@@ -392,28 +392,24 @@ class Text(object):
 
         :seealso: ``ConcordanceIndex``
         """
-        if "_concordance_index" not in self.__dict__:
+        if '_concordance_index' not in self.__dict__:
             self._concordance_index = ConcordanceIndex(
                 self.tokens, key=lambda s: s.lower()
             )
 
         return self._concordance_index.find_concordance(word, width)[:lines]
 
-    def collocation_list(self, num=20, window_size=2):
+    def collocations(self, num=20, window_size=2):
         """
-        Return collocations derived from the text, ignoring stopwords.
-
-        >>> from nltk.book import text4
-        >>> text4.collocation_list()[:2]
-        [('United', 'States'), ('fellow', 'citizens')]
+        Print collocations derived from the text, ignoring stopwords.
 
-        :param num: The maximum number of collocations to return.
+        :seealso: find_collocations
+        :param num: The maximum number of collocations to print.
:type num: int :param window_size: The number of tokens spanned by a collocation (default=2) :type window_size: int - :rtype: list(tuple(str, str)) """ if not ( - "_collocations" in self.__dict__ + '_collocations' in self.__dict__ and self._num == num and self._window_size == window_size ): @@ -423,32 +419,14 @@ class Text(object): # print("Building collocations list") from nltk.corpus import stopwords - ignored_words = stopwords.words("english") + ignored_words = stopwords.words('english') finder = BigramCollocationFinder.from_words(self.tokens, window_size) finder.apply_freq_filter(2) finder.apply_word_filter(lambda w: len(w) < 3 or w.lower() in ignored_words) bigram_measures = BigramAssocMeasures() - self._collocations = list(finder.nbest(bigram_measures.likelihood_ratio, num)) - return self._collocations - - def collocations(self, num=20, window_size=2): - """ - Print collocations derived from the text, ignoring stopwords. - - >>> from nltk.book import text4 - >>> text4.collocations() # doctest: +ELLIPSIS - United States; fellow citizens; four years; ... - - :param num: The maximum number of collocations to print. - :type num: int - :param window_size: The number of tokens spanned by a collocation (default=2) - :type window_size: int - """ - - collocation_strings = [ - w1 + " " + w2 for w1, w2 in self.collocation_list(num, window_size) - ] - print(tokenwrap(collocation_strings, separator="; ")) + self._collocations = finder.nbest(bigram_measures.likelihood_ratio, num) + colloc_strings = [w1 + ' ' + w2 for w1, w2 in self._collocations] + print(tokenwrap(colloc_strings, separator="; ")) def count(self, word): """ @@ -477,7 +455,7 @@ class Text(object): :type num: int :seealso: ContextIndex.similar_words() """ - if "_word_context_index" not in self.__dict__: + if '_word_context_index' not in self.__dict__: # print('Building word-context index...') self._word_context_index = ContextIndex( self.tokens, filter=lambda x: x.isalpha(), key=lambda s: s.lower() @@ -505,13 +483,13 @@ class Text(object): Find contexts where the specified words appear; list most frequent common contexts first. - :param words: The words used to seed the similarity search - :type words: str + :param word: The word used to seed the similarity search + :type word: str :param num: The number of words to generate (default=20) :type num: int :seealso: ContextIndex.common_contexts() """ - if "_word_context_index" not in self.__dict__: + if '_word_context_index' not in self.__dict__: # print('Building word-context index...') self._word_context_index = ContextIndex( self.tokens, key=lambda s: s.lower() @@ -541,58 +519,15 @@ class Text(object): dispersion_plot(self, words) - def _train_default_ngram_lm(self, tokenized_sents, n=3): - train_data, padded_sents = padded_everygram_pipeline(n, tokenized_sents) - model = MLE(order=n) - model.fit(train_data, padded_sents) - return model - - def generate(self, length=100, text_seed=None, random_seed=42): + def generate(self, words): """ - Print random text, generated using a trigram language model. - See also `help(nltk.lm)`. - - :param length: The length of text to generate (default=100) - :type length: int - - :param text_seed: Generation can be conditioned on preceding context. - :type text_seed: list(str) - - :param random_seed: A random seed or an instance of `random.Random`. If provided, - makes the random sampling part of generation reproducible. 
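A short sketch of how the reverted collocations() shown above behaves: in this 3.4-style API it prints the pairs itself and returns None, and the newer collocation_list() helper is no longer available. Assumes the NLTK book corpora are installed.

from nltk.book import text4

# Likelihood-ratio bigrams, with stopwords and words shorter than 3 characters
# filtered out; prints something like "United States; fellow citizens; ..."
text4.collocations(num=10, window_size=2)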
(default=42) - :type random_seed: int - + Issues a reminder to users following the book online """ - # Create the model when using it the first time. - self._tokenized_sents = [ - sent.split(" ") for sent in sent_tokenize(" ".join(self.tokens)) - ] - if not hasattr(self, "trigram_model"): - print("Building ngram index...", file=sys.stderr) - self._trigram_model = self._train_default_ngram_lm( - self._tokenized_sents, n=3 - ) + import warnings - generated_tokens = [] - - assert length > 0, "The `length` must be more than 0." - while len(generated_tokens) < length: - for idx, token in enumerate( - self._trigram_model.generate( - length, text_seed=text_seed, random_seed=random_seed - ) - ): - if token == "": - continue - if token == "": - break - generated_tokens.append(token) - random_seed += 1 - - prefix = " ".join(text_seed) + " " if text_seed else "" - output_str = prefix + tokenwrap(generated_tokens[:length]) - print(output_str) - return output_str + warnings.warn( + 'The generate() method is no longer available.', DeprecationWarning + ) def plot(self, *args): """ @@ -638,14 +573,14 @@ class Text(object): self._token_searcher = TokenSearcher(self) hits = self._token_searcher.findall(regexp) - hits = [" ".join(h) for h in hits] + hits = [' '.join(h) for h in hits] print(tokenwrap(hits, "; ")) # //////////////////////////////////////////////////////////// # Helper Methods # //////////////////////////////////////////////////////////// - _CONTEXT_RE = re.compile("\w+|[\.\!\?]") + _CONTEXT_RE = re.compile('\w+|[\.\!\?]') def _context(self, tokens, i): """ @@ -657,13 +592,13 @@ class Text(object): j = i - 1 while j >= 0 and not self._CONTEXT_RE.match(tokens[j]): j -= 1 - left = tokens[j] if j != 0 else "*START*" + left = tokens[j] if j != 0 else '*START*' # Right context j = i + 1 while j < len(tokens) and not self._CONTEXT_RE.match(tokens[j]): j += 1 - right = tokens[j] if j != len(tokens) else "*END*" + right = tokens[j] if j != len(tokens) else '*END*' return (left, right) @@ -672,10 +607,10 @@ class Text(object): # //////////////////////////////////////////////////////////// def __str__(self): - return "" % self.name + return '' % self.name def __repr__(self): - return "" % self.name + return '' % self.name # Prototype only; this approach will be slow to load @@ -697,7 +632,7 @@ class TextCollection(Text): """ def __init__(self, source): - if hasattr(source, "words"): # bridge to the text corpus reader + if hasattr(source, 'words'): # bridge to the text corpus reader source = [source.words(f) for f in source.fileids()] self._texts = source @@ -717,7 +652,7 @@ class TextCollection(Text): if idf is None: matches = len([True for text in self._texts if term in text]) if len(self._texts) == 0: - raise ValueError("IDF undefined for empty document collection") + raise ValueError('IDF undefined for empty document collection') idf = log(len(self._texts) / matches) if matches else 0.0 self._idf_cache[term] = idf return idf @@ -729,14 +664,14 @@ class TextCollection(Text): def demo(): from nltk.corpus import brown - text = Text(brown.words(categories="news")) + text = Text(brown.words(categories='news')) print(text) print() print("Concordance:") - text.concordance("news") + text.concordance('news') print() print("Distributionally similar words:") - text.similar("news") + text.similar('news') print() print("Collocations:") text.collocations() @@ -745,7 +680,7 @@ def demo(): # text.generate() # print() print("Dispersion plot:") - text.dispersion_plot(["news", "report", "said", "announced"]) + 
text.dispersion_plot(['news', 'report', 'said', 'announced']) print() print("Vocabulary plot:") text.plot(50) @@ -753,10 +688,10 @@ def demo(): print("Indexing:") print("text[3]:", text[3]) print("text[3:5]:", text[3:5]) - print("text.vocab()['news']:", text.vocab()["news"]) + print("text.vocab()['news']:", text.vocab()['news']) -if __name__ == "__main__": +if __name__ == '__main__': demo() __all__ = [ diff --git a/nlp_resource_data/nltk/tgrep.py b/nlp_resource_data/nltk/tgrep.py index 84df549..d5a315a 100644 --- a/nlp_resource_data/nltk/tgrep.py +++ b/nlp_resource_data/nltk/tgrep.py @@ -3,12 +3,12 @@ # # Natural Language Toolkit: TGrep search # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Will Roberts # URL: # For license information, see LICENSE.TXT -""" +''' ============================================ TGrep search implementation for NLTK trees ============================================ @@ -108,32 +108,36 @@ specified in a call to a predicate. Predicates which call other predicates must always pass the value of these arguments on. The top-level predicate (constructed by ``_tgrep_exprs_action``) binds the macro definitions to ``m`` and initialises ``l`` to an empty dictionary. -""" +''' + +from __future__ import absolute_import, print_function, unicode_literals import functools import re +from six import binary_type, text_type + try: import pyparsing except ImportError: - print("Warning: nltk.tgrep will not work without the `pyparsing` package") - print("installed.") + print('Warning: nltk.tgrep will not work without the `pyparsing` package') + print('installed.') import nltk.tree class TgrepException(Exception): - """Tgrep exception type.""" + '''Tgrep exception type.''' pass def ancestors(node): - """ + ''' Returns the list of all nodes dominating the given tree node. This method will not work with leaf nodes, since there is no way to recover the parent. - """ + ''' results = [] try: current = node.parent() @@ -147,10 +151,10 @@ def ancestors(node): def unique_ancestors(node): - """ + ''' Returns the list of all nodes dominating the given node, where there is only a single path of descent. - """ + ''' results = [] try: current = node.parent() @@ -164,10 +168,10 @@ def unique_ancestors(node): def _descendants(node): - """ + ''' Returns the list of all nodes which are descended from the given tree node in some way. - """ + ''' try: treepos = node.treepositions() except AttributeError: @@ -176,10 +180,10 @@ def _descendants(node): def _leftmost_descendants(node): - """ + ''' Returns the set of all nodes descended in some way through left branches from this node. - """ + ''' try: treepos = node.treepositions() except AttributeError: @@ -188,10 +192,10 @@ def _leftmost_descendants(node): def _rightmost_descendants(node): - """ + ''' Returns the set of all nodes descended in some way through right branches from this node. - """ + ''' try: rightmost_leaf = max(node.treepositions()) except AttributeError: @@ -200,15 +204,15 @@ def _rightmost_descendants(node): def _istree(obj): - """Predicate to check whether `obj` is a nltk.tree.Tree.""" + '''Predicate to check whether `obj` is a nltk.tree.Tree.''' return isinstance(obj, nltk.tree.Tree) def _unique_descendants(node): - """ + ''' Returns the list of all nodes descended from the given node, where there is only a single path of descent. 
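Before the tgrep changes begin, a tiny self-contained sketch of the TextCollection tf-idf plumbing shown just above. The toy documents are made up, and tf()/tf_idf() are the companion methods defined next to idf() in this module.

from math import log
from nltk.text import TextCollection

docs = [['a', 'cat', 'sat'], ['a', 'dog', 'sat'], ['a', 'dog', 'barked']]
corpus = TextCollection(docs)

# 'dog' occurs in 2 of the 3 documents, so idf('dog') should equal log(3 / 2)
print(corpus.idf('dog'), log(3 / 2))
# tf is a plain relative frequency, so tf_idf('dog', docs[1]) is (1/3) * log(3/2)
print(corpus.tf_idf('dog', docs[1]))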
- """ + ''' results = [] current = node while current and _istree(current) and len(current) == 1: @@ -218,9 +222,9 @@ def _unique_descendants(node): def _before(node): - """ + ''' Returns the set of all nodes that are before the given node. - """ + ''' try: pos = node.treeposition() tree = node.root() @@ -230,14 +234,14 @@ def _before(node): def _immediately_before(node): - """ + ''' Returns the set of all nodes that are immediately before the given node. Tree node A immediately precedes node B if the last terminal symbol (word) produced by A immediately precedes the first terminal symbol produced by B. - """ + ''' try: pos = node.treeposition() tree = node.root() @@ -256,9 +260,9 @@ def _immediately_before(node): def _after(node): - """ + ''' Returns the set of all nodes that are after the given node. - """ + ''' try: pos = node.treeposition() tree = node.root() @@ -268,14 +272,14 @@ def _after(node): def _immediately_after(node): - """ + ''' Returns the set of all nodes that are immediately after the given node. Tree node A immediately follows node B if the first terminal symbol (word) produced by A immediately follows the last terminal symbol produced by B. - """ + ''' try: pos = node.treeposition() tree = node.root() @@ -297,66 +301,67 @@ def _immediately_after(node): def _tgrep_node_literal_value(node): - """ + ''' Gets the string value of a given parse tree node, for comparison using the tgrep node literal predicates. - """ - return node.label() if _istree(node) else str(node) + ''' + return node.label() if _istree(node) else text_type(node) def _tgrep_macro_use_action(_s, _l, tokens): - """ + ''' Builds a lambda function which looks up the macro name used. - """ + ''' assert len(tokens) == 1 - assert tokens[0][0] == "@" + assert tokens[0][0] == '@' macro_name = tokens[0][1:] def macro_use(n, m=None, l=None): if m is None or macro_name not in m: - raise TgrepException("macro {0} not defined".format(macro_name)) + raise TgrepException('macro {0} not defined'.format(macro_name)) return m[macro_name](n, m, l) return macro_use def _tgrep_node_action(_s, _l, tokens): - """ + ''' Builds a lambda function representing a predicate on a tree node depending on the name of its node. 
- """ + ''' + # print 'node tokens: ', tokens if tokens[0] == "'": # strip initial apostrophe (tgrep2 print command) tokens = tokens[1:] if len(tokens) > 1: # disjunctive definition of a node name - assert list(set(tokens[1::2])) == ["|"] + assert list(set(tokens[1::2])) == ['|'] # recursively call self to interpret each node name definition tokens = [_tgrep_node_action(None, None, [node]) for node in tokens[::2]] # capture tokens and return the disjunction return (lambda t: lambda n, m=None, l=None: any(f(n, m, l) for f in t))(tokens) else: - if hasattr(tokens[0], "__call__"): + if hasattr(tokens[0], '__call__'): # this is a previously interpreted parenthetical node # definition (lambda function) return tokens[0] - elif tokens[0] == "*" or tokens[0] == "__": + elif tokens[0] == '*' or tokens[0] == '__': return lambda n, m=None, l=None: True elif tokens[0].startswith('"'): assert tokens[0].endswith('"') - node_lit = tokens[0][1:-1].replace('\\"', '"').replace("\\\\", "\\") + node_lit = tokens[0][1:-1].replace('\\"', '"').replace('\\\\', '\\') return ( lambda s: lambda n, m=None, l=None: _tgrep_node_literal_value(n) == s )(node_lit) - elif tokens[0].startswith("/"): - assert tokens[0].endswith("/") + elif tokens[0].startswith('/'): + assert tokens[0].endswith('/') node_lit = tokens[0][1:-1] return ( lambda r: lambda n, m=None, l=None: r.search( _tgrep_node_literal_value(n) ) )(re.compile(node_lit)) - elif tokens[0].startswith("i@"): + elif tokens[0].startswith('i@'): node_func = _tgrep_node_action(_s, _l, [tokens[0][2:].lower()]) return ( lambda f: lambda n, m=None, l=None: f( @@ -370,78 +375,80 @@ def _tgrep_node_action(_s, _l, tokens): def _tgrep_parens_action(_s, _l, tokens): - """ + ''' Builds a lambda function representing a predicate on a tree node from a parenthetical notation. - """ + ''' + # print 'parenthetical tokens: ', tokens assert len(tokens) == 3 - assert tokens[0] == "(" - assert tokens[2] == ")" + assert tokens[0] == '(' + assert tokens[2] == ')' return tokens[1] def _tgrep_nltk_tree_pos_action(_s, _l, tokens): - """ + ''' Builds a lambda function representing a predicate on a tree node which returns true if the node is located at a specific tree position. - """ + ''' # recover the tuple from the parsed sting node_tree_position = tuple(int(x) for x in tokens if x.isdigit()) # capture the node's tree position return ( lambda i: lambda n, m=None, l=None: ( - hasattr(n, "treeposition") and n.treeposition() == i + hasattr(n, 'treeposition') and n.treeposition() == i ) )(node_tree_position) def _tgrep_relation_action(_s, _l, tokens): - """ + ''' Builds a lambda function representing a predicate on a tree node depending on its relation to other nodes in the tree. - """ + ''' + # print 'relation tokens: ', tokens # process negation first if needed negated = False - if tokens[0] == "!": + if tokens[0] == '!': negated = True tokens = tokens[1:] - if tokens[0] == "[": + if tokens[0] == '[': # process square-bracketed relation expressions assert len(tokens) == 3 - assert tokens[2] == "]" + assert tokens[2] == ']' retval = tokens[1] else: # process operator-node relation expressions assert len(tokens) == 2 operator, predicate = tokens # A < B A is the parent of (immediately dominates) B. - if operator == "<": + if operator == '<': retval = lambda n, m=None, l=None: ( _istree(n) and any(predicate(x, m, l) for x in n) ) # A > B A is the child of B. 
- elif operator == ">": + elif operator == '>': retval = lambda n, m=None, l=None: ( - hasattr(n, "parent") + hasattr(n, 'parent') and bool(n.parent()) and predicate(n.parent(), m, l) ) # A <, B Synonymous with A <1 B. - elif operator == "<," or operator == "<1": + elif operator == '<,' or operator == '<1': retval = lambda n, m=None, l=None: ( _istree(n) and bool(list(n)) and predicate(n[0], m, l) ) # A >, B Synonymous with A >1 B. - elif operator == ">," or operator == ">1": + elif operator == '>,' or operator == '>1': retval = lambda n, m=None, l=None: ( - hasattr(n, "parent") + hasattr(n, 'parent') and bool(n.parent()) and (n is n.parent()[0]) and predicate(n.parent(), m, l) ) # A N B A is the Nth child of B (the first child is >1). - elif operator[0] == ">" and operator[1:].isdigit(): + elif operator[0] == '>' and operator[1:].isdigit(): idx = int(operator[1:]) # capture the index parameter retval = ( lambda i: lambda n, m=None, l=None: ( - hasattr(n, "parent") + hasattr(n, 'parent') and bool(n.parent()) and 0 <= i < len(n.parent()) and (n is n.parent()[i]) @@ -467,21 +474,21 @@ def _tgrep_relation_action(_s, _l, tokens): )(idx - 1) # A <' B B is the last child of A (also synonymous with A <-1 B). # A <- B B is the last child of A (synonymous with A <-1 B). - elif operator == "<'" or operator == "<-" or operator == "<-1": + elif operator == '<\'' or operator == '<-' or operator == '<-1': retval = lambda n, m=None, l=None: ( _istree(n) and bool(list(n)) and predicate(n[-1], m, l) ) # A >' B A is the last child of B (also synonymous with A >-1 B). # A >- B A is the last child of B (synonymous with A >-1 B). - elif operator == ">'" or operator == ">-" or operator == ">-1": + elif operator == '>\'' or operator == '>-' or operator == '>-1': retval = lambda n, m=None, l=None: ( - hasattr(n, "parent") + hasattr(n, 'parent') and bool(n.parent()) and (n is n.parent()[-1]) and predicate(n.parent(), m, l) ) # A <-N B B is the N th-to-last child of A (the last child is <-1). - elif operator[:2] == "<-" and operator[2:].isdigit(): + elif operator[:2] == '<-' and operator[2:].isdigit(): idx = -int(operator[2:]) # capture the index parameter retval = ( @@ -493,12 +500,12 @@ def _tgrep_relation_action(_s, _l, tokens): ) )(idx) # A >-N B A is the N th-to-last child of B (the last child is >-1). - elif operator[:2] == ">-" and operator[2:].isdigit(): + elif operator[:2] == '>-' and operator[2:].isdigit(): idx = -int(operator[2:]) # capture the index parameter retval = ( lambda i: lambda n, m=None, l=None: ( - hasattr(n, "parent") + hasattr(n, 'parent') and bool(n.parent()) and 0 <= (i + len(n.parent())) < len(n.parent()) and (n is n.parent()[i + len(n.parent())]) @@ -506,115 +513,115 @@ def _tgrep_relation_action(_s, _l, tokens): ) )(idx) # A <: B B is the only child of A - elif operator == "<:": + elif operator == '<:': retval = lambda n, m=None, l=None: ( _istree(n) and len(n) == 1 and predicate(n[0], m, l) ) # A >: B A is the only child of B. - elif operator == ">:": + elif operator == '>:': retval = lambda n, m=None, l=None: ( - hasattr(n, "parent") + hasattr(n, 'parent') and bool(n.parent()) and len(n.parent()) == 1 and predicate(n.parent(), m, l) ) # A << B A dominates B (A is an ancestor of B). - elif operator == "<<": + elif operator == '<<': retval = lambda n, m=None, l=None: ( _istree(n) and any(predicate(x, m, l) for x in _descendants(n)) ) # A >> B A is dominated by B (A is a descendant of B). 
- elif operator == ">>": + elif operator == '>>': retval = lambda n, m=None, l=None: any( predicate(x, m, l) for x in ancestors(n) ) # A <<, B B is a left-most descendant of A. - elif operator == "<<," or operator == "<<1": + elif operator == '<<,' or operator == '<<1': retval = lambda n, m=None, l=None: ( _istree(n) and any(predicate(x, m, l) for x in _leftmost_descendants(n)) ) # A >>, B A is a left-most descendant of B. - elif operator == ">>,": + elif operator == '>>,': retval = lambda n, m=None, l=None: any( (predicate(x, m, l) and n in _leftmost_descendants(x)) for x in ancestors(n) ) # A <<' B B is a right-most descendant of A. - elif operator == "<<'": + elif operator == '<<\'': retval = lambda n, m=None, l=None: ( _istree(n) and any(predicate(x, m, l) for x in _rightmost_descendants(n)) ) # A >>' B A is a right-most descendant of B. - elif operator == ">>'": + elif operator == '>>\'': retval = lambda n, m=None, l=None: any( (predicate(x, m, l) and n in _rightmost_descendants(x)) for x in ancestors(n) ) # A <<: B There is a single path of descent from A and B is on it. - elif operator == "<<:": + elif operator == '<<:': retval = lambda n, m=None, l=None: ( _istree(n) and any(predicate(x, m, l) for x in _unique_descendants(n)) ) # A >>: B There is a single path of descent from B and A is on it. - elif operator == ">>:": + elif operator == '>>:': retval = lambda n, m=None, l=None: any( predicate(x, m, l) for x in unique_ancestors(n) ) # A . B A immediately precedes B. - elif operator == ".": + elif operator == '.': retval = lambda n, m=None, l=None: any( predicate(x, m, l) for x in _immediately_after(n) ) # A , B A immediately follows B. - elif operator == ",": + elif operator == ',': retval = lambda n, m=None, l=None: any( predicate(x, m, l) for x in _immediately_before(n) ) # A .. B A precedes B. - elif operator == "..": + elif operator == '..': retval = lambda n, m=None, l=None: any( predicate(x, m, l) for x in _after(n) ) # A ,, B A follows B. - elif operator == ",,": + elif operator == ',,': retval = lambda n, m=None, l=None: any( predicate(x, m, l) for x in _before(n) ) # A $ B A is a sister of B (and A != B). - elif operator == "$" or operator == "%": + elif operator == '$' or operator == '%': retval = lambda n, m=None, l=None: ( - hasattr(n, "parent") + hasattr(n, 'parent') and bool(n.parent()) and any(predicate(x, m, l) for x in n.parent() if x is not n) ) # A $. B A is a sister of and immediately precedes B. - elif operator == "$." or operator == "%.": + elif operator == '$.' or operator == '%.': retval = lambda n, m=None, l=None: ( - hasattr(n, "right_sibling") + hasattr(n, 'right_sibling') and bool(n.right_sibling()) and predicate(n.right_sibling(), m, l) ) # A $, B A is a sister of and immediately follows B. - elif operator == "$," or operator == "%,": + elif operator == '$,' or operator == '%,': retval = lambda n, m=None, l=None: ( - hasattr(n, "left_sibling") + hasattr(n, 'left_sibling') and bool(n.left_sibling()) and predicate(n.left_sibling(), m, l) ) # A $.. B A is a sister of and precedes B. - elif operator == "$.." or operator == "%..": + elif operator == '$..' or operator == '%..': retval = lambda n, m=None, l=None: ( - hasattr(n, "parent") - and hasattr(n, "parent_index") + hasattr(n, 'parent') + and hasattr(n, 'parent_index') and bool(n.parent()) and any(predicate(x, m, l) for x in n.parent()[n.parent_index() + 1 :]) ) # A $,, B A is a sister of and follows B. 
- elif operator == "$,," or operator == "%,,": + elif operator == '$,,' or operator == '%,,': retval = lambda n, m=None, l=None: ( - hasattr(n, "parent") - and hasattr(n, "parent_index") + hasattr(n, 'parent') + and hasattr(n, 'parent_index') and bool(n.parent()) and any(predicate(x, m, l) for x in n.parent()[: n.parent_index()]) ) @@ -629,8 +636,8 @@ def _tgrep_relation_action(_s, _l, tokens): return retval -def _tgrep_conjunction_action(_s, _l, tokens, join_char="&"): - """ +def _tgrep_conjunction_action(_s, _l, tokens, join_char='&'): + ''' Builds a lambda function representing a predicate on a tree node from the conjunction of several other such lambda functions. @@ -651,9 +658,10 @@ def _tgrep_conjunction_action(_s, _l, tokens, join_char="&"): tokens[0] is a tgrep_expr predicate; tokens[1:] are an (optional) list of segmented patterns (`tgrep_expr_labeled`, processed by `_tgrep_segmented_pattern_action`). - """ + ''' # filter out the ampersand tokens = [x for x in tokens if x != join_char] + # print 'relation conjunction tokens: ', tokens if len(tokens) == 1: return tokens[0] else: @@ -665,7 +673,7 @@ def _tgrep_conjunction_action(_s, _l, tokens, join_char="&"): def _tgrep_segmented_pattern_action(_s, _l, tokens): - """ + ''' Builds a lambda function representing a segmented pattern. Called for expressions like (`tgrep_expr_labeled`):: @@ -687,7 +695,7 @@ def _tgrep_segmented_pattern_action(_s, _l, tokens): parse action to the pred use inside a node_expr. See `_tgrep_node_label_use_action` and `_tgrep_node_label_pred_use_action`. - """ + ''' # tokens[0] is a string containing the node label node_label = tokens[0] # tokens[1:] is an (optional) list of predicates which must all @@ -695,11 +703,11 @@ def _tgrep_segmented_pattern_action(_s, _l, tokens): reln_preds = tokens[1:] def pattern_segment_pred(n, m=None, l=None): - """This predicate function ignores its node argument.""" + '''This predicate function ignores its node argument.''' # look up the bound node using its label if l is None or node_label not in l: raise TgrepException( - "node_label ={0} not bound in pattern".format(node_label) + 'node_label ={0} not bound in pattern'.format(node_label) ) node = l[node_label] # match the relation predicates against the node @@ -709,7 +717,7 @@ def _tgrep_segmented_pattern_action(_s, _l, tokens): def _tgrep_node_label_use_action(_s, _l, tokens): - """ + ''' Returns the node label used to begin a tgrep_expr_labeled. See `_tgrep_segmented_pattern_action`. @@ -721,14 +729,14 @@ def _tgrep_node_label_use_action(_s, _l, tokens): expression (see `_tgrep_segmented_pattern_action`). It returns the node label. - """ + ''' assert len(tokens) == 1 - assert tokens[0].startswith("=") + assert tokens[0].startswith('=') return tokens[0][1:] def _tgrep_node_label_pred_use_action(_s, _l, tokens): - """ + ''' Builds a lambda function representing a predicate on a tree node which describes the use of a previously bound node label. @@ -740,16 +748,16 @@ def _tgrep_node_label_pred_use_action(_s, _l, tokens): relation). The predicate returns true if and only if its node argument is identical the the node looked up in the node label dictionary using the node's label. 
- """ + ''' assert len(tokens) == 1 - assert tokens[0].startswith("=") + assert tokens[0].startswith('=') node_label = tokens[0][1:] def node_label_use_pred(n, m=None, l=None): # look up the bound node using its label if l is None or node_label not in l: raise TgrepException( - "node_label ={0} not bound in pattern".format(node_label) + 'node_label ={0} not bound in pattern'.format(node_label) ) node = l[node_label] # truth means the given node is this node @@ -759,7 +767,7 @@ def _tgrep_node_label_pred_use_action(_s, _l, tokens): def _tgrep_bind_node_label_action(_s, _l, tokens): - """ + ''' Builds a lambda function representing a predicate on a tree node which can optionally bind a matching node into the tgrep2 string's label_dict. @@ -768,7 +776,7 @@ def _tgrep_bind_node_label_action(_s, _l, tokens): /NP/ @NP=n - """ + ''' # tokens[0] is a tgrep_node_expr if len(tokens) == 1: return tokens[0] @@ -776,7 +784,7 @@ def _tgrep_bind_node_label_action(_s, _l, tokens): # if present, tokens[1] is the character '=', and tokens[2] is # a tgrep_node_label, a string value containing the node label assert len(tokens) == 3 - assert tokens[1] == "=" + assert tokens[1] == '=' node_pred = tokens[0] node_label = tokens[2] @@ -785,7 +793,7 @@ def _tgrep_bind_node_label_action(_s, _l, tokens): # bind `n` into the dictionary `l` if l is None: raise TgrepException( - "cannot bind node_label {0}: label_dict is None".format( + 'cannot bind node_label {0}: label_dict is None'.format( node_label ) ) @@ -798,12 +806,13 @@ def _tgrep_bind_node_label_action(_s, _l, tokens): def _tgrep_rel_disjunction_action(_s, _l, tokens): - """ + ''' Builds a lambda function representing a predicate on a tree node from the disjunction of several other such lambda functions. - """ + ''' # filter out the pipe - tokens = [x for x in tokens if x != "|"] + tokens = [x for x in tokens if x != '|'] + # print 'relation disjunction tokens: ', tokens if len(tokens) == 1: return tokens[0] elif len(tokens) == 2: @@ -813,16 +822,16 @@ def _tgrep_rel_disjunction_action(_s, _l, tokens): def _macro_defn_action(_s, _l, tokens): - """ + ''' Builds a dictionary structure which defines the given macro. - """ + ''' assert len(tokens) == 3 - assert tokens[0] == "@" + assert tokens[0] == '@' return {tokens[1]: tokens[2]} def _tgrep_exprs_action(_s, _l, tokens): - """ + ''' This is the top-lebel node in a tgrep2 search string; the predicate function it returns binds together all the state of a tgrep2 search string. @@ -831,11 +840,11 @@ def _tgrep_exprs_action(_s, _l, tokens): from the disjunction of several tgrep expressions. Also handles macro definitions and macro name binding, and node label definitions and node label binding. - """ + ''' if len(tokens) == 1: return lambda n, m=None, l=None: tokens[0](n, None, {}) # filter out all the semicolons - tokens = [x for x in tokens if x != ";"] + tokens = [x for x in tokens if x != ';'] # collect all macro definitions macro_dict = {} macro_defs = [tok for tok in tokens if isinstance(tok, dict)] @@ -853,42 +862,42 @@ def _tgrep_exprs_action(_s, _l, tokens): def _build_tgrep_parser(set_parse_actions=True): - """ + ''' Builds a pyparsing-based parser object for tokenizing and interpreting tgrep search strings. 
- """ - tgrep_op = pyparsing.Optional("!") + pyparsing.Regex("[$%,.<>][%,.<>0-9-':]*") + ''' + tgrep_op = pyparsing.Optional('!') + pyparsing.Regex('[$%,.<>][%,.<>0-9-\':]*') tgrep_qstring = pyparsing.QuotedString( - quoteChar='"', escChar="\\", unquoteResults=False + quoteChar='"', escChar='\\', unquoteResults=False ) tgrep_node_regex = pyparsing.QuotedString( - quoteChar="/", escChar="\\", unquoteResults=False + quoteChar='/', escChar='\\', unquoteResults=False ) tgrep_qstring_icase = pyparsing.Regex('i@\\"(?:[^"\\n\\r\\\\]|(?:\\\\.))*\\"') - tgrep_node_regex_icase = pyparsing.Regex("i@\\/(?:[^/\\n\\r\\\\]|(?:\\\\.))*\\/") - tgrep_node_literal = pyparsing.Regex("[^][ \r\t\n;:.,&|<>()$!@%'^=]+") + tgrep_node_regex_icase = pyparsing.Regex('i@\\/(?:[^/\\n\\r\\\\]|(?:\\\\.))*\\/') + tgrep_node_literal = pyparsing.Regex('[^][ \r\t\n;:.,&|<>()$!@%\'^=]+') tgrep_expr = pyparsing.Forward() tgrep_relations = pyparsing.Forward() - tgrep_parens = pyparsing.Literal("(") + tgrep_expr + ")" + tgrep_parens = pyparsing.Literal('(') + tgrep_expr + ')' tgrep_nltk_tree_pos = ( - pyparsing.Literal("N(") + pyparsing.Literal('N(') + pyparsing.Optional( pyparsing.Word(pyparsing.nums) - + "," + + ',' + pyparsing.Optional( - pyparsing.delimitedList(pyparsing.Word(pyparsing.nums), delim=",") - + pyparsing.Optional(",") + pyparsing.delimitedList(pyparsing.Word(pyparsing.nums), delim=',') + + pyparsing.Optional(',') ) ) - + ")" + + ')' ) - tgrep_node_label = pyparsing.Regex("[A-Za-z0-9]+") - tgrep_node_label_use = pyparsing.Combine("=" + tgrep_node_label) + tgrep_node_label = pyparsing.Regex('[A-Za-z0-9]+') + tgrep_node_label_use = pyparsing.Combine('=' + tgrep_node_label) # see _tgrep_segmented_pattern_action tgrep_node_label_use_pred = tgrep_node_label_use.copy() - macro_name = pyparsing.Regex("[^];:.,&|<>()[$!@%'^=\r\t\n ]+") - macro_name.setWhitespaceChars("") - macro_use = pyparsing.Combine("@" + macro_name) + macro_name = pyparsing.Regex('[^];:.,&|<>()[$!@%\'^=\r\t\n ]+') + macro_name.setWhitespaceChars('') + macro_use = pyparsing.Combine('@' + macro_name) tgrep_node_expr = ( tgrep_node_label_use_pred | macro_use @@ -897,40 +906,40 @@ def _build_tgrep_parser(set_parse_actions=True): | tgrep_node_regex_icase | tgrep_qstring | tgrep_node_regex - | "*" + | '*' | tgrep_node_literal ) tgrep_node_expr2 = ( tgrep_node_expr - + pyparsing.Literal("=").setWhitespaceChars("") - + tgrep_node_label.copy().setWhitespaceChars("") + + pyparsing.Literal('=').setWhitespaceChars('') + + tgrep_node_label.copy().setWhitespaceChars('') ) | tgrep_node_expr tgrep_node = tgrep_parens | ( pyparsing.Optional("'") + tgrep_node_expr2 + pyparsing.ZeroOrMore("|" + tgrep_node_expr) ) - tgrep_brackets = pyparsing.Optional("!") + "[" + tgrep_relations + "]" + tgrep_brackets = pyparsing.Optional('!') + '[' + tgrep_relations + ']' tgrep_relation = tgrep_brackets | (tgrep_op + tgrep_node) tgrep_rel_conjunction = pyparsing.Forward() tgrep_rel_conjunction << ( tgrep_relation - + pyparsing.ZeroOrMore(pyparsing.Optional("&") + tgrep_rel_conjunction) + + pyparsing.ZeroOrMore(pyparsing.Optional('&') + tgrep_rel_conjunction) ) tgrep_relations << tgrep_rel_conjunction + pyparsing.ZeroOrMore( "|" + tgrep_relations ) tgrep_expr << tgrep_node + pyparsing.Optional(tgrep_relations) tgrep_expr_labeled = tgrep_node_label_use + pyparsing.Optional(tgrep_relations) - tgrep_expr2 = tgrep_expr + pyparsing.ZeroOrMore(":" + tgrep_expr_labeled) + tgrep_expr2 = tgrep_expr + pyparsing.ZeroOrMore(':' + tgrep_expr_labeled) macro_defn = ( - pyparsing.Literal("@") + 
pyparsing.White().suppress() + macro_name + tgrep_expr2 + pyparsing.Literal('@') + pyparsing.White().suppress() + macro_name + tgrep_expr2 ) tgrep_exprs = ( - pyparsing.Optional(macro_defn + pyparsing.ZeroOrMore(";" + macro_defn) + ";") + pyparsing.Optional(macro_defn + pyparsing.ZeroOrMore(';' + macro_defn) + ';') + tgrep_expr2 - + pyparsing.ZeroOrMore(";" + (macro_defn | tgrep_expr2)) - + pyparsing.ZeroOrMore(";").suppress() + + pyparsing.ZeroOrMore(';' + (macro_defn | tgrep_expr2)) + + pyparsing.ZeroOrMore(';').suppress() ) if set_parse_actions: tgrep_node_label_use.setParseAction(_tgrep_node_label_use_action) @@ -950,38 +959,38 @@ def _build_tgrep_parser(set_parse_actions=True): tgrep_expr.setParseAction(_tgrep_conjunction_action) tgrep_expr_labeled.setParseAction(_tgrep_segmented_pattern_action) tgrep_expr2.setParseAction( - functools.partial(_tgrep_conjunction_action, join_char=":") + functools.partial(_tgrep_conjunction_action, join_char=':') ) tgrep_exprs.setParseAction(_tgrep_exprs_action) - return tgrep_exprs.ignore("#" + pyparsing.restOfLine) + return tgrep_exprs.ignore('#' + pyparsing.restOfLine) def tgrep_tokenize(tgrep_string): - """ + ''' Tokenizes a TGrep search string into separate tokens. - """ + ''' parser = _build_tgrep_parser(False) - if isinstance(tgrep_string, bytes): + if isinstance(tgrep_string, binary_type): tgrep_string = tgrep_string.decode() return list(parser.parseString(tgrep_string)) def tgrep_compile(tgrep_string): - """ + ''' Parses (and tokenizes, if necessary) a TGrep search string into a lambda function. - """ + ''' parser = _build_tgrep_parser(True) - if isinstance(tgrep_string, bytes): + if isinstance(tgrep_string, binary_type): tgrep_string = tgrep_string.decode() return list(parser.parseString(tgrep_string, parseAll=True))[0] def treepositions_no_leaves(tree): - """ + ''' Returns all the tree positions in the given tree which are not leaf nodes. 
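A brief sketch of the two public entry points defined above; the pattern and tree are made up, and the commented outputs are what the calls are expected to produce rather than captured results.

from nltk.tree import ParentedTree
from nltk.tgrep import tgrep_tokenize, tgrep_compile

print(tgrep_tokenize('NP < NN'))      # expected: ['NP', '<', 'NN']

pred = tgrep_compile('NP < NN')       # compile once, reuse on many nodes
t = ParentedTree.fromstring('(S (NP (DT the) (NN dog)) (VP (VBD barked)))')
matches = [t[pos] for pos in t.treepositions() if pred(t[pos])]
# matches should contain just the NP subtree, the only node with an NN child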
- """ + ''' treepositions = tree.treepositions() # leaves are treeposition tuples that are not prefixes of any # other treeposition @@ -1005,7 +1014,7 @@ def tgrep_positions(pattern, trees, search_leaves=True): :rtype: iter(tree positions) """ - if isinstance(pattern, (bytes, str)): + if isinstance(pattern, (binary_type, text_type)): pattern = tgrep_compile(pattern) for tree in trees: @@ -1032,7 +1041,7 @@ def tgrep_nodes(pattern, trees, search_leaves=True): :rtype: iter(tree nodes) """ - if isinstance(pattern, (bytes, str)): + if isinstance(pattern, (binary_type, text_type)): pattern = tgrep_compile(pattern) for tree in trees: diff --git a/nlp_resource_data/nltk/tokenize/__init__.py b/nlp_resource_data/nltk/tokenize/__init__.py index 241b9f3..7068cba 100644 --- a/nlp_resource_data/nltk/tokenize/__init__.py +++ b/nlp_resource_data/nltk/tokenize/__init__.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: Tokenizers # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # Steven Bird (minor additions) # Contributors: matthewmc, clouds56 @@ -65,7 +65,6 @@ import re from nltk.data import load from nltk.tokenize.casual import TweetTokenizer, casual_tokenize from nltk.tokenize.mwe import MWETokenizer -from nltk.tokenize.destructive import NLTKWordTokenizer from nltk.tokenize.punkt import PunktSentenceTokenizer from nltk.tokenize.regexp import ( RegexpTokenizer, @@ -89,11 +88,10 @@ from nltk.tokenize.toktok import ToktokTokenizer from nltk.tokenize.treebank import TreebankWordTokenizer from nltk.tokenize.util import string_span_tokenize, regexp_span_tokenize from nltk.tokenize.stanford_segmenter import StanfordSegmenter -from nltk.tokenize.sonority_sequencing import SyllableTokenizer # Standard sentence tokenizer. -def sent_tokenize(text, language="english"): +def sent_tokenize(text, language='english'): """ Return a sentence-tokenized copy of *text*, using NLTK's recommended sentence tokenizer @@ -103,15 +101,31 @@ def sent_tokenize(text, language="english"): :param text: text to split into sentences :param language: the model name in the Punkt corpus """ - tokenizer = load("tokenizers/punkt/{0}.pickle".format(language)) + tokenizer = load('tokenizers/punkt/{0}.pickle'.format(language)) return tokenizer.tokenize(text) # Standard word tokenizer. -_treebank_word_tokenizer = NLTKWordTokenizer() - - -def word_tokenize(text, language="english", preserve_line=False): +_treebank_word_tokenizer = TreebankWordTokenizer() + +# See discussion on https://github.com/nltk/nltk/pull/1437 +# Adding to TreebankWordTokenizer, nltk.word_tokenize now splits on +# - chervon quotes u'\xab' and u'\xbb' . 
+# - unicode quotes u'\u2018', u'\u2019', u'\u201c' and u'\u201d'
+# See https://github.com/nltk/nltk/issues/1995#issuecomment-376741608
+# Also, behavior of splitting on clitics now follows Stanford CoreNLP
+# - clitics covered (?!re|ve|ll|m|t|s|d)(\w)\b
+improved_open_quote_regex = re.compile(u'([«“‘„]|[`]+)', re.U)
+improved_open_single_quote_regex = re.compile(r"(?i)(\')(?!re|ve|ll|m|t|s|d)(\w)\b", re.U)
+improved_close_quote_regex = re.compile(u'([»”’])', re.U)
+improved_punct_regex = re.compile(r'([^\.])(\.)([\]\)}>"\'' u'»”’ ' r']*)\s*$', re.U)
+_treebank_word_tokenizer.STARTING_QUOTES.insert(0, (improved_open_quote_regex, r' \1 '))
+_treebank_word_tokenizer.STARTING_QUOTES.append((improved_open_single_quote_regex, r'\1 \2'))
+_treebank_word_tokenizer.ENDING_QUOTES.insert(0, (improved_close_quote_regex, r' \1 '))
+_treebank_word_tokenizer.PUNCTUATION.insert(0, (improved_punct_regex, r'\1 \2 \3 '))
+
+
+def word_tokenize(text, language='english', preserve_line=False):
     """
     Return a tokenized copy of *text*,
     using NLTK's recommended word tokenizer

[GIT binary patch hunks for nlp_resource_data/nltk/tokenize/__pycache__/*.cpython-37.pyc omitted: base85-encoded compiled bytecode deltas with no human-readable content]
T7^WpE_8E1f4x=sT3+8_SP!LTw diff --git a/nlp_resource_data/nltk/tokenize/__pycache__/stanford.cpython-37.pyc b/nlp_resource_data/nltk/tokenize/__pycache__/stanford.cpython-37.pyc index eaa06cfba7a16c53feb717c85d5e255c7767005c..99fb947f1e0a099ed1260469a7e97c3d7e748bb6 100644 GIT binary patch delta 961 zcmZuwO>5LZ7|u+xNj96LyIuQ5TeYp$ZqcommL5cF(W(?m(UWs9w3`v*=3|*mpbFB1 z{R4F_9@V>u2;RK-AH3ur2p&9p@_n;fDwx3Yem(Q>zArQHXI{##AR;c zF)#6%pNz1PDsRTkq{Uj5Z^ffYo3+8W*`qUew6GpJM_TCc)=Qm@g^kcX(%5)NA7~x# zJ9gWlWwS`5UKa8ojwBa*abEfZ5v4MCT%U6k{`%GJ{$0lpMM(7 z7Dxz$UmGh$E>_gn+7{VXb(0fQW#-XB8wm@4Srt`GL0H;B5T%g}f@L*s9X3vc#FTnt z&5#}S#rkpU%8=UiG zh}nTywAGZeK0gm`X~HNx4aFknFCf$rEQAG>J6%#!@0_Jg8xwN~C|Q~z$90u7%S*FJ z<0$QSoMC4xP-kY84hk8FYV;*Mwy503+}Txh*APw(i(wbwq1tZj*dBO0Tf$dw8*3Zq zu(k|Py1CqwQ7_^0NfruRinxk^2TzHeEJCBGwXw*k=S zvKRzO78Wso0o)t~G~!Z99O4s7#5Tm8iP8z+R>(r|AP^K%LPuCfxCu}iPxCA-oxIo| zh^)u+yjq!<#+sf-2jXH4T;`{-?04agagoA>6DA5&d#`p~S0B8IKE5V4S}_NEBIn&+ tCU_bTL<8C95wOjwwf}9Qw66^(x{gZuBtVne1b>&<)TNVPU25pf{okFy%^Uy# delta 861 zcmZva&ui2`6vs13cJg!6-J)&L?b^CpU4LvrQ9MYIYFjP3tcWOc3EQ2)rkfwjBrB~5 z72Km2oxej6>c8R9OK$2(@Zi~lI4{9c!2~|@eth4{n@8Twzi)V})vB$?r@7VhKTW;$ z=JsDok17;}3Mo%SP~odYm8TkNkxq0}<|?EjV_JwKgBX!X%*Y~EWD~o{>!B05#4YSf zSdBd5NgI=?4TT!ioG8>}?rV+Is739GLh2Mfsw-{hJ51DVqg?OZ>#ZGV=%?QBWQYTP z-Cy2LBDTEK-`h?`eLh_72o0~J4Y7$C(!>WmY1Kfy@VBBU<1^CDP2UgVAoKkt@x<77 z4x92B@x^GMP4V0Kapyv*WV>uI&RARHXP{FXqS_#$WxJxZ?3pD)76&g_S1M9nLea5! zWgI`aZPgK8l0L5qV&6D@N}4%lyBUiqZ$W$!fB{g)7sN-qgHDKV_W9Kc1eyRsf7vX*$@EHw)F>)->_f+ diff --git a/nlp_resource_data/nltk/tokenize/__pycache__/stanford_segmenter.cpython-37.pyc b/nlp_resource_data/nltk/tokenize/__pycache__/stanford_segmenter.cpython-37.pyc index ba109a5a52121cb32cd67946d9a780432512182c..189a51bf445974f163eececd2cbed78f704b47fc 100644 GIT binary patch delta 1718 zcmZ8hO>f*p81~p++w0x+ex#dh3M4cYAeS~tze`hEHA)IZRTLUpp%KDb*0ZS_e=)X8 z(#s;X5;sH`P8?8w0TRkDK!Ot=J@hu>$OTn`BN7*wcihr;v&u8?c%FIZ{di`6I`!~$ zW4Y0=74cV?d)^Owjp@;SG4T~2g-V1lid3v3n5&_NH96{7mtzH22L$b^yj8@(An}P8205YqFyl^2gE;qwMLfuZBwlwB z*EinTSQTS6Cp)>9@1{gdj6o8m{oFOUH4I|kyVa+>I{fbYg9e&pzZpw2Q{@Tj{31sl zg_(qZ))tD4F0jky2BPds^Zwx%#S+)H$u8G!_rs@5Isj2-?o?T~0oOo*m_;RgDj|rQbB)%QcB<9){$+<2{ag#0p=?uV0fKvd+0W5%{ z0(k9el57|0Dy0dfRrq`e;Di9zq)Y5yX9ithGtIf#vE&?|4EsfxdxIn%23vQT*L-kn z30$HKdgFP6u%Bh#<5J7ot&8QK#eb#>P}rW`WohftNf{EHNKXSmtlSW-C9z*d4}ptn zk{vQAa^mVDuFudEG4f=a#MxLQx%rs>Xn(h}i4H@p zT$Re^qn$Cb?E2sAuZeHbS@z8VcO*-04E-b}u`DyUgDi-%ydMt;t&|zWpk!Y8gbFS5 z${A~7O-XY@@mKY^=4T4^D2y~&Nzq?_R8 zJ%Hy6LvM(0f9~&eYiRU=Fh!OZfdy)FHC&Z_l6ry7+{kjDPpH!#?)%3 z34SEBgtdN!w%Hd`XRf^tpDzJi1ptlF>j3WqK<;Hl8;=z^AYu4Kl~5=b(G4I;g*w7| zjrDrRk;B${Um2HSWtsinTN$Nb!g_hm+h83L;Og9n62Axu1q#=Mt~erNd2pf5yhy#0 zxroFifGiYLS5;&n8#$ekq{S4{0m5c1X3jqbEharka|H#FmJZB)WVj3`@ZJQ?|rOqKlj6$ zea^NmMSK>n_1(*#*qzb7P<+Klp@PdN!bs{BT#-)|tKwM+wP*@YA&5~!JuNj*AT>9Ws^K8o0Z z@x`&W2;=II+~n#zUijb?Jq+iM2zZZuZEhiT{JVJr>GNP+W1h9xxqpES<)-Tfage+2 znepe=+m)7dw;;Rbd%4%6HTI}|dwLEMJOUt7&9cAkv;|>~`jGj9Mr^+fVba&64*RRo zK6e5v&jPFfoCJ6VKm&ME0Iyw4l3|f*|kPqVAd1@t^CnoEll1vGrJ z>lI<{4w85`xc)x-w)xZYX-J7O(eA424ni->+??N0>DY5u`>X1_tJ?Cz|y=tz#cev*=y!clGoSrBKr7Y_(M z&d$yq9{o>Q^pXgDP!sLRG({W+@eo=pR9wwO4Me2L=nGO8%aInMsy(ikB7GgQ^Z_1j z0lh422jZ8lDa!blISq~85}GLM3C-Uw#728vw5Yz#`E%0Nw4?txH1?!Cz+H0_nH_N8@^w_(e!4gm6`?4o75yITs6<7pYrD zo?t(AI->+mwVs~VQc?s8NpxJKWT@Dtg z9FM|1=Ry3Z628yLI%AEY)9lD=C^^R?M+zA9Zge+Ok-0Wt*)@zx~R#ut5IS-3|th569~X_E@>a;M80mXc>N&h{pw mLLUqHD-D*<8B+|tldGB=4Y6sX;DoP>WyS%0Psb=Soj(P`3#BIj delta 643 zcmZvZv2W8*5XN)H327vYv_K3k&bU00VFM>AQP(-@EtT2Yb(!<|Qc+;Va#0 zEPs42eVUl6mD*0vEZO>Y+i`VVFBOB)#9{)pAa}$9sIuYF0bC9e)3edDi@Xyn9AVT# 
z=Yfm2DRG4`%OQ(M=BN_x1lr8Lki18FnQ))5!j$w7s=;o0Pk=S{Jo6YDES5cSh)lwR zXl|P5nOI|L_TscAwB1lJeC>eYE12RHGN2W_$bJXj+n=*xiG7<>Q&qmB#Vy_SRvk38 zXxBCMb-W&&%YA{2N5>`!JpxTHOaIKd5y+=L0~qY*>D^;@sbrooo_jp}~!Hf3t;x_TS>eP2TqsA`wsIA`YT}&ERW6iNb(Q zmiw_KvK_Ej=jS@H5ukuN#v#e5pdQel^F!hY*!$eQ_-LB(zJ1yK{`<3iM zC{q0QM>Ns%&7KzPnwGg?@Tl!<$?-oQL>9P1K7#M#)3NY8e8P(ArOc3rvVf1MWb0uu(1{~0j-*Rfyb8d)?_|jac)5%_m`c1X>pPMWE);*#&w$; zcs&^znI@m&TQ4UJlq>?NKq7=dToxc9H+edLtvHCK$zCK55|scFx0s7FD~gyVD+-8P oFoOg^3XrseO#_*6i^C>2KczG$)lLk^E9L+aEKD4XJWM>y0N&X<8UO$Q delta 297 zcmeBFe5b(c#LLUY00eSL0r7Jt@=6-%068fPQ4A@JDNH$xQH(&EIfp5iIf|K)!JQ$6 zC55$xA%!)SxtTeNC50)NL6dD`%6G=i8BDTFjIxv4Snf?uWqrRnlx-p#>n-NulA_74 zd2IEg*osoqQ!5IhxQk1ZiZe?}OEUBGRx%U`04@9FrJs?Xo2s9cSecPukXV$huQ%D7 z*O_tc=6+sJMn=xbH~H4f2>}&?RU;9CATeejAv<{)e{B?qrO8?(1`-tq5>bMAIVIV8 zCHdK@d6`wIdWi*@MNA+`kU}J5KyD}knRSc9CO1E&G$+*#WJWO?kYHirVB%p00GHuK A@Bjb+ diff --git a/nlp_resource_data/nltk/tokenize/__pycache__/treebank.cpython-37.pyc b/nlp_resource_data/nltk/tokenize/__pycache__/treebank.cpython-37.pyc index 04e52411b6df910980ea0830e29ba0bb0aafb6a0..16b629f90890c1bbdcfeff9428de5cb2ab342000 100644 GIT binary patch delta 1607 zcmZ`&&u`mQ9QU&m+ew;qV~yJm3TCBCysj;qDyV?{qKK-}nn72g5i)JApO*#_J9>7g z>Pse#6F&~syC8lXI3jUDIUA?<+pexKu{?ZA;gdGGV}_w)Vr-k%?S zf4B5@v6z?O>1l`e@>dQ^GvtHAN@aCJqFJhbCQ z>r}oiS%se;UM779#nMNI#dUatRRCA26DrkZO6m%gsX{Xt0)=U=sn;_!PYX@Co~0AC z2>&WAf#_sh-m;xdFFauGy6=U|cEXnL1<_{Sgy&`}2u=Ti>7=GU*i4(+-O#1x1Lk+k zZGYEgp*d2i1ZL=e;(D#fWy?Y&nyy}JS&iMQV|!Nh+WZ}>>iHoMQF&;fL%MR{B5e%m zW~(zabW*-Agqr?H$Uf!(vPppO7~giE5fVBbpQVw#qdNQJL_sHrrZ6G1S_!S4M7|Y_ z(ie2X?%RS+)8F5-p)d;XfygGU7I>=)i8}D#*sWO_|G`+|cM30_s~6XA)poXTtnX~z zs@-1wQDOxctbnK2S9$?k;eQm~v{(k%8JxQTpeq?gCP#m&tP}Cg;5YJBF3)l(f~Syw zTDW<3tjQ*Ti}iF|y}`MOuXR%5zf8S$6$4u&&Ni9f>soRwFTWa-&I#YA`M26|LL9%- zz4{m&0vSM+qSyKRN|}FYl=ybpu9bj{!djN zV*2yq8b)CjK&&yh+qNAyEne%|VdygNh|Z|=(}Zs=^j1dBY#9`Wj%)=b$H7NVW&YO! zBc%a7Pl%dp1l5nu589P)$?{L)GUBa%?;zasJ+te>y@ksi?z!fcAGm=@-Lzwj_pFEg zNr(|;8kZWEtwNmn*l&5_7_rkBTQma?V4Mv?*6JpW5-k(W6K%wXS}GIss-V|dgjs~= z5nezzQ3dvP8_r3fVuK}}M;E@kgrudM6guu1=f?%~`NrZ}|Gy@Vmxo{$@Vl3!? 
zD@n$_hM$Zhi{{E-~b--0-~d+Qn5<+)jqaL>Qk@($7XS=Ya{vGU delta 1156 zcmZ`&&rcIU6y6{0Znw}vghDHVOAy;dx|M2_h!PQ_A>oGzF%dDabO%^ycQ>!MT{ICr=(t51u^uALzl$-n`?*qZggGYpGz|WWT;Q)9-uleKY&!?zh|VE3sIV zp-<1sjisrP-FT9n-%Y3UI;)3^LP=Z@S>`&U3%dA-(M81X@Cy-|M|6qilAlXDH^(T^ z_Yd=IN@2aklvbE9-3mtImg!=x;2M_c)NX~INj+X{d}ez7=GgfB#LV#F=SG1-Ylqjc zRkjt*p9dYczK_3IbC-PrgVA@-Z1FJDu7wfi70L<>e}u zPGgoPRF}6cs*1Z_B-<}RDGYMBo5BbB20HAt62d8(dK^YxWYsWr!$lbLbV&*0BlHKi zk=bOj|2Tv}$5@6K)V!EuYvxjYx+mm$@+x`EXBFHFyCQUL6cLU90Zt4z0W<@|0OF)2 z)t7C7uoVDW!6yLxjvL_j9;O>9vXmNU!?m}mr?FwEf)xs$h>=~?3J3=Fb6a!iAm(2> z`{Vy&2uhc8`(&(A#vdZif#jjO;2BS(3}%JegAB`x(M2b7=j1?bM|s6cdr>)f$aYs^ zXizR-q`63!S1NMZ)GfnYlihVBW3-0uBS&7vR;kW_4f0v)kkq<`Wb)MZmPqSWLyJR; zd8Nq* z48vaFD)+8-yFyCl$r6?WEYNbM&ZqKILw$CMK@pO z<^+k|<&$9)oBWGUpHXbH7XJe#UIn1`A|4>2$ylT?SzJhgQF*h4kS{Z%_GAkoi_QB) z^O>YnKx$M$gc^uY2N4=TLX&lpsJb9yktT@ESi}OMv?e#mi%gy@p~9%Md56R{HlbVW z`DtmzsU^j?SWEJ=Q}c@CfGX?+jeyGiH2H2Z6{Qw20ok`EpO;ru7Y4G5L_u7({G_bZ XSrZZX5<$n7G>+}Z4P32%FGxw*@7dS zQE2l74l`y(kbFz+*#pX?-`AnipAT`P$LIp&qf(W(AW)gb*>Odw?BMXGmm|QGj!Kk%)jl?!K zp(ytJw6x;XlHyyeCHdK@c||glOXTeY^?^$LH2H2Z6{Qw20ohTL56dg63jtY0B0%C6 cTYgejYH~@DC`d#RL}-EtEfAqUnOngI01w?f3jhEB diff --git a/nlp_resource_data/nltk/tokenize/api.py b/nlp_resource_data/nltk/tokenize/api.py index 316e385..476db21 100644 --- a/nlp_resource_data/nltk/tokenize/api.py +++ b/nlp_resource_data/nltk/tokenize/api.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Tokenizer Interface # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # Steven Bird # URL: @@ -10,13 +10,15 @@ Tokenizer Interface """ -from abc import ABC, abstractmethod +from abc import ABCMeta, abstractmethod +from six import add_metaclass from nltk.internals import overridden from nltk.tokenize.util import string_span_tokenize -class TokenizerI(ABC): +@add_metaclass(ABCMeta) +class TokenizerI(object): """ A processing interface for tokenizing a string. Subclasses must define ``tokenize()`` or ``tokenize_sents()`` (or both). @@ -68,11 +70,6 @@ class StringTokenizer(TokenizerI): on the specified string (defined in subclasses). """ - @property - @abstractmethod - def _string(self): - raise NotImplementedError - def tokenize(self, s): return s.split(self._string) diff --git a/nlp_resource_data/nltk/tokenize/casual.py b/nlp_resource_data/nltk/tokenize/casual.py index 9187cd1..edc82f2 100644 --- a/nlp_resource_data/nltk/tokenize/casual.py +++ b/nlp_resource_data/nltk/tokenize/casual.py @@ -2,7 +2,7 @@ # # Natural Language Toolkit: Twitter Tokenizer # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Christopher Potts # Ewan Klein (modifications) # Pierpaolo Pantone <> (modifications) @@ -35,8 +35,11 @@ domains and tasks. 
The basic logic is this: ###################################################################### -import regex # https://github.com/nltk/nltk/issues/2409 -import html +from __future__ import unicode_literals +import re + +from six import int2byte, unichr +from six.moves import html_entities ###################################################################### # The following strings are components in the regular expression @@ -163,17 +166,17 @@ REGEXPS = ( ###################################################################### # This is the core tokenizing regex: -WORD_RE = regex.compile(r"""(%s)""" % "|".join(REGEXPS), regex.VERBOSE | regex.I | regex.UNICODE) +WORD_RE = re.compile(r"""(%s)""" % "|".join(REGEXPS), re.VERBOSE | re.I | re.UNICODE) # WORD_RE performs poorly on these patterns: -HANG_RE = regex.compile(r"([^a-zA-Z0-9])\1{3,}") +HANG_RE = re.compile(r'([^a-zA-Z0-9])\1{3,}') # The emoticon string gets its own regex so that we can preserve case for # them as needed: -EMOTICON_RE = regex.compile(EMOTICONS, regex.VERBOSE | regex.I | regex.UNICODE) +EMOTICON_RE = re.compile(EMOTICONS, re.VERBOSE | re.I | re.UNICODE) # These are for regularizing HTML entities to Unicode: -ENT_RE = regex.compile(r"&(#?(x?))([^&;\s]+);") +ENT_RE = re.compile(r'&(#?(x?))([^&;\s]+);') ###################################################################### @@ -181,15 +184,15 @@ ENT_RE = regex.compile(r"&(#?(x?))([^&;\s]+);") ###################################################################### -def _str_to_unicode(text, encoding=None, errors="strict"): +def _str_to_unicode(text, encoding=None, errors='strict'): if encoding is None: - encoding = "utf-8" + encoding = 'utf-8' if isinstance(text, bytes): return text.decode(encoding, errors) return text -def _replace_html_entities(text, keep=(), remove_illegal=True, encoding="utf-8"): +def _replace_html_entities(text, keep=(), remove_illegal=True, encoding='utf-8'): """ Remove entities from text by converting them to their corresponding unicode character. @@ -228,19 +231,19 @@ def _replace_html_entities(text, keep=(), remove_illegal=True, encoding="utf-8") # Numeric character references in the 80-9F range are typically # interpreted by browsers as representing the characters mapped # to bytes 80-9F in the Windows-1252 encoding. For more info - # see: https://en.wikipedia.org/wiki/ISO/IEC_8859-1#Similar_character_sets + # see: http://en.wikipedia.org/wiki/Character_encodings_in_HTML if 0x80 <= number <= 0x9F: - return bytes((number,)).decode("cp1252") + return int2byte(number).decode('cp1252') except ValueError: number = None else: if entity_body in keep: return match.group(0) else: - number = html.entities.name2codepoint.get(entity_body) + number = html_entities.name2codepoint.get(entity_body) if number is not None: try: - return chr(number) + return unichr(number) except ValueError: pass @@ -291,7 +294,7 @@ class TweetTokenizer: if self.reduce_len: text = reduce_lengthening(text) # Shorten problematic sequences of characters - safe_text = HANG_RE.sub(r"\1\1\1", text) + safe_text = HANG_RE.sub(r'\1\1\1', text) # Tokenize: words = WORD_RE.findall(safe_text) # Possibly alter the case, but avoid changing emoticons like :D into :d: @@ -312,7 +315,7 @@ def reduce_lengthening(text): Replace repeated character sequences of length 3 or greater with sequences of length 3. 
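As a rough, illustrative sketch of how the tokenizer defined in casual.py above is typically driven: the constructor keywords preserve_case and reduce_len correspond to the attributes used in tokenize(), while strip_handles is assumed from the usual NLTK API; the output shown is indicative only.

>>> from nltk.tokenize.casual import TweetTokenizer
>>> tknzr = TweetTokenizer(preserve_case=False, reduce_len=True, strip_handles=True)
>>> tknzr.tokenize("@someone I looooove it!!! #nlp :-)")  # doctest: +SKIP
['i', 'looove', 'it', '!', '!', '!', '#nlp', ':-)']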
""" - pattern = regex.compile(r"(.)\1{2,}") + pattern = re.compile(r"(.)\1{2,}") return pattern.sub(r"\1\1\1", text) @@ -320,11 +323,11 @@ def remove_handles(text): """ Remove Twitter username handles from text. """ - pattern = regex.compile( + pattern = re.compile( r"(? -# For license information, see LICENSE.TXT - - -import re -from nltk.tokenize.api import TokenizerI - - -class MacIntyreContractions: - """ - List of contractions adapted from Robert MacIntyre's tokenizer. - """ - - CONTRACTIONS2 = [ - r"(?i)\b(can)(?#X)(not)\b", - r"(?i)\b(d)(?#X)('ye)\b", - r"(?i)\b(gim)(?#X)(me)\b", - r"(?i)\b(gon)(?#X)(na)\b", - r"(?i)\b(got)(?#X)(ta)\b", - r"(?i)\b(lem)(?#X)(me)\b", - r"(?i)\b(mor)(?#X)('n)\b", - r"(?i)\b(wan)(?#X)(na)\s", - ] - CONTRACTIONS3 = [r"(?i) ('t)(?#X)(is)\b", r"(?i) ('t)(?#X)(was)\b"] - CONTRACTIONS4 = [r"(?i)\b(whad)(dd)(ya)\b", r"(?i)\b(wha)(t)(cha)\b"] - - -class NLTKWordTokenizer(TokenizerI): - """ - The NLTK tokenizer that has improved upon the TreebankWordTokenizer. - - The tokenizer is "destructive" such that the regexes applied will munge the - input string to a state beyond re-construction. It is possible to apply - `TreebankWordDetokenizer.detokenize` to the tokenized outputs of - `NLTKDestructiveWordTokenizer.tokenize` but there's no guarantees to - revert to the original string. - """ - - # Starting quotes. - STARTING_QUOTES = [ - (re.compile(u"([«“‘„]|[`]+)", re.U), r" \1 "), - (re.compile(r"^\""), r"``"), - (re.compile(r"(``)"), r" \1 "), - (re.compile(r"([ \(\[{<])(\"|\'{2})"), r"\1 `` "), - (re.compile(r"(?i)(\')(?!re|ve|ll|m|t|s|d)(\w)\b", re.U), r"\1 \2"), - ] - - # Ending quotes. - ENDING_QUOTES = [ - (re.compile(u"([»”’])", re.U), r" \1 "), - (re.compile(r'"'), " '' "), - (re.compile(r"(\S)(\'\')"), r"\1 \2 "), - (re.compile(r"([^' ])('[sS]|'[mM]|'[dD]|') "), r"\1 \2 "), - (re.compile(r"([^' ])('ll|'LL|'re|'RE|'ve|'VE|n't|N'T) "), r"\1 \2 "), - ] - - # For improvements for starting/closing quotes from TreebankWordTokenizer, - # see discussion on https://github.com/nltk/nltk/pull/1437 - # Adding to TreebankWordTokenizer, nltk.word_tokenize now splits on - # - chervon quotes u'\xab' and u'\xbb' . - # - unicode quotes u'\u2018', u'\u2019', u'\u201c' and u'\u201d' - # See https://github.com/nltk/nltk/issues/1995#issuecomment-376741608 - # Also, behavior of splitting on clitics now follows Stanford CoreNLP - # - clitics covered (?!re|ve|ll|m|t|s|d)(\w)\b - - # Punctuation. - PUNCTUATION = [ - (re.compile(r'([^\.])(\.)([\]\)}>"\'' u"»”’ " r"]*)\s*$", re.U), r"\1 \2 \3 "), - (re.compile(r"([:,])([^\d])"), r" \1 \2"), - (re.compile(r"([:,])$"), r" \1 "), - (re.compile(r"\.{2,}", re.U), r" \g<0> "), # See https://github.com/nltk/nltk/pull/2322 - (re.compile(r"[;@#$%&]"), r" \g<0> "), - ( - re.compile(r'([^\.])(\.)([\]\)}>"\']*)\s*$'), - r"\1 \2\3 ", - ), # Handles the final period. - (re.compile(r"[?!]"), r" \g<0> "), - (re.compile(r"([^'])' "), r"\1 ' "), - (re.compile(r"[*]", re.U), r" \g<0> "), # See https://github.com/nltk/nltk/pull/2322 - ] - - # Pads parentheses - PARENS_BRACKETS = (re.compile(r"[\]\[\(\)\{\}\<\>]"), r" \g<0> ") - - # Optionally: Convert parentheses, brackets and converts them to PTB symbols. - CONVERT_PARENTHESES = [ - (re.compile(r"\("), "-LRB-"), - (re.compile(r"\)"), "-RRB-"), - (re.compile(r"\["), "-LSB-"), - (re.compile(r"\]"), "-RSB-"), - (re.compile(r"\{"), "-LCB-"), - (re.compile(r"\}"), "-RCB-"), - ] - - DOUBLE_DASHES = (re.compile(r"--"), r" -- ") - - # List of contractions adapted from Robert MacIntyre's tokenizer. 
- _contractions = MacIntyreContractions() - CONTRACTIONS2 = list(map(re.compile, _contractions.CONTRACTIONS2)) - CONTRACTIONS3 = list(map(re.compile, _contractions.CONTRACTIONS3)) - - def tokenize(self, text, convert_parentheses=False, return_str=False): - for regexp, substitution in self.STARTING_QUOTES: - text = regexp.sub(substitution, text) - - for regexp, substitution in self.PUNCTUATION: - text = regexp.sub(substitution, text) - - # Handles parentheses. - regexp, substitution = self.PARENS_BRACKETS - text = regexp.sub(substitution, text) - # Optionally convert parentheses - if convert_parentheses: - for regexp, substitution in self.CONVERT_PARENTHESES: - text = regexp.sub(substitution, text) - - # Handles double dash. - regexp, substitution = self.DOUBLE_DASHES - text = regexp.sub(substitution, text) - - # add extra space to make things easier - text = " " + text + " " - - for regexp, substitution in self.ENDING_QUOTES: - text = regexp.sub(substitution, text) - - for regexp in self.CONTRACTIONS2: - text = regexp.sub(r" \1 \2 ", text) - for regexp in self.CONTRACTIONS3: - text = regexp.sub(r" \1 \2 ", text) - - # We are not using CONTRACTIONS4 since - # they are also commented out in the SED scripts - # for regexp in self._contractions.CONTRACTIONS4: - # text = regexp.sub(r' \1 \2 \3 ', text) - - return text if return_str else text.split() diff --git a/nlp_resource_data/nltk/tokenize/mwe.py b/nlp_resource_data/nltk/tokenize/mwe.py index 9e4b991..5c61363 100644 --- a/nlp_resource_data/nltk/tokenize/mwe.py +++ b/nlp_resource_data/nltk/tokenize/mwe.py @@ -1,6 +1,6 @@ # Multi-Word Expression tokenizer # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Rob Malouf # URL: # For license information, see LICENSE.TXT @@ -38,7 +38,7 @@ class MWETokenizer(TokenizerI): into single tokens. """ - def __init__(self, mwes=None, separator="_"): + def __init__(self, mwes=None, separator='_'): """Initialize the multi-word tokenizer with a list of expressions and a separator diff --git a/nlp_resource_data/nltk/tokenize/nist.py b/nlp_resource_data/nltk/tokenize/nist.py index e6b7491..28d7e08 100644 --- a/nlp_resource_data/nltk/tokenize/nist.py +++ b/nlp_resource_data/nltk/tokenize/nist.py @@ -15,9 +15,11 @@ which was also ported into Python in https://github.com/lium-lst/nmtpy/blob/master/nmtpy/metrics/mtevalbleu.py#L162 """ +from __future__ import unicode_literals import io import re +from six import text_type from nltk.corpus import perluniprops from nltk.tokenize.api import TokenizerI @@ -30,6 +32,7 @@ class NISTTokenizer(TokenizerI): paragraph-based tokenization from mteval-14.pl; The sentence-based tokenization is consistent with the other tokenizers available in NLTK. + >>> from six import text_type >>> from nltk.tokenize.nist import NISTTokenizer >>> nist = NISTTokenizer() >>> s = "Good muffins cost $3.88 in New York." @@ -71,17 +74,17 @@ class NISTTokenizer(TokenizerI): """ # Strip "skipped" tags - STRIP_SKIP = re.compile(""), "" + STRIP_SKIP = re.compile(''), '' # Strip end-of-line hyphenation and join lines - STRIP_EOL_HYPHEN = re.compile("\u2028"), " " + STRIP_EOL_HYPHEN = re.compile(u'\u2028'), ' ' # Tokenize punctuation. - PUNCT = re.compile("([\{-\~\[-\` -\&\(-\+\:-\@\/])"), " \\1 " + PUNCT = re.compile('([\{-\~\[-\` -\&\(-\+\:-\@\/])'), ' \\1 ' # Tokenize period and comma unless preceded by a digit. 
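A short usage sketch for the MWETokenizer whose separator argument is touched above; add_mwe and the token-list input follow the usual NLTK API and are assumed here, and the output is indicative:

>>> from nltk.tokenize import MWETokenizer
>>> tokenizer = MWETokenizer([('in', 'spite', 'of')], separator='_')
>>> tokenizer.add_mwe(('a', 'little', 'bit'))
>>> tokenizer.tokenize('He did it in spite of a little bit of rain'.split())  # doctest: +SKIP
['He', 'did', 'it', 'in_spite_of', 'a_little_bit', 'of', 'rain']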
- PERIOD_COMMA_PRECEED = re.compile("([^0-9])([\.,])"), "\\1 \\2 " + PERIOD_COMMA_PRECEED = re.compile('([^0-9])([\.,])'), '\\1 \\2 ' # Tokenize period and comma unless followed by a digit. - PERIOD_COMMA_FOLLOW = re.compile("([\.,])([^0-9])"), " \\1 \\2" + PERIOD_COMMA_FOLLOW = re.compile('([\.,])([^0-9])'), ' \\1 \\2' # Tokenize dash when preceded by a digit - DASH_PRECEED_DIGIT = re.compile("([0-9])(-)"), "\\1 \\2 " + DASH_PRECEED_DIGIT = re.compile('([0-9])(-)'), '\\1 \\2 ' LANG_DEPENDENT_REGEXES = [ PUNCT, @@ -91,37 +94,37 @@ class NISTTokenizer(TokenizerI): ] # Perluniprops characters used in NIST tokenizer. - pup_number = str("".join(set(perluniprops.chars("Number")))) # i.e. \p{N} - pup_punct = str("".join(set(perluniprops.chars("Punctuation")))) # i.e. \p{P} - pup_symbol = str("".join(set(perluniprops.chars("Symbol")))) # i.e. \p{S} + pup_number = text_type(''.join(set(perluniprops.chars('Number')))) # i.e. \p{N} + pup_punct = text_type(''.join(set(perluniprops.chars('Punctuation')))) # i.e. \p{P} + pup_symbol = text_type(''.join(set(perluniprops.chars('Symbol')))) # i.e. \p{S} # Python regexes needs to escape some special symbols, see # see https://stackoverflow.com/q/45670950/610569 - number_regex = re.sub(r"[]^\\-]", r"\\\g<0>", pup_number) - punct_regex = re.sub(r"[]^\\-]", r"\\\g<0>", pup_punct) - symbol_regex = re.sub(r"[]^\\-]", r"\\\g<0>", pup_symbol) + number_regex = re.sub(r'[]^\\-]', r'\\\g<0>', pup_number) + punct_regex = re.sub(r'[]^\\-]', r'\\\g<0>', pup_punct) + symbol_regex = re.sub(r'[]^\\-]', r'\\\g<0>', pup_symbol) # Note: In the original perl implementation, \p{Z} and \p{Zl} were used to # (i) strip trailing and heading spaces and # (ii) de-deuplicate spaces. # In Python, this would do: ' '.join(str.strip().split()) # Thus, the next two lines were commented out. - # Line_Separator = str(''.join(perluniprops.chars('Line_Separator'))) # i.e. \p{Zl} - # Separator = str(''.join(perluniprops.chars('Separator'))) # i.e. \p{Z} + # Line_Separator = text_type(''.join(perluniprops.chars('Line_Separator'))) # i.e. \p{Zl} + # Separator = text_type(''.join(perluniprops.chars('Separator'))) # i.e. \p{Z} # Pads non-ascii strings with space. - NONASCII = re.compile("([\x00-\x7f]+)"), r" \1 " + NONASCII = re.compile('([\x00-\x7f]+)'), r' \1 ' # Tokenize any punctuation unless followed AND preceded by a digit. PUNCT_1 = ( - re.compile("([{n}])([{p}])".format(n=number_regex, p=punct_regex)), - "\\1 \\2 ", + re.compile(u"([{n}])([{p}])".format(n=number_regex, p=punct_regex)), + '\\1 \\2 ', ) PUNCT_2 = ( - re.compile("([{p}])([{n}])".format(n=number_regex, p=punct_regex)), - " \\1 \\2", + re.compile(u"([{p}])([{n}])".format(n=number_regex, p=punct_regex)), + ' \\1 \\2', ) # Tokenize symbols - SYMBOLS = re.compile("([{s}])".format(s=symbol_regex)), " \\1 " + SYMBOLS = re.compile(u"([{s}])".format(s=symbol_regex)), ' \\1 ' INTERNATIONAL_REGEXES = [NONASCII, PUNCT_1, PUNCT_2, SYMBOLS] @@ -138,28 +141,28 @@ class NISTTokenizer(TokenizerI): return text def tokenize(self, text, lowercase=False, western_lang=True, return_str=False): - text = str(text) + text = text_type(text) # Language independent regex. text = self.lang_independent_sub(text) # Language dependent regex. if western_lang: # Pad string with whitespace. - text = " " + text + " " + text = ' ' + text + ' ' if lowercase: text = text.lower() for regexp, substitution in self.LANG_DEPENDENT_REGEXES: text = regexp.sub(substitution, text) # Remove contiguous whitespaces. 
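A hedged example of the two entry points modified above; the first call mirrors the lowercased example from the module docstring, while the second depends on the installed perluniprops data and is shown only approximately:

>>> from nltk.tokenize.nist import NISTTokenizer
>>> nist = NISTTokenizer()
>>> nist.tokenize('Good muffins cost $3.88 in New York.', lowercase=True)
['good', 'muffins', 'cost', '$', '3.88', 'in', 'new', 'york', '.']
>>> nist.international_tokenize('Abc def.', split_non_ascii=True)  # doctest: +SKIP
['Abc', 'def', '.']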
- text = " ".join(text.split()) + text = ' '.join(text.split()) # Finally, strips heading and trailing spaces # and converts output string into unicode. - text = str(text.strip()) + text = text_type(text.strip()) return text if return_str else text.split() def international_tokenize( self, text, lowercase=False, split_non_ascii=True, return_str=False ): - text = str(text) + text = text_type(text) # Different from the 'normal' tokenize(), STRIP_EOL_HYPHEN is applied # first before unescaping. regexp, substitution = self.STRIP_SKIP @@ -176,5 +179,5 @@ class NISTTokenizer(TokenizerI): # Make sure that there's only one space only between words. # Strip leading and trailing spaces. - text = " ".join(text.strip().split()) + text = ' '.join(text.strip().split()) return text if return_str else text.split() diff --git a/nlp_resource_data/nltk/tokenize/punkt.py b/nlp_resource_data/nltk/tokenize/punkt.py index 408ce27..76fd868 100644 --- a/nlp_resource_data/nltk/tokenize/punkt.py +++ b/nlp_resource_data/nltk/tokenize/punkt.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Punkt sentence tokenizer # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Algorithm: Kiss & Strunk (2006) # Author: Willy (original Python port) # Steven Bird (additions) @@ -99,6 +99,7 @@ The algorithm for this tokenizer is described in:: Kiss, Tibor and Strunk, Jan (2006): Unsupervised Multilingual Sentence Boundary Detection. Computational Linguistics 32: 485-525. """ +from __future__ import print_function, unicode_literals, division # TODO: Make orthographic heuristic less susceptible to overtraining # TODO: Frequent sentence starters optionally exclude always-capitalised words @@ -108,6 +109,9 @@ import re import math from collections import defaultdict +from six import string_types + +from nltk.compat import unicode_repr, python_2_unicode_compatible from nltk.probability import FreqDist from nltk.tokenize.api import TokenizerI @@ -143,12 +147,12 @@ _ORTHO_LC = _ORTHO_BEG_LC + _ORTHO_MID_LC + _ORTHO_UNK_LC """Orthographic context: occurs with lower case.""" _ORTHO_MAP = { - ("initial", "upper"): _ORTHO_BEG_UC, - ("internal", "upper"): _ORTHO_MID_UC, - ("unknown", "upper"): _ORTHO_UNK_UC, - ("initial", "lower"): _ORTHO_BEG_LC, - ("internal", "lower"): _ORTHO_MID_LC, - ("unknown", "lower"): _ORTHO_UNK_LC, + ('initial', 'upper'): _ORTHO_BEG_UC, + ('internal', 'upper'): _ORTHO_MID_UC, + ('unknown', 'upper'): _ORTHO_UNK_UC, + ('initial', 'lower'): _ORTHO_BEG_LC, + ('internal', 'lower'): _ORTHO_MID_LC, + ('unknown', 'lower'): _ORTHO_UNK_LC, } """A map from context position and first-letter case to the appropriate orthographic context flag.""" @@ -160,14 +164,14 @@ appropriate orthographic context flag.""" # { Decision reasons for debugging ###################################################################### -REASON_DEFAULT_DECISION = "default decision" -REASON_KNOWN_COLLOCATION = "known collocation (both words)" -REASON_ABBR_WITH_ORTHOGRAPHIC_HEURISTIC = "abbreviation + orthographic heuristic" -REASON_ABBR_WITH_SENTENCE_STARTER = "abbreviation + frequent sentence starter" -REASON_INITIAL_WITH_ORTHOGRAPHIC_HEURISTIC = "initial + orthographic heuristic" -REASON_NUMBER_WITH_ORTHOGRAPHIC_HEURISTIC = "initial + orthographic heuristic" +REASON_DEFAULT_DECISION = 'default decision' +REASON_KNOWN_COLLOCATION = 'known collocation (both words)' +REASON_ABBR_WITH_ORTHOGRAPHIC_HEURISTIC = 'abbreviation + orthographic heuristic' +REASON_ABBR_WITH_SENTENCE_STARTER = 'abbreviation + frequent sentence starter' 
+REASON_INITIAL_WITH_ORTHOGRAPHIC_HEURISTIC = 'initial + orthographic heuristic' +REASON_NUMBER_WITH_ORTHOGRAPHIC_HEURISTIC = 'initial + orthographic heuristic' REASON_INITIAL_WITH_SPECIAL_ORTHOGRAPHIC_HEURISTIC = ( - "initial + special orthographic heuristic" + 'initial + special orthographic heuristic' ) @@ -189,7 +193,7 @@ class PunktLanguageVars(object): constructors. """ - __slots__ = ("_re_period_context", "_re_word_tokenizer") + __slots__ = ('_re_period_context', '_re_word_tokenizer') def __getstate__(self): # All modifications to the class are performed by inheritance. @@ -200,14 +204,14 @@ class PunktLanguageVars(object): def __setstate__(self, state): return 1 - sent_end_chars = (".", "?", "!") + sent_end_chars = ('.', '?', '!') """Characters which are candidates for sentence boundaries""" @property def _re_sent_end_chars(self): - return "[%s]" % re.escape("".join(self.sent_end_chars)) + return '[%s]' % re.escape(''.join(self.sent_end_chars)) - internal_punctuation = ",:;" # might want to extend this.. + internal_punctuation = ',:;' # might want to extend this.. """sentence internal punctuation, which indicates an abbreviation if preceded by a period-final token.""" @@ -224,7 +228,7 @@ class PunktLanguageVars(object): _re_multi_char_punct = r"(?:\-{2,}|\.{2,}|(?:\.\s){2,}\.)" """Hyphen and ellipsis are multi-character punctuation""" - _word_tokenize_fmt = r"""( + _word_tokenize_fmt = r'''( %(MultiChar)s | (?=%(WordStart)s)\S+? # Accept word characters until end is found @@ -236,7 +240,7 @@ class PunktLanguageVars(object): ) | \S - )""" + )''' """Format of a regular expression to split punctuation from words, excluding period.""" @@ -248,9 +252,9 @@ class PunktLanguageVars(object): self._re_word_tokenizer = re.compile( self._word_tokenize_fmt % { - "NonWord": self._re_non_word_chars, - "MultiChar": self._re_multi_char_punct, - "WordStart": self._re_word_start, + 'NonWord': self._re_non_word_chars, + 'MultiChar': self._re_multi_char_punct, + 'WordStart': self._re_word_start, }, re.UNICODE | re.VERBOSE, ) @@ -281,15 +285,15 @@ class PunktLanguageVars(object): self._re_period_context = re.compile( self._period_context_fmt % { - "NonWord": self._re_non_word_chars, - "SentEndChars": self._re_sent_end_chars, + 'NonWord': self._re_non_word_chars, + 'SentEndChars': self._re_sent_end_chars, }, re.UNICODE | re.VERBOSE, ) return self._re_period_context -_re_non_punct = re.compile(r"[^\W\d]", re.UNICODE) +_re_non_punct = re.compile(r'[^\W\d]', re.UNICODE) """Matches token types that are not merely punctuation. (Types for numeric tokens are changed to ##number## and hence contain alpha.)""" @@ -310,10 +314,7 @@ def _pair_iter(it): pair will have None as its second element. 
""" it = iter(it) - try: - prev = next(it) - except StopIteration: - return + prev = next(it) for el in it: yield (prev, el) prev = el @@ -366,17 +367,17 @@ class PunktParameters(object): def _debug_ortho_context(self, typ): c = self.ortho_context[typ] if c & _ORTHO_BEG_UC: - yield "BEG-UC" + yield 'BEG-UC' if c & _ORTHO_MID_UC: - yield "MID-UC" + yield 'MID-UC' if c & _ORTHO_UNK_UC: - yield "UNK-UC" + yield 'UNK-UC' if c & _ORTHO_BEG_LC: - yield "BEG-LC" + yield 'BEG-LC' if c & _ORTHO_MID_LC: - yield "MID-LC" + yield 'MID-LC' if c & _ORTHO_UNK_LC: - yield "UNK-LC" + yield 'UNK-LC' ###################################################################### @@ -384,17 +385,18 @@ class PunktParameters(object): ###################################################################### +@python_2_unicode_compatible class PunktToken(object): """Stores a token of text with annotations produced during sentence boundary detection.""" - _properties = ["parastart", "linestart", "sentbreak", "abbr", "ellipsis"] - __slots__ = ["tok", "type", "period_final"] + _properties + _properties = ['parastart', 'linestart', 'sentbreak', 'abbr', 'ellipsis'] + __slots__ = ['tok', 'type', 'period_final'] + _properties def __init__(self, tok, **params): self.tok = tok self.type = self._get_type(tok) - self.period_final = tok.endswith(".") + self.period_final = tok.endswith('.') for p in self._properties: setattr(self, p, None) @@ -405,10 +407,10 @@ class PunktToken(object): # { Regular expressions for properties # //////////////////////////////////////////////////////////// # Note: [A-Za-z] is approximated by [^\W\d] in the general case. - _RE_ELLIPSIS = re.compile(r"\.\.+$") - _RE_NUMERIC = re.compile(r"^-?[\.,]?\d[\d,\.-]*\.?$") - _RE_INITIAL = re.compile(r"[^\W\d]\.$", re.UNICODE) - _RE_ALPHA = re.compile(r"[^\W\d]+$", re.UNICODE) + _RE_ELLIPSIS = re.compile(r'\.\.+$') + _RE_NUMERIC = re.compile(r'^-?[\.,]?\d[\d,\.-]*\.?$') + _RE_INITIAL = re.compile(r'[^\W\d]\.$', re.UNICODE) + _RE_ALPHA = re.compile(r'[^\W\d]+$', re.UNICODE) # //////////////////////////////////////////////////////////// # { Derived properties @@ -416,14 +418,14 @@ class PunktToken(object): def _get_type(self, tok): """Returns a case-normalized representation of the token.""" - return self._RE_NUMERIC.sub("##number##", tok.lower()) + return self._RE_NUMERIC.sub('##number##', tok.lower()) @property def type_no_period(self): """ The type with its final period removed if it has one. """ - if len(self.type) > 1 and self.type[-1] == ".": + if len(self.type) > 1 and self.type[-1] == '.': return self.type[:-1] return self.type @@ -450,10 +452,10 @@ class PunktToken(object): @property def first_case(self): if self.first_lower: - return "lower" + return 'lower' elif self.first_upper: - return "upper" - return "none" + return 'upper' + return 'none' @property def is_ellipsis(self): @@ -463,7 +465,7 @@ class PunktToken(object): @property def is_number(self): """True if the token text is that of a number.""" - return self.type.startswith("##number##") + return self.type.startswith('##number##') @property def is_initial(self): @@ -490,17 +492,17 @@ class PunktToken(object): with eval(), which lists all the token's non-default annotations. 
""" - typestr = " type=%s," % repr(self.type) if self.type != self.tok else "" + typestr = ' type=%s,' % unicode_repr(self.type) if self.type != self.tok else '' - propvals = ", ".join( - "%s=%s" % (p, repr(getattr(self, p))) + propvals = ', '.join( + '%s=%s' % (p, unicode_repr(getattr(self, p))) for p in self._properties if getattr(self, p) ) - return "%s(%s,%s %s)" % ( + return '%s(%s,%s %s)' % ( self.__class__.__name__, - repr(self.tok), + unicode_repr(self.tok), typestr, propvals, ) @@ -511,11 +513,11 @@ class PunktToken(object): """ res = self.tok if self.abbr: - res += "" + res += '' if self.ellipsis: - res += "" + res += '' if self.sentbreak: - res += "" + res += '' return res @@ -553,16 +555,11 @@ class PunktBaseClass(object): respectively. """ parastart = False - for line in plaintext.split("\n"): + for line in plaintext.split('\n'): if line.strip(): line_toks = iter(self._lang_vars.word_tokenize(line)) - try: - tok = next(line_toks) - except StopIteration: - continue - - yield self._Token(tok, parastart=parastart, linestart=True) + yield self._Token(next(line_toks), parastart=parastart, linestart=True) parastart = False for t in line_toks: @@ -606,10 +603,10 @@ class PunktBaseClass(object): aug_tok.sentbreak = True elif aug_tok.is_ellipsis: aug_tok.ellipsis = True - elif aug_tok.period_final and not tok.endswith(".."): + elif aug_tok.period_final and not tok.endswith('..'): if ( tok[:-1].lower() in self._params.abbrev_types - or tok[:-1].lower().split("-")[-1] in self._params.abbrev_types + or tok[:-1].lower().split('-')[-1] in self._params.abbrev_types ): aug_tok.abbr = True @@ -763,12 +760,12 @@ class PunktTrainer(PunktBaseClass): if is_add: self._params.abbrev_types.add(abbr) if verbose: - print((" Abbreviation: [%6.4f] %s" % (score, abbr))) + print((' Abbreviation: [%6.4f] %s' % (score, abbr))) else: if not is_add: self._params.abbrev_types.remove(abbr) if verbose: - print((" Removed abbreviation: [%6.4f] %s" % (score, abbr))) + print((' Removed abbreviation: [%6.4f] %s' % (score, abbr))) # Make a preliminary pass through the document, marking likely # sentence breaks, abbreviations, and ellipsis tokens. @@ -791,7 +788,7 @@ class PunktTrainer(PunktBaseClass): if self._is_rare_abbrev_type(aug_tok1, aug_tok2): self._params.abbrev_types.add(aug_tok1.type_no_period) if verbose: - print((" Rare Abbrev: %s" % aug_tok1.type)) + print((' Rare Abbrev: %s' % aug_tok1.type)) # Does second token have a high likelihood of starting a sentence? if self._is_potential_sent_starter(aug_tok2, aug_tok1): @@ -815,13 +812,13 @@ class PunktTrainer(PunktBaseClass): for typ, ll in self._find_sent_starters(): self._params.sent_starters.add(typ) if verbose: - print((" Sent Starter: [%6.4f] %r" % (ll, typ))) + print((' Sent Starter: [%6.4f] %r' % (ll, typ))) self._params.clear_collocations() for (typ1, typ2), ll in self._find_collocations(): self._params.collocations.add((typ1, typ2)) if verbose: - print((" Collocation: [%6.4f] %r+%r" % (ll, typ1, typ2))) + print((' Collocation: [%6.4f] %r+%r' % (ll, typ1, typ2))) self._finalized = True @@ -884,7 +881,7 @@ class PunktTrainer(PunktBaseClass): positions. """ # 'initial' or 'internal' or 'unknown' - context = "internal" + context = 'internal' tokens = list(tokens) for aug_tok in tokens: @@ -892,13 +889,13 @@ class PunktTrainer(PunktBaseClass): # that it's a sentence break. But err on the side of # caution (by not positing a sentence break) if we just # saw an abbreviation. 
- if aug_tok.parastart and context != "unknown": - context = "initial" + if aug_tok.parastart and context != 'unknown': + context = 'initial' # If we're at the beginning of a line, then we can't decide # between 'internal' and 'initial'. - if aug_tok.linestart and context == "internal": - context = "unknown" + if aug_tok.linestart and context == 'internal': + context = 'unknown' # Find the case-normalized type of the token. If it's a # sentence-final token, strip off the period. @@ -912,13 +909,13 @@ class PunktTrainer(PunktBaseClass): # Decide whether the next word is at a sentence boundary. if aug_tok.sentbreak: if not (aug_tok.is_number or aug_tok.is_initial): - context = "initial" + context = 'initial' else: - context = "unknown" + context = 'unknown' elif aug_tok.ellipsis or aug_tok.abbr: - context = "unknown" + context = 'unknown' else: - context = "internal" + context = 'internal' # //////////////////////////////////////////////////////////// # { Abbreviations @@ -945,10 +942,10 @@ class PunktTrainer(PunktBaseClass): for typ in types: # Check some basic conditions, to rule out words that are # clearly not abbrev_types. - if not _re_non_punct.search(typ) or typ == "##number##": + if not _re_non_punct.search(typ) or typ == '##number##': continue - if typ.endswith("."): + if typ.endswith('.'): if typ in self._params.abbrev_types: continue typ = typ[:-1] @@ -960,7 +957,7 @@ class PunktTrainer(PunktBaseClass): # Count how many periods & nonperiods are in the # candidate. - num_periods = typ.count(".") + 1 + num_periods = typ.count('.') + 1 num_nonperiods = len(typ) - num_periods + 1 # Let be the candidate without the period, and @@ -968,7 +965,7 @@ class PunktTrainer(PunktBaseClass): # indicates whether occurs as a single unit (high # value of ll), or as two independent units and # (low value of ll). - count_with_period = self._type_fdist[typ + "."] + count_with_period = self._type_fdist[typ + '.'] count_without_period = self._type_fdist[typ] ll = self._dunning_log_likelihood( count_with_period + count_without_period, @@ -998,7 +995,7 @@ class PunktTrainer(PunktBaseClass): This fails to include abbreviations otherwise found as "rare". """ self._params.clear_abbrevs() - tokens = (typ for typ in self._type_fdist if typ and typ.endswith(".")) + tokens = (typ for typ in self._type_fdist if typ and typ.endswith('.')) for abbr, score, is_add in self._reclassify_abbrev_types(tokens): if score >= self.ABBREV: self._params.abbrev_types.add(abbr) @@ -1151,8 +1148,8 @@ class PunktTrainer(PunktBaseClass): continue col_count = self._collocation_fdist[types] - typ1_count = self._type_fdist[typ1] + self._type_fdist[typ1 + "."] - typ2_count = self._type_fdist[typ2] + self._type_fdist[typ2 + "."] + typ1_count = self._type_fdist[typ1] + self._type_fdist[typ1 + '.'] + typ2_count = self._type_fdist[typ2] + self._type_fdist[typ2 + '.'] if ( typ1_count > 1 and typ2_count > 1 @@ -1196,7 +1193,7 @@ class PunktTrainer(PunktBaseClass): continue typ_at_break_count = self._sent_starter_fdist[typ] - typ_count = self._type_fdist[typ] + self._type_fdist[typ + "."] + typ_count = self._type_fdist[typ] + self._type_fdist[typ + '.'] if typ_count < typ_at_break_count: # needed after freq_threshold continue @@ -1255,7 +1252,7 @@ class PunktSentenceTokenizer(PunktBaseClass, TokenizerI): given. Repeated calls to this method destroy previous parameters. For incremental training, instantiate a separate PunktTrainer instance. 
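A usage sketch for the trainer whose internals appear above; raw_text stands in for a large training corpus, and train()/get_params() are assumed to keep their usual NLTK signatures:

>>> from nltk.tokenize.punkt import PunktTrainer, PunktSentenceTokenizer
>>> trainer = PunktTrainer()
>>> trainer.INCLUDE_ALL_COLLOCS = True
>>> trainer.train(raw_text)  # doctest: +SKIP
>>> tokenizer = PunktSentenceTokenizer(trainer.get_params())  # doctest: +SKIP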
""" - if not isinstance(train_text, str): + if not isinstance(train_text, string_types): return train_text return PunktTrainer( train_text, lang_vars=self._lang_vars, token_cls=self._Token @@ -1280,7 +1277,7 @@ class PunktSentenceTokenizer(PunktBaseClass, TokenizerI): """ for match in self._lang_vars.period_context_re().finditer(text): - decision_text = match.group() + match.group("after_tok") + decision_text = match.group() + match.group('after_tok') tokens = self._tokenize_words(decision_text) tokens = list(self._annotate_first_pass(tokens)) while not tokens[0].period_final: @@ -1328,12 +1325,12 @@ class PunktSentenceTokenizer(PunktBaseClass, TokenizerI): def _slices_from_text(self, text): last_break = 0 for match in self._lang_vars.period_context_re().finditer(text): - context = match.group() + match.group("after_tok") + context = match.group() + match.group('after_tok') if self.text_contains_sentbreak(context): yield slice(last_break, match.end()) - if match.group("next_tok"): + if match.group('next_tok'): # next sentence starts after whitespace - last_break = match.start("next_tok") + last_break = match.start('next_tok') else: # next sentence starts at following punctuation last_break = match.end() @@ -1440,9 +1437,9 @@ class PunktSentenceTokenizer(PunktBaseClass, TokenizerI): pos = 0 # A regular expression that finds pieces of whitespace: - WS_REGEXP = re.compile(r"\s*") + WS_REGEXP = re.compile(r'\s*') - sentence = "" + sentence = '' for aug_tok in tokens: tok = aug_tok.tok @@ -1456,7 +1453,7 @@ class PunktSentenceTokenizer(PunktBaseClass, TokenizerI): # token doesn't match, see if adding whitespace helps. # If so, then use the version with whitespace. if text[pos : pos + len(tok)] != tok: - pat = "\s*".join(re.escape(c) for c in tok) + pat = '\s*'.join(re.escape(c) for c in tok) m = re.compile(pat).match(text, pos) if m: tok = m.group() @@ -1475,7 +1472,7 @@ class PunktSentenceTokenizer(PunktBaseClass, TokenizerI): # If we're at a sentence break, then start a new sentence. if aug_tok.sentbreak: yield sentence - sentence = "" + sentence = '' # If the last sentence is emtpy, discard it. if sentence: @@ -1483,15 +1480,15 @@ class PunktSentenceTokenizer(PunktBaseClass, TokenizerI): # [XX] TESTING def dump(self, tokens): - print("writing to /tmp/punkt.new...") - with open("/tmp/punkt.new", "w") as outfile: + print('writing to /tmp/punkt.new...') + with open('/tmp/punkt.new', 'w') as outfile: for aug_tok in tokens: if aug_tok.parastart: - outfile.write("\n\n") + outfile.write('\n\n') elif aug_tok.linestart: - outfile.write("\n") + outfile.write('\n') else: - outfile.write(" ") + outfile.write(' ') outfile.write(str(aug_tok)) @@ -1499,7 +1496,7 @@ class PunktSentenceTokenizer(PunktBaseClass, TokenizerI): # { Customization Variables # //////////////////////////////////////////////////////////// - PUNCTUATION = tuple(";:,.!?") + PUNCTUATION = tuple(';:,.!?') # //////////////////////////////////////////////////////////// # { Annotation Procedures @@ -1568,7 +1565,7 @@ class PunktSentenceTokenizer(PunktBaseClass, TokenizerI): # [4.3. Token-Based Detection of Initials and Ordinals] # Check if any initials or ordinals tokens that are marked # as sentbreaks should be reclassified as abbreviations. - if tok_is_initial or typ == "##number##": + if tok_is_initial or typ == '##number##': # [4.1.1. 
Orthographic Heuristic] Check if there's # orthogrpahic evidence about whether the next word @@ -1587,7 +1584,7 @@ class PunktSentenceTokenizer(PunktBaseClass, TokenizerI): # heuristc is unknown, and next word is always # capitalized, then mark as abbrev (eg: J. Bach). if ( - is_sent_starter == "unknown" + is_sent_starter == 'unknown' and tok_is_initial and aug_tok2.first_upper and not (self._params.ortho_context[next_typ] & _ORTHO_LC) @@ -1628,10 +1625,10 @@ class PunktSentenceTokenizer(PunktBaseClass, TokenizerI): return False # Otherwise, we're not sure. - return "unknown" + return 'unknown' -DEBUG_DECISION_FMT = """Text: %(text)r (at offset %(period_index)d) +DEBUG_DECISION_FMT = '''Text: %(text)r (at offset %(period_index)d) Sentence break? %(break_decision)s (%(reason)s) Collocation? %(collocation)s %(type1)r: @@ -1641,7 +1638,7 @@ Collocation? %(collocation)s known sentence starter: %(type2_is_sent_starter)s orthographic heuristic suggests is a sentence starter? %(type2_ortho_heuristic)s orthographic contexts in training: %(type2_ortho_contexts)s -""" +''' def format_debug_decision(d): @@ -1651,7 +1648,7 @@ def format_debug_decision(d): def demo(text, tok_cls=PunktSentenceTokenizer, train_cls=PunktTrainer): """Builds a punkt model and applies it to the same text""" cleanup = ( - lambda s: re.compile(r"(?:\r|^\s+)", re.MULTILINE).sub("", s).replace("\n", " ") + lambda s: re.compile(r'(?:\r|^\s+)', re.MULTILINE).sub('', s).replace('\n', ' ') ) trainer = train_cls() trainer.INCLUDE_ALL_COLLOCS = True diff --git a/nlp_resource_data/nltk/tokenize/regexp.py b/nlp_resource_data/nltk/tokenize/regexp.py index dd4630e..9f7a1ee 100644 --- a/nlp_resource_data/nltk/tokenize/regexp.py +++ b/nlp_resource_data/nltk/tokenize/regexp.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Tokenizers # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # Steven Bird # Trevor Cohn @@ -65,13 +65,16 @@ argument. This differs from the conventions used by Python's ``re`` functions, where the pattern is always the first argument. (This is for consistency with the other NLTK tokenizers.) """ +from __future__ import unicode_literals import re from nltk.tokenize.api import TokenizerI from nltk.tokenize.util import regexp_span_tokenize +from nltk.compat import python_2_unicode_compatible +@python_2_unicode_compatible class RegexpTokenizer(TokenizerI): """ A tokenizer that splits a string using a regular expression, which @@ -107,7 +110,7 @@ class RegexpTokenizer(TokenizerI): flags=re.UNICODE | re.MULTILINE | re.DOTALL, ): # If they gave us a regexp object, extract the pattern. 
- pattern = getattr(pattern, "pattern", pattern) + pattern = getattr(pattern, 'pattern', pattern) self._pattern = pattern self._gaps = gaps @@ -144,7 +147,7 @@ class RegexpTokenizer(TokenizerI): yield m.span() def __repr__(self): - return "%s(pattern=%r, gaps=%r, discard_empty=%r, flags=%r)" % ( + return '%s(pattern=%r, gaps=%r, discard_empty=%r, flags=%r)' % ( self.__class__.__name__, self._pattern, self._gaps, @@ -166,7 +169,7 @@ class WhitespaceTokenizer(RegexpTokenizer): """ def __init__(self): - RegexpTokenizer.__init__(self, r"\s+", gaps=True) + RegexpTokenizer.__init__(self, r'\s+', gaps=True) class BlanklineTokenizer(RegexpTokenizer): @@ -177,7 +180,7 @@ class BlanklineTokenizer(RegexpTokenizer): """ def __init__(self): - RegexpTokenizer.__init__(self, r"\s*\n\s*\n\s*", gaps=True) + RegexpTokenizer.__init__(self, r'\s*\n\s*\n\s*', gaps=True) class WordPunctTokenizer(RegexpTokenizer): @@ -193,7 +196,7 @@ class WordPunctTokenizer(RegexpTokenizer): """ def __init__(self): - RegexpTokenizer.__init__(self, r"\w+|[^\w\s]+") + RegexpTokenizer.__init__(self, r'\w+|[^\w\s]+') ###################################################################### diff --git a/nlp_resource_data/nltk/tokenize/repp.py b/nlp_resource_data/nltk/tokenize/repp.py index 49b5139..2cf7a50 100644 --- a/nlp_resource_data/nltk/tokenize/repp.py +++ b/nlp_resource_data/nltk/tokenize/repp.py @@ -8,12 +8,16 @@ # URL: # For license information, see LICENSE.TXT +from __future__ import unicode_literals, print_function + import os import re import sys import subprocess import tempfile +from six import text_type + from nltk.data import ZipFilePathPointer from nltk.internals import find_dir @@ -40,20 +44,20 @@ class ReppTokenizer(TokenizerI): (u'We', u'evaluated', u'our', u'method', u'on', u'three', u'languages', u'and', u'obtained', u'error', u'rates', u'of', u'0.27', u'%', u'(', u'English', u')', u',', u'0.35', u'%', u'(', u'Dutch', u')', u'and', u'0.76', u'%', u'(', u'Italian', u')', u'for', u'our', u'best', u'models', u'.') >>> for sent in tokenizer.tokenize_sents(sents): # doctest: +SKIP - ... print(sent) # doctest: +SKIP + ... print sent # doctest: +SKIP ... (u'Tokenization', u'is', u'widely', u'regarded', u'as', u'a', u'solved', u'problem', u'due', u'to', u'the', u'high', u'accuracy', u'that', u'rulebased', u'tokenizers', u'achieve', u'.') (u'But', u'rule-based', u'tokenizers', u'are', u'hard', u'to', u'maintain', u'and', u'their', u'rules', u'language', u'specific', u'.') (u'We', u'evaluated', u'our', u'method', u'on', u'three', u'languages', u'and', u'obtained', u'error', u'rates', u'of', u'0.27', u'%', u'(', u'English', u')', u',', u'0.35', u'%', u'(', u'Dutch', u')', u'and', u'0.76', u'%', u'(', u'Italian', u')', u'for', u'our', u'best', u'models', u'.') >>> for sent in tokenizer.tokenize_sents(sents, keep_token_positions=True): # doctest: +SKIP - ... print(sent) # doctest: +SKIP + ... print sent # doctest: +SKIP ... 
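For orientation, the two modes of RegexpTokenizer shown above (the pattern describes the tokens, or with gaps=True it describes the separators) behave like this on a small illustrative string:

>>> from nltk.tokenize import RegexpTokenizer
>>> s = "Good muffins cost $3.88\nin New York."
>>> RegexpTokenizer(r'\w+|\$[\d\.]+|\S+').tokenize(s)
['Good', 'muffins', 'cost', '$3.88', 'in', 'New', 'York', '.']
>>> RegexpTokenizer(r'\s+', gaps=True).tokenize(s)
['Good', 'muffins', 'cost', '$3.88', 'in', 'New', 'York.']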
[(u'Tokenization', 0, 12), (u'is', 13, 15), (u'widely', 16, 22), (u'regarded', 23, 31), (u'as', 32, 34), (u'a', 35, 36), (u'solved', 37, 43), (u'problem', 44, 51), (u'due', 52, 55), (u'to', 56, 58), (u'the', 59, 62), (u'high', 63, 67), (u'accuracy', 68, 76), (u'that', 77, 81), (u'rulebased', 82, 91), (u'tokenizers', 92, 102), (u'achieve', 103, 110), (u'.', 110, 111)] [(u'But', 0, 3), (u'rule-based', 4, 14), (u'tokenizers', 15, 25), (u'are', 26, 29), (u'hard', 30, 34), (u'to', 35, 37), (u'maintain', 38, 46), (u'and', 47, 50), (u'their', 51, 56), (u'rules', 57, 62), (u'language', 63, 71), (u'specific', 72, 80), (u'.', 80, 81)] [(u'We', 0, 2), (u'evaluated', 3, 12), (u'our', 13, 16), (u'method', 17, 23), (u'on', 24, 26), (u'three', 27, 32), (u'languages', 33, 42), (u'and', 43, 46), (u'obtained', 47, 55), (u'error', 56, 61), (u'rates', 62, 67), (u'of', 68, 70), (u'0.27', 71, 75), (u'%', 75, 76), (u'(', 77, 78), (u'English', 78, 85), (u')', 85, 86), (u',', 86, 87), (u'0.35', 88, 92), (u'%', 92, 93), (u'(', 94, 95), (u'Dutch', 95, 100), (u')', 100, 101), (u'and', 102, 105), (u'0.76', 106, 110), (u'%', 110, 111), (u'(', 112, 113), (u'Italian', 113, 120), (u')', 120, 121), (u'for', 122, 125), (u'our', 126, 129), (u'best', 130, 134), (u'models', 135, 141), (u'.', 141, 142)] """ - def __init__(self, repp_dir, encoding="utf8"): + def __init__(self, repp_dir, encoding='utf8'): self.repp_dir = self.find_repptokenizer(repp_dir) # Set a directory to store the temporary files. self.working_dir = tempfile.gettempdir() @@ -81,11 +85,11 @@ class ReppTokenizer(TokenizerI): :rtype: iter(tuple(str)) """ with tempfile.NamedTemporaryFile( - prefix="repp_input.", dir=self.working_dir, mode="w", delete=False + prefix='repp_input.', dir=self.working_dir, mode='w', delete=False ) as input_file: # Write sentences to temporary input file. for sent in sentences: - input_file.write(str(sent) + "\n") + input_file.write(text_type(sent) + '\n') input_file.close() # Generate command to run REPP. cmd = self.generate_repp_command(input_file.name) @@ -104,9 +108,9 @@ class ReppTokenizer(TokenizerI): :param inputfilename: path to the input file :type inputfilename: str """ - cmd = [self.repp_dir + "/src/repp"] - cmd += ["-c", self.repp_dir + "/erg/repp.set"] - cmd += ["--format", "triple"] + cmd = [self.repp_dir + '/src/repp'] + cmd += ['-c', self.repp_dir + '/erg/repp.set'] + cmd += ['--format', 'triple'] cmd += [inputfilename] return cmd @@ -128,8 +132,8 @@ class ReppTokenizer(TokenizerI): :return: an iterable of the tokenized sentences as tuples of strings :rtype: iter(tuple) """ - line_regex = re.compile("^\((\d+), (\d+), (.+)\)$", re.MULTILINE) - for section in repp_output.split("\n\n"): + line_regex = re.compile('^\((\d+), (\d+), (.+)\)$', re.MULTILINE) + for section in repp_output.split('\n\n'): words_with_positions = [ (token, int(start), int(end)) for start, end, token in line_regex.findall(section) @@ -144,8 +148,8 @@ class ReppTokenizer(TokenizerI): if os.path.exists(repp_dirname): # If a full path is given. _repp_dir = repp_dirname else: # Try to find path to REPP directory in environment variables. - _repp_dir = find_dir(repp_dirname, env_vars=("REPP_TOKENIZER",)) + _repp_dir = find_dir(repp_dirname, env_vars=('REPP_TOKENIZER',)) # Checks for the REPP binary and erg/repp.set config file. 
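The --format triple output that parse_repp_output consumes can be sanity-checked without the REPP binary; a minimal sketch using the same regular expression as above:

>>> import re
>>> line_regex = re.compile('^\((\d+), (\d+), (.+)\)$', re.MULTILINE)
>>> section = '(0, 12, Tokenization)\n(13, 15, is)'
>>> [(token, int(start), int(end)) for start, end, token in line_regex.findall(section)]
[('Tokenization', 0, 12), ('is', 13, 15)]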
- assert os.path.exists(_repp_dir + "/src/repp") - assert os.path.exists(_repp_dir + "/erg/repp.set") + assert os.path.exists(_repp_dir + '/src/repp') + assert os.path.exists(_repp_dir + '/erg/repp.set') return _repp_dir diff --git a/nlp_resource_data/nltk/tokenize/sexpr.py b/nlp_resource_data/nltk/tokenize/sexpr.py index 9313a94..e2a1dd6 100644 --- a/nlp_resource_data/nltk/tokenize/sexpr.py +++ b/nlp_resource_data/nltk/tokenize/sexpr.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Tokenizers # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Yoav Goldberg # Steven Bird (minor edits) # URL: @@ -76,14 +76,14 @@ class SExprTokenizer(TokenizerI): :param strict: If true, then raise an exception when tokenizing an ill-formed sexpr. """ - def __init__(self, parens="()", strict=True): + def __init__(self, parens='()', strict=True): if len(parens) != 2: - raise ValueError("parens must contain exactly two strings") + raise ValueError('parens must contain exactly two strings') self._strict = strict self._open_paren = parens[0] self._close_paren = parens[1] self._paren_regexp = re.compile( - "%s|%s" % (re.escape(parens[0]), re.escape(parens[1])) + '%s|%s' % (re.escape(parens[0]), re.escape(parens[1])) ) def tokenize(self, text): @@ -125,13 +125,13 @@ class SExprTokenizer(TokenizerI): depth += 1 if paren == self._close_paren: if self._strict and depth == 0: - raise ValueError("Un-matched close paren at char %d" % m.start()) + raise ValueError('Un-matched close paren at char %d' % m.start()) depth = max(0, depth - 1) if depth == 0: result.append(text[pos : m.end()]) pos = m.end() if self._strict and depth > 0: - raise ValueError("Un-matched open paren at char %d" % pos) + raise ValueError('Un-matched open paren at char %d' % pos) if pos < len(text): result.append(text[pos:]) return result diff --git a/nlp_resource_data/nltk/tokenize/simple.py b/nlp_resource_data/nltk/tokenize/simple.py index ac1e400..c467678 100644 --- a/nlp_resource_data/nltk/tokenize/simple.py +++ b/nlp_resource_data/nltk/tokenize/simple.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: Simple Tokenizers # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # Steven Bird # URL: @@ -34,7 +34,7 @@ that expects a tokenizer. For example, these tokenizers can be used to specify the tokenization conventions when building a `CorpusReader`. """ - +from __future__ import unicode_literals from nltk.tokenize.api import TokenizerI, StringTokenizer from nltk.tokenize.util import string_span_tokenize, regexp_span_tokenize @@ -50,7 +50,7 @@ class SpaceTokenizer(StringTokenizer): 'Please', 'buy', 'me\ntwo', 'of', 'them.\n\nThanks.'] """ - _string = " " + _string = ' ' class TabTokenizer(StringTokenizer): @@ -62,7 +62,7 @@ class TabTokenizer(StringTokenizer): ['a', 'b c\n', ' d'] """ - _string = "\t" + _string = '\t' class CharTokenizer(StringTokenizer): @@ -101,11 +101,11 @@ class LineTokenizer(TokenizerI): a corresponding token ``''`` after that newline. 
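As a quick reminder of what the SExprTokenizer modified above produces (matching its documented behaviour; illustrative):

>>> from nltk.tokenize import SExprTokenizer
>>> SExprTokenizer().tokenize('(a b (c d)) e f (g)')
['(a b (c d))', 'e', 'f', '(g)']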
""" - def __init__(self, blanklines="discard"): - valid_blanklines = ("discard", "keep", "discard-eof") + def __init__(self, blanklines='discard'): + valid_blanklines = ('discard', 'keep', 'discard-eof') if blanklines not in valid_blanklines: raise ValueError( - "Blank lines must be one of: %s" % " ".join(valid_blanklines) + 'Blank lines must be one of: %s' % ' '.join(valid_blanklines) ) self._blanklines = blanklines @@ -113,20 +113,20 @@ class LineTokenizer(TokenizerI): def tokenize(self, s): lines = s.splitlines() # If requested, strip off blank lines. - if self._blanklines == "discard": + if self._blanklines == 'discard': lines = [l for l in lines if l.rstrip()] - elif self._blanklines == "discard-eof": + elif self._blanklines == 'discard-eof': if lines and not lines[-1].strip(): lines.pop() return lines # discard-eof not implemented def span_tokenize(self, s): - if self._blanklines == "keep": - for span in string_span_tokenize(s, r"\n"): + if self._blanklines == 'keep': + for span in string_span_tokenize(s, r'\n'): yield span else: - for span in regexp_span_tokenize(s, r"\n(\s+\n)*"): + for span in regexp_span_tokenize(s, r'\n(\s+\n)*'): yield span @@ -136,5 +136,5 @@ class LineTokenizer(TokenizerI): # XXX: it is stated in module docs that there is no function versions -def line_tokenize(text, blanklines="discard"): +def line_tokenize(text, blanklines='discard'): return LineTokenizer(blanklines).tokenize(text) diff --git a/nlp_resource_data/nltk/tokenize/sonority_sequencing.py b/nlp_resource_data/nltk/tokenize/sonority_sequencing.py deleted file mode 100644 index fb6b080..0000000 --- a/nlp_resource_data/nltk/tokenize/sonority_sequencing.py +++ /dev/null @@ -1,192 +0,0 @@ -# Natural Language Toolkit: Tokenizers -# -# Copyright (C) 2001-2020 NLTK Project -# Author: Christopher Hench -# Alex Estes -# URL: -# For license information, see LICENSE.TXT - -""" -The Sonority Sequencing Principle (SSP) is a language agnostic algorithm proposed -by Otto Jesperson in 1904. The sonorous quality of a phoneme is judged by the -openness of the lips. Syllable breaks occur before troughs in sonority. For more -on the SSP see Selkirk (1984). - -The default implementation uses the English alphabet, but the `sonority_hiearchy` -can be modified to IPA or any other alphabet for the use-case. The SSP is a -universal syllabification algorithm, but that does not mean it performs equally -across languages. Bartlett et al. (2009) is a good benchmark for English accuracy -if utilizing IPA (pg. 311). - -Importantly, if a custom hiearchy is supplied and vowels span across more than -one level, they should be given separately to the `vowels` class attribute. - -References: -- Otto Jespersen. 1904. Lehrbuch der Phonetik. - Leipzig, Teubner. Chapter 13, Silbe, pp. 185-203. -- Elisabeth Selkirk. 1984. On the major class features and syllable theory. - In Aronoff & Oehrle (eds.) Language Sound Structure: Studies in Phonology. - Cambridge, MIT Press. pp. 107-136. -- Susan Bartlett, et al. 2009. On the Syllabification of Phonemes. - In HLT-NAACL. pp. 308-316. -""" - -import warnings - -import re -from string import punctuation - -from nltk.tokenize.api import TokenizerI -from nltk.util import ngrams - - -class SyllableTokenizer(TokenizerI): - """ - Syllabifies words based on the Sonority Sequencing Principle (SSP). 
- - >>> from nltk.tokenize import SyllableTokenizer - >>> from nltk import word_tokenize - >>> SSP = SyllableTokenizer() - >>> SSP.tokenize('justification') - ['jus', 'ti', 'fi', 'ca', 'tion'] - >>> text = "This is a foobar-like sentence." - >>> [SSP.tokenize(token) for token in word_tokenize(text)] - [['This'], ['is'], ['a'], ['foo', 'bar', '-', 'li', 'ke'], ['sen', 'ten', 'ce'], ['.']] - """ - - def __init__(self, lang="en", sonority_hierarchy=False): - """ - :param lang: Language parameter, default is English, 'en' - :type lang: str - :param sonority_hierarchy: Sonority hierarchy according to the - Sonority Sequencing Principle. - :type sonority_hierarchy: list(str) - """ - # Sonority hierarchy should be provided in descending order. - # If vowels are spread across multiple levels, they should be - # passed assigned self.vowels var together, otherwise should be - # placed in first index of hierarchy. - if not sonority_hierarchy and lang == "en": - sonority_hierarchy = [ - "aeiouy", # vowels. - "lmnrw", # nasals. - "zvsf", # fricatives. - "bcdgtkpqxhj", # stops. - ] - - self.vowels = sonority_hierarchy[0] - self.phoneme_map = {} - for i, level in enumerate(sonority_hierarchy): - for c in level: - sonority_level = len(sonority_hierarchy) - i - self.phoneme_map[c] = sonority_level - self.phoneme_map[c.upper()] = sonority_level - - def assign_values(self, token): - """ - Assigns each phoneme its value from the sonority hierarchy. - Note: Sentence/text has to be tokenized first. - - :param token: Single word or token - :type token: str - :return: List of tuples, first element is character/phoneme and - second is the soronity value. - :rtype: list(tuple(str, int)) - """ - syllables_values = [] - for c in token: - try: - syllables_values.append((c, self.phoneme_map[c])) - except KeyError: - if c not in punctuation: - warnings.warn( - "Character not defined in sonority_hierarchy," - " assigning as vowel: '{}'".format(c) - ) - syllables_values.append((c, max(self.phoneme_map.values()))) - self.vowels += c - else: # If it's a punctuation, assing -1. - syllables_values.append((c, -1)) - return syllables_values - - def validate_syllables(self, syllable_list): - """ - Ensures each syllable has at least one vowel. - If the following syllable doesn't have vowel, add it to the current one. - - :param syllable_list: Single word or token broken up into syllables. - :type syllable_list: list(str) - :return: Single word or token broken up into syllables - (with added syllables if necessary) - :rtype: list(str) - """ - valid_syllables = [] - front = "" - for i, syllable in enumerate(syllable_list): - if syllable in punctuation: - valid_syllables.append(syllable) - continue - if not re.search("|".join(self.vowels), syllable): - if len(valid_syllables) == 0: - front += syllable - else: - valid_syllables = valid_syllables[:-1] + [ - valid_syllables[-1] + syllable - ] - else: - if len(valid_syllables) == 0: - valid_syllables.append(front + syllable) - else: - valid_syllables.append(syllable) - - return valid_syllables - - def tokenize(self, token): - """ - Apply the SSP to return a list of syllables. - Note: Sentence/text has to be tokenized first. - - :param token: Single word or token - :type token: str - :return syllable_list: Single word or token broken up into syllables. 
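Although this patch removes the module (it postdates NLTK 3.4), the sonority-assignment step in ``assign_values`` above is easy to reproduce standalone; a minimal sketch using the default English hierarchy, with ``-1`` standing in for punctuation as in the original code:

    >>> hierarchy = ['aeiouy', 'lmnrw', 'zvsf', 'bcdgtkpqxhj']
    >>> phoneme_map = {c: len(hierarchy) - i for i, level in enumerate(hierarchy) for c in level}
    >>> [(c, phoneme_map.get(c, -1)) for c in 'cat!']
    [('c', 1), ('a', 4), ('t', 1), ('!', -1)]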
- :rtype: list(str) - """ - # assign values from hierarchy - syllables_values = self.assign_values(token) - - # if only one vowel return word - if sum(token.count(x) for x in self.vowels) <= 1: - return [token] - - syllable_list = [] - syllable = syllables_values[0][0] # start syllable with first phoneme - for trigram in ngrams(syllables_values, n=3): - phonemes, values = zip(*trigram) - # Sonority of previous, focal and following phoneme - prev_value, focal_value, next_value = values - # Focal phoneme. - focal_phoneme = phonemes[1] - - # These cases trigger syllable break. - if focal_value == -1: # If it's a punctuation, just break. - syllable_list.append(syllable) - syllable_list.append(focal_phoneme) - syllable = "" - elif prev_value >= focal_value == next_value: - syllable += focal_phoneme - syllable_list.append(syllable) - syllable = "" - - elif prev_value > focal_value < next_value: - syllable_list.append(syllable) - syllable = "" - syllable += focal_phoneme - - # no syllable break - else: - syllable += focal_phoneme - - syllable += syllables_values[-1][0] # append last phoneme - syllable_list.append(syllable) - - return self.validate_syllables(syllable_list) diff --git a/nlp_resource_data/nltk/tokenize/stanford.py b/nlp_resource_data/nltk/tokenize/stanford.py index b17f591..93fb219 100644 --- a/nlp_resource_data/nltk/tokenize/stanford.py +++ b/nlp_resource_data/nltk/tokenize/stanford.py @@ -1,23 +1,27 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: Interface to the Stanford Tokenizer # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Steven Xu # # URL: # For license information, see LICENSE.TXT +from __future__ import unicode_literals, print_function + import tempfile import os import json from subprocess import PIPE import warnings +from six import text_type + from nltk.internals import find_jar, config_java, java, _java_options from nltk.tokenize.api import TokenizerI from nltk.parse.corenlp import CoreNLPParser -_stanford_url = "https://nlp.stanford.edu/software/tokenizer.shtml" +_stanford_url = 'https://nlp.stanford.edu/software/tokenizer.shtml' class StanfordTokenizer(TokenizerI): @@ -33,15 +37,15 @@ class StanfordTokenizer(TokenizerI): ['The', 'color', 'of', 'the', 'wall', 'is', 'blue', '.'] """ - _JAR = "stanford-postagger.jar" + _JAR = 'stanford-postagger.jar' def __init__( self, path_to_jar=None, - encoding="utf8", + encoding='utf8', options=None, verbose=False, - java_options="-mx1000m", + java_options='-mx1000m', ): # Raise deprecation warning. warnings.warn( @@ -57,7 +61,7 @@ class StanfordTokenizer(TokenizerI): self._stanford_jar = find_jar( self._JAR, path_to_jar, - env_vars=("STANFORD_POSTAGGER",), + env_vars=('STANFORD_POSTAGGER',), searchpath=(), url=_stanford_url, verbose=verbose, @@ -67,8 +71,8 @@ class StanfordTokenizer(TokenizerI): self.java_options = java_options options = {} if options is None else options - self._options_cmd = ",".join( - "{0}={1}".format(key, val) for key, val in options.items() + self._options_cmd = ','.join( + '{0}={1}'.format(key, val) for key, val in options.items() ) @staticmethod @@ -79,25 +83,25 @@ class StanfordTokenizer(TokenizerI): """ Use stanford tokenizer's PTBTokenizer to tokenize multiple sentences. 
""" - cmd = ["edu.stanford.nlp.process.PTBTokenizer"] + cmd = ['edu.stanford.nlp.process.PTBTokenizer'] return self._parse_tokenized_output(self._execute(cmd, s)) def _execute(self, cmd, input_, verbose=False): encoding = self._encoding - cmd.extend(["-charset", encoding]) + cmd.extend(['-charset', encoding]) _options_cmd = self._options_cmd if _options_cmd: - cmd.extend(["-options", self._options_cmd]) + cmd.extend(['-options', self._options_cmd]) - default_options = " ".join(_java_options) + default_options = ' '.join(_java_options) # Configure java. config_java(options=self.java_options, verbose=verbose) # Windows is incompatible with NamedTemporaryFile() without passing in delete=False. - with tempfile.NamedTemporaryFile(mode="wb", delete=False) as input_file: + with tempfile.NamedTemporaryFile(mode='wb', delete=False) as input_file: # Write the actual sentences to the temporary input file - if isinstance(input_, str) and encoding: + if isinstance(input_, text_type) and encoding: input_ = input_.encode(encoding) input_file.write(input_) input_file.flush() @@ -125,5 +129,5 @@ def setup_module(module): StanfordTokenizer() except LookupError: raise SkipTest( - "doctests from nltk.tokenize.stanford are skipped because the stanford postagger jar doesn't exist" + 'doctests from nltk.tokenize.stanford are skipped because the stanford postagger jar doesn\'t exist' ) diff --git a/nlp_resource_data/nltk/tokenize/stanford_segmenter.py b/nlp_resource_data/nltk/tokenize/stanford_segmenter.py index 2595945..858c4d8 100644 --- a/nlp_resource_data/nltk/tokenize/stanford_segmenter.py +++ b/nlp_resource_data/nltk/tokenize/stanford_segmenter.py @@ -3,7 +3,7 @@ # Natural Language Toolkit: Interface to the Stanford Segmenter # for Chinese and Arabic # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: 52nlp <52nlpcn@gmail.com> # Casper Lehmann-Strøm # Alex Constantin @@ -11,12 +11,17 @@ # URL: # For license information, see LICENSE.TXT +from __future__ import unicode_literals, print_function + import tempfile import os import json import warnings from subprocess import PIPE +from six import text_type + +from nltk import compat from nltk.internals import ( find_jar, find_file, @@ -28,7 +33,7 @@ from nltk.internals import ( from nltk.tokenize.api import TokenizerI -_stanford_url = "https://nlp.stanford.edu/software" +_stanford_url = 'https://nlp.stanford.edu/software' class StanfordSegmenter(TokenizerI): @@ -53,7 +58,7 @@ class StanfordSegmenter(TokenizerI): """ - _JAR = "stanford-segmenter.jar" + _JAR = 'stanford-segmenter.jar' def __init__( self, @@ -63,15 +68,15 @@ class StanfordSegmenter(TokenizerI): path_to_model=None, path_to_dict=None, path_to_sihan_corpora_dict=None, - sihan_post_processing="false", - keep_whitespaces="false", - encoding="UTF-8", + sihan_post_processing='false', + keep_whitespaces='false', + encoding='UTF-8', options=None, verbose=False, - java_options="-mx2g", + java_options='-mx2g', ): # Raise deprecation warning. 
- warnings.simplefilter("always", DeprecationWarning) + warnings.simplefilter('always', DeprecationWarning) warnings.warn( str( "\nThe StanfordTokenizer will " @@ -81,21 +86,21 @@ class StanfordSegmenter(TokenizerI): DeprecationWarning, stacklevel=2, ) - warnings.simplefilter("ignore", DeprecationWarning) + warnings.simplefilter('ignore', DeprecationWarning) stanford_segmenter = find_jar( self._JAR, path_to_jar, - env_vars=("STANFORD_SEGMENTER",), + env_vars=('STANFORD_SEGMENTER',), searchpath=(), url=_stanford_url, verbose=verbose, ) if path_to_slf4j is not None: slf4j = find_jar( - "slf4j-api.jar", + 'slf4j-api.jar', path_to_slf4j, - env_vars=("SLF4J", "STANFORD_SEGMENTER"), + env_vars=('SLF4J', 'STANFORD_SEGMENTER'), searchpath=(), url=_stanford_url, verbose=verbose, @@ -119,8 +124,8 @@ class StanfordSegmenter(TokenizerI): self._encoding = encoding self.java_options = java_options options = {} if options is None else options - self._options_cmd = ",".join( - "{0}={1}".format(key, json.dumps(val)) for key, val in options.items() + self._options_cmd = ','.join( + '{0}={1}'.format(key, json.dumps(val)) for key, val in options.items() ) def default_config(self, lang): @@ -130,33 +135,33 @@ class StanfordSegmenter(TokenizerI): """ search_path = () - if os.environ.get("STANFORD_SEGMENTER"): - search_path = {os.path.join(os.environ.get("STANFORD_SEGMENTER"), "data")} + if os.environ.get('STANFORD_SEGMENTER'): + search_path = {os.path.join(os.environ.get('STANFORD_SEGMENTER'), 'data')} # init for Chinese-specific files self._dict = None self._sihan_corpora_dict = None - self._sihan_post_processing = "false" + self._sihan_post_processing = 'false' - if lang == "ar": + if lang == 'ar': self._java_class = ( - "edu.stanford.nlp.international.arabic.process.ArabicSegmenter" + 'edu.stanford.nlp.international.arabic.process.ArabicSegmenter' ) - model = "arabic-segmenter-atb+bn+arztrain.ser.gz" + model = 'arabic-segmenter-atb+bn+arztrain.ser.gz' - elif lang == "zh": - self._java_class = "edu.stanford.nlp.ie.crf.CRFClassifier" - model = "pku.gz" - self._sihan_post_processing = "true" + elif lang == 'zh': + self._java_class = 'edu.stanford.nlp.ie.crf.CRFClassifier' + model = 'pku.gz' + self._sihan_post_processing = 'true' - path_to_dict = "dict-chris6.ser.gz" + path_to_dict = 'dict-chris6.ser.gz' try: self._dict = find_file( path_to_dict, searchpath=search_path, url=_stanford_url, verbose=False, - env_vars=("STANFORD_MODELS",), + env_vars=('STANFORD_MODELS',), ) except LookupError: raise LookupError( @@ -165,13 +170,13 @@ class StanfordSegmenter(TokenizerI): % path_to_dict ) - sihan_dir = "./data/" + sihan_dir = './data/' try: path_to_sihan_dir = find_dir( sihan_dir, url=_stanford_url, verbose=False, - env_vars=("STANFORD_SEGMENTER",), + env_vars=('STANFORD_SEGMENTER',), ) self._sihan_corpora_dict = os.path.join(path_to_sihan_dir, sihan_dir) except LookupError: @@ -188,7 +193,7 @@ class StanfordSegmenter(TokenizerI): searchpath=search_path, url=_stanford_url, verbose=False, - env_vars=("STANFORD_MODELS", "STANFORD_SEGMENTER"), + env_vars=('STANFORD_MODELS', 'STANFORD_SEGMENTER'), ) except LookupError: raise LookupError( @@ -204,21 +209,21 @@ class StanfordSegmenter(TokenizerI): """ cmd = [ self._java_class, - "-loadClassifier", + '-loadClassifier', self._model, - "-keepAllWhitespaces", + '-keepAllWhitespaces', self._keep_whitespaces, - "-textFile", + '-textFile', input_file_path, ] if self._sihan_corpora_dict is not None: cmd.extend( [ - "-serDictionary", + '-serDictionary', self._dict, - 
"-sighanCorporaDict", + '-sighanCorporaDict', self._sihan_corpora_dict, - "-sighanPostProcessing", + '-sighanPostProcessing', self._sihan_post_processing, ] ) @@ -238,30 +243,30 @@ class StanfordSegmenter(TokenizerI): _input_fh, self._input_file_path = tempfile.mkstemp(text=True) # Write the actural sentences to the temporary input file - _input_fh = os.fdopen(_input_fh, "wb") - _input = "\n".join((" ".join(x) for x in sentences)) - if isinstance(_input, str) and encoding: + _input_fh = os.fdopen(_input_fh, 'wb') + _input = '\n'.join((' '.join(x) for x in sentences)) + if isinstance(_input, text_type) and encoding: _input = _input.encode(encoding) _input_fh.write(_input) _input_fh.close() cmd = [ self._java_class, - "-loadClassifier", + '-loadClassifier', self._model, - "-keepAllWhitespaces", + '-keepAllWhitespaces', self._keep_whitespaces, - "-textFile", + '-textFile', self._input_file_path, ] if self._sihan_corpora_dict is not None: cmd.extend( [ - "-serDictionary", + '-serDictionary', self._dict, - "-sighanCorporaDict", + '-sighanCorporaDict', self._sihan_corpora_dict, - "-sighanPostProcessing", + '-sighanPostProcessing', self._sihan_post_processing, ] ) @@ -275,12 +280,12 @@ class StanfordSegmenter(TokenizerI): def _execute(self, cmd, verbose=False): encoding = self._encoding - cmd.extend(["-inputEncoding", encoding]) + cmd.extend(['-inputEncoding', encoding]) _options_cmd = self._options_cmd if _options_cmd: - cmd.extend(["-options", self._options_cmd]) + cmd.extend(['-options', self._options_cmd]) - default_options = " ".join(_java_options) + default_options = ' '.join(_java_options) # Configure java. config_java(options=self.java_options, verbose=verbose) @@ -301,9 +306,9 @@ def setup_module(module): try: seg = StanfordSegmenter() - seg.default_config("ar") - seg.default_config("zh") + seg.default_config('ar') + seg.default_config('zh') except LookupError as e: raise SkipTest( - "Tests for nltk.tokenize.stanford_segmenter skipped: %s" % str(e) + 'Tests for nltk.tokenize.stanford_segmenter skipped: %s' % str(e) ) diff --git a/nlp_resource_data/nltk/tokenize/texttiling.py b/nlp_resource_data/nltk/tokenize/texttiling.py index dbcc980..83da7bf 100644 --- a/nlp_resource_data/nltk/tokenize/texttiling.py +++ b/nlp_resource_data/nltk/tokenize/texttiling.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: TextTiling # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: George Boutsioukis # # URL: @@ -77,9 +77,9 @@ class TextTilingTokenizer(TokenizerI): if stopwords is None: from nltk.corpus import stopwords - stopwords = stopwords.words("english") + stopwords = stopwords.words('english') self.__dict__.update(locals()) - del self.__dict__["self"] + del self.__dict__['self'] def tokenize(self, text): """Return a tokenized copy of *text*, where each "token" represents @@ -92,8 +92,8 @@ class TextTilingTokenizer(TokenizerI): # Tokenization step starts here # Remove punctuation - nopunct_text = "".join( - c for c in lowercase_text if re.match("[a-z\-' \n\t]", c) + nopunct_text = ''.join( + c for c in lowercase_text if re.match("[a-z\-\' \n\t]", c) ) nopunct_par_breaks = self._mark_paragraph_breaks(nopunct_text) @@ -392,7 +392,7 @@ class TokenTableField(object): last_tok_seq=None, ): self.__dict__.update(locals()) - del self.__dict__["self"] + del self.__dict__['self'] class TokenSequence(object): @@ -401,11 +401,11 @@ class TokenSequence(object): def __init__(self, index, wrdindex_list, original_length=None): original_length = original_length or len(wrdindex_list) 
self.__dict__.update(locals()) - del self.__dict__["self"] + del self.__dict__['self'] # Pasted from the SciPy cookbook: http://www.scipy.org/Cookbook/SignalSmooth -def smooth(x, window_len=11, window="flat"): +def smooth(x, window_len=11, window='flat'): """smooth the data using a window with requested size. This method is based on the convolution of a scaled window with the signal. @@ -441,7 +441,7 @@ def smooth(x, window_len=11, window="flat"): if window_len < 3: return x - if window not in ["flat", "hanning", "hamming", "bartlett", "blackman"]: + if window not in ['flat', 'hanning', 'hamming', 'bartlett', 'blackman']: raise ValueError( "Window is on of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'" ) @@ -449,12 +449,12 @@ def smooth(x, window_len=11, window="flat"): s = numpy.r_[2 * x[0] - x[window_len:1:-1], x, 2 * x[-1] - x[-1:-window_len:-1]] # print(len(s)) - if window == "flat": # moving average - w = numpy.ones(window_len, "d") + if window == 'flat': # moving average + w = numpy.ones(window_len, 'd') else: - w = eval("numpy." + window + "(window_len)") + w = eval('numpy.' + window + '(window_len)') - y = numpy.convolve(w / w.sum(), s, mode="same") + y = numpy.convolve(w / w.sum(), s, mode='same') return y[window_len - 1 : -window_len + 1] diff --git a/nlp_resource_data/nltk/tokenize/toktok.py b/nlp_resource_data/nltk/tokenize/toktok.py index 0c595b2..9779725 100644 --- a/nlp_resource_data/nltk/tokenize/toktok.py +++ b/nlp_resource_data/nltk/tokenize/toktok.py @@ -22,6 +22,7 @@ Model (Doctoral dissertation). Columbus, OH, USA: The Ohio State University. """ import re +from six import text_type from nltk.tokenize.api import TokenizerI @@ -33,10 +34,10 @@ class ToktokTokenizer(TokenizerI): >>> toktok = ToktokTokenizer() >>> text = u'Is 9.5 or 525,600 my favorite number?' - >>> print(toktok.tokenize(text, return_str=True)) + >>> print (toktok.tokenize(text, return_str=True)) Is 9.5 or 525,600 my favorite number ? >>> text = u'The https://github.com/jonsafari/tok-tok/blob/master/tok-tok.pl is a website with/and/or slashes and sort of weird : things' - >>> print(toktok.tokenize(text, return_str=True)) + >>> print (toktok.tokenize(text, return_str=True)) The https://github.com/jonsafari/tok-tok/blob/master/tok-tok.pl is a website with/and/or slashes and sort of weird : things >>> text = u'\xa1This, is a sentence with weird\xbb symbols\u2026 appearing everywhere\xbf' >>> expected = u'\xa1 This , is a sentence with weird \xbb symbols \u2026 appearing everywhere \xbf' @@ -51,20 +52,20 @@ class ToktokTokenizer(TokenizerI): # Pad some funky punctuation. FUNKY_PUNCT_1 = re.compile(u'([،;؛¿!"\])}»›”؟¡%٪°±©®।॥…])'), r" \1 " # Pad more funky punctuation. - FUNKY_PUNCT_2 = re.compile(u"([({\[“‘„‚«‹「『])"), r" \1 " + FUNKY_PUNCT_2 = re.compile(u'([({\[“‘„‚«‹「『])'), r" \1 " # Pad En dash and em dash - EN_EM_DASHES = re.compile(u"([–—])"), r" \1 " + EN_EM_DASHES = re.compile(u'([–—])'), r" \1 " # Replace problematic character with numeric character reference. - AMPERCENT = re.compile("& "), "& " - TAB = re.compile("\t"), " " - PIPE = re.compile("\|"), " | " + AMPERCENT = re.compile('& '), '& ' + TAB = re.compile('\t'), ' ' + PIPE = re.compile('\|'), ' | ' # Pad numbers with commas to keep them from further tokenization. - COMMA_IN_NUM = re.compile(r"(? # Michael Heilman (re-port from http://www.cis.upenn.edu/~treebank/tokenizer.sed) # @@ -19,7 +19,25 @@ and available at http://www.cis.upenn.edu/~treebank/tokenizer.sed. 
import re from nltk.tokenize.api import TokenizerI from nltk.tokenize.util import align_tokens -from nltk.tokenize.destructive import MacIntyreContractions + + +class MacIntyreContractions: + """ + List of contractions adapted from Robert MacIntyre's tokenizer. + """ + + CONTRACTIONS2 = [ + r"(?i)\b(can)(?#X)(not)\b", + r"(?i)\b(d)(?#X)('ye)\b", + r"(?i)\b(gim)(?#X)(me)\b", + r"(?i)\b(gon)(?#X)(na)\b", + r"(?i)\b(got)(?#X)(ta)\b", + r"(?i)\b(lem)(?#X)(me)\b", + r"(?i)\b(mor)(?#X)('n)\b", + r"(?i)\b(wan)(?#X)(na)\s", + ] + CONTRACTIONS3 = [r"(?i) ('t)(?#X)(is)\b", r"(?i) ('t)(?#X)(was)\b"] + CONTRACTIONS4 = [r"(?i)\b(whad)(dd)(ya)\b", r"(?i)\b(wha)(t)(cha)\b"] class TreebankWordTokenizer(TokenizerI): @@ -49,44 +67,44 @@ class TreebankWordTokenizer(TokenizerI): # starting quotes STARTING_QUOTES = [ - (re.compile(r"^\""), r"``"), - (re.compile(r"(``)"), r" \1 "), - (re.compile(r"([ \(\[{<])(\"|\'{2})"), r"\1 `` "), + (re.compile(r'^\"'), r'``'), + (re.compile(r'(``)'), r' \1 '), + (re.compile(r"([ \(\[{<])(\"|\'{2})"), r'\1 `` '), ] # punctuation PUNCTUATION = [ - (re.compile(r"([:,])([^\d])"), r" \1 \2"), - (re.compile(r"([:,])$"), r" \1 "), - (re.compile(r"\.\.\."), r" ... "), - (re.compile(r"[;@#$%&]"), r" \g<0> "), + (re.compile(r'([:,])([^\d])'), r' \1 \2'), + (re.compile(r'([:,])$'), r' \1 '), + (re.compile(r'\.\.\.'), r' ... '), + (re.compile(r'[;@#$%&]'), r' \g<0> '), ( re.compile(r'([^\.])(\.)([\]\)}>"\']*)\s*$'), - r"\1 \2\3 ", + r'\1 \2\3 ', ), # Handles the final period. - (re.compile(r"[?!]"), r" \g<0> "), + (re.compile(r'[?!]'), r' \g<0> '), (re.compile(r"([^'])' "), r"\1 ' "), ] # Pads parentheses - PARENS_BRACKETS = (re.compile(r"[\]\[\(\)\{\}\<\>]"), r" \g<0> ") + PARENS_BRACKETS = (re.compile(r'[\]\[\(\)\{\}\<\>]'), r' \g<0> ') # Optionally: Convert parentheses, brackets and converts them to PTB symbols. 
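The contraction and punctuation rules above combine in the usual Penn Treebank way, e.g.:

    >>> from nltk.tokenize import TreebankWordTokenizer
    >>> TreebankWordTokenizer().tokenize("They'll save and invest more.")
    ['They', "'ll", 'save', 'and', 'invest', 'more', '.']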
CONVERT_PARENTHESES = [ - (re.compile(r"\("), "-LRB-"), - (re.compile(r"\)"), "-RRB-"), - (re.compile(r"\["), "-LSB-"), - (re.compile(r"\]"), "-RSB-"), - (re.compile(r"\{"), "-LCB-"), - (re.compile(r"\}"), "-RCB-"), + (re.compile(r'\('), '-LRB-'), + (re.compile(r'\)'), '-RRB-'), + (re.compile(r'\['), '-LSB-'), + (re.compile(r'\]'), '-RSB-'), + (re.compile(r'\{'), '-LCB-'), + (re.compile(r'\}'), '-RCB-'), ] - DOUBLE_DASHES = (re.compile(r"--"), r" -- ") + DOUBLE_DASHES = (re.compile(r'--'), r' -- ') # ending quotes ENDING_QUOTES = [ (re.compile(r'"'), " '' "), - (re.compile(r"(\S)(\'\')"), r"\1 \2 "), + (re.compile(r'(\S)(\'\')'), r'\1 \2 '), (re.compile(r"([^' ])('[sS]|'[mM]|'[dD]|') "), r"\1 \2 "), (re.compile(r"([^' ])('ll|'LL|'re|'RE|'ve|'VE|n't|N'T) "), r"\1 \2 "), ] @@ -122,9 +140,9 @@ class TreebankWordTokenizer(TokenizerI): text = regexp.sub(substitution, text) for regexp in self.CONTRACTIONS2: - text = regexp.sub(r" \1 \2 ", text) + text = regexp.sub(r' \1 \2 ', text) for regexp in self.CONTRACTIONS3: - text = regexp.sub(r" \1 \2 ", text) + text = regexp.sub(r' \1 \2 ', text) # We are not using CONTRACTIONS4 since # they are also commented out in the SED scripts @@ -252,11 +270,11 @@ class TreebankWordDetokenizer(TokenizerI): _contractions = MacIntyreContractions() CONTRACTIONS2 = [ - re.compile(pattern.replace("(?#X)", "\s")) + re.compile(pattern.replace('(?#X)', '\s')) for pattern in _contractions.CONTRACTIONS2 ] CONTRACTIONS3 = [ - re.compile(pattern.replace("(?#X)", "\s")) + re.compile(pattern.replace('(?#X)', '\s')) for pattern in _contractions.CONTRACTIONS3 ] @@ -264,75 +282,74 @@ class TreebankWordDetokenizer(TokenizerI): ENDING_QUOTES = [ (re.compile(r"([^' ])\s('ll|'LL|'re|'RE|'ve|'VE|n't|N'T) "), r"\1\2 "), (re.compile(r"([^' ])\s('[sS]|'[mM]|'[dD]|') "), r"\1\2 "), - (re.compile(r"(\S)(\'\')"), r"\1\2 "), + (re.compile(r'(\S)(\'\')'), r'\1\2 '), (re.compile(r" '' "), '"'), ] # Handles double dashes - DOUBLE_DASHES = (re.compile(r" -- "), r"--") + DOUBLE_DASHES = (re.compile(r' -- '), r'--') # Optionally: Convert parentheses, brackets and converts them from PTB symbols. CONVERT_PARENTHESES = [ - (re.compile("-LRB-"), "("), - (re.compile("-RRB-"), ")"), - (re.compile("-LSB-"), "["), - (re.compile("-RSB-"), "]"), - (re.compile("-LCB-"), "{"), - (re.compile("-RCB-"), "}"), + (re.compile('-LRB-'), '('), + (re.compile('-RRB-'), ')'), + (re.compile('-LSB-'), '['), + (re.compile('-RSB-'), ']'), + (re.compile('-LCB-'), '{'), + (re.compile('-RCB-'), '}'), ] # Undo padding on parentheses. PARENS_BRACKETS = [ - (re.compile(r"\s([\[\(\{\<])\s"), r" \g<1>"), - (re.compile(r"\s([\]\)\}\>])\s"), r"\g<1> "), - (re.compile(r"([\]\)\}\>])\s([:;,.])"), r"\1\2"), + (re.compile(r'\s([\[\(\{\<])\s'), r' \g<1>'), + (re.compile(r'\s([\]\)\}\>])\s'), r'\g<1> '), + (re.compile(r'([\]\)\}\>])\s([:;,.])'), r'\1\2'), ] # punctuation PUNCTUATION = [ (re.compile(r"([^'])\s'\s"), r"\1' "), - (re.compile(r"\s([?!])"), r"\g<1>"), # Strip left pad for [?!] + (re.compile(r'\s([?!])'), r'\g<1>'), # Strip left pad for [?!] # (re.compile(r'\s([?!])\s'), r'\g<1>'), - (re.compile(r'([^\.])\s(\.)([\]\)}>"\']*)\s*$'), r"\1\2\3"), + (re.compile(r'([^\.])\s(\.)([\]\)}>"\']*)\s*$'), r'\1\2\3'), # When tokenizing, [;@#$%&] are padded with whitespace regardless of # whether there are spaces before or after them. # But during detokenization, we need to distinguish between left/right # pad, so we split this up. - (re.compile(r"\s([#$])\s"), r" \g<1>"), # Left pad. - (re.compile(r"\s([;%])\s"), r"\g<1> "), # Right pad. 
- (re.compile(r"\s([&*])\s"), r" \g<1> "), # Unknown pad. - (re.compile(r"\s\.\.\.\s"), r"..."), - (re.compile(r"\s([:,])\s$"), r"\1"), + (re.compile(r'\s([#$])\s'), r' \g<1>'), # Left pad. + (re.compile(r'\s([;%])\s'), r'\g<1> '), # Right pad. + (re.compile(r'\s([&])\s'), r' \g<1> '), # Unknown pad. + (re.compile(r'\s\.\.\.\s'), r'...'), + (re.compile(r'\s([:,])\s$'), r'\1'), ( - re.compile(r"\s([:,])\s([^\d])"), - r"\1 \2", + re.compile(r'\s([:,])\s([^\d])'), + r'\1 \2', ) # Keep right pad after comma/colon before non-digits. # (re.compile(r'\s([:,])\s([^\d])'), r'\1\2') ] # starting quotes STARTING_QUOTES = [ - (re.compile(r"([ (\[{<])\s``"), r'\1"'), - (re.compile(r"\s(``)\s"), r"\1"), - (re.compile(r"^``"), r"\""), + (re.compile(r'([ (\[{<])\s``'), r'\1"'), + (re.compile(r'\s(``)\s'), r'\1'), + (re.compile(r'^``'), r'\"'), ] def tokenize(self, tokens, convert_parentheses=False): """ - Treebank detokenizer, created by undoing the regexes from - the TreebankWordTokenizer.tokenize. + Python port of the Moses detokenizer. :param tokens: A list of strings, i.e. tokenized text. :type tokens: list(str) :return: str """ - text = " ".join(tokens) + text = ' '.join(tokens) # Reverse the contractions regexes. # Note: CONTRACTIONS4 are not used in tokenization. for regexp in self.CONTRACTIONS3: - text = regexp.sub(r"\1\2", text) + text = regexp.sub(r'\1\2', text) for regexp in self.CONTRACTIONS2: - text = regexp.sub(r"\1\2", text) + text = regexp.sub(r'\1\2', text) # Reverse the regexes applied for ending quotes. for regexp, substitution in self.ENDING_QUOTES: diff --git a/nlp_resource_data/nltk/tokenize/util.py b/nlp_resource_data/nltk/tokenize/util.py index be7c12b..a91f129 100644 --- a/nlp_resource_data/nltk/tokenize/util.py +++ b/nlp_resource_data/nltk/tokenize/util.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: Tokenizer Utilities # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Steven Bird # URL: # For license information, see LICENSE.TXT diff --git a/nlp_resource_data/nltk/toolbox.py b/nlp_resource_data/nltk/toolbox.py index f9b5caa..74f4dbc 100644 --- a/nlp_resource_data/nltk/toolbox.py +++ b/nlp_resource_data/nltk/toolbox.py @@ -1,7 +1,7 @@ # coding: utf-8 # Natural Language Toolkit: Toolbox Reader # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Greg Aumann # URL: # For license information, see LICENSE.TXT @@ -10,11 +10,14 @@ Module for reading, writing and manipulating Toolbox databases and settings files. 
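Stepping back to the detokenizer hunks above, the intended round trip simply undoes the tokenizer's padding (a small sketch):

    >>> from nltk.tokenize.treebank import TreebankWordTokenizer, TreebankWordDetokenizer
    >>> toks = TreebankWordTokenizer().tokenize("They'll save and invest more.")
    >>> TreebankWordDetokenizer().tokenize(toks)
    "They'll save and invest more."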
""" +from __future__ import print_function import re, codecs from xml.etree.ElementTree import ElementTree, TreeBuilder, Element, SubElement -from io import StringIO +from six import u + +from nltk.compat import StringIO, PY3 from nltk.data import PathPointer, find @@ -40,7 +43,7 @@ class StandardFormat(object): # (PathPointer.open doesn't take a mode option) self._file = sfm_file.open(self._encoding) else: - self._file = codecs.open(sfm_file, "rU", self._encoding) + self._file = codecs.open(sfm_file, 'rU', self._encoding) def open_string(self, s): """ @@ -59,11 +62,11 @@ class StandardFormat(object): :rtype: iter(tuple(str, str)) """ - join_string = "\n" - line_regexp = r"^%s(?:\\(\S+)\s*)?(.*)$" + join_string = '\n' + line_regexp = r'^%s(?:\\(\S+)\s*)?(.*)$' # discard a BOM in the first line - first_line_pat = re.compile(line_regexp % "(?:\xef\xbb\xbf)?") - line_pat = re.compile(line_regexp % "") + first_line_pat = re.compile(line_regexp % '(?:\xef\xbb\xbf)?') + line_pat = re.compile(line_regexp % '') # need to get first line outside the loop for correct handling # of the first marker if it spans multiple lines file_iter = iter(self._file) @@ -95,7 +98,7 @@ class StandardFormat(object): strip=True, unwrap=True, encoding=None, - errors="strict", + errors='strict', unicode_fields=None, ): """ @@ -122,11 +125,17 @@ class StandardFormat(object): :rtype: iter(tuple(str, str)) """ if encoding is None and unicode_fields is not None: - raise ValueError("unicode_fields is set but not encoding.") - unwrap_pat = re.compile(r"\n+") + raise ValueError('unicode_fields is set but not encoding.') + unwrap_pat = re.compile(r'\n+') for mkr, val in self.raw_fields(): + if encoding and not PY3: # kludge - already decoded in PY3? + if unicode_fields is not None and mkr in unicode_fields: + val = val.decode('utf8', errors) + else: + val = val.decode(encoding, errors) + mkr = mkr.decode(encoding, errors) if unwrap: - val = unwrap_pat.sub(" ", val) + val = unwrap_pat.sub(' ', val) if strip: val = val.rstrip() yield (mkr, val) @@ -200,27 +209,27 @@ class ToolboxData(StandardFormat): :return: contents of toolbox data divided into header and records """ builder = TreeBuilder() - builder.start("toolbox_data", {}) - builder.start("header", {}) + builder.start('toolbox_data', {}) + builder.start('header', {}) in_records = False for mkr, value in self.fields(**kwargs): - if key is None and not in_records and mkr[0] != "_": + if key is None and not in_records and mkr[0] != '_': key = mkr if mkr == key: if in_records: - builder.end("record") + builder.end('record') else: - builder.end("header") + builder.end('header') in_records = True - builder.start("record", {}) + builder.start('record', {}) builder.start(mkr, {}) builder.data(value) builder.end(mkr) if in_records: - builder.end("record") + builder.end('record') else: - builder.end("header") - builder.end("toolbox_data") + builder.end('header') + builder.end('toolbox_data') return builder.close() def _tree2etree(self, parent): @@ -236,7 +245,7 @@ class ToolboxData(StandardFormat): e.text = text return root - def _chunk_parse(self, grammar=None, root_label="record", trace=0, **kwargs): + def _chunk_parse(self, grammar=None, root_label='record', trace=0, **kwargs): """ Returns an element tree structure corresponding to a toolbox data file parsed according to the chunk grammar. 
@@ -261,10 +270,10 @@ class ToolboxData(StandardFormat): cp = chunk.RegexpParser(grammar, root_label=root_label, trace=trace) db = self.parse(**kwargs) - tb_etree = Element("toolbox_data") - header = db.find("header") + tb_etree = Element('toolbox_data') + header = db.find('header') tb_etree.append(header) - for record in db.findall("record"): + for record in db.findall('record'): parsed = cp.parse([(elem.text, elem.tag) for elem in record]) tb_etree.append(self._tree2etree(parsed)) return tb_etree @@ -273,7 +282,7 @@ class ToolboxData(StandardFormat): _is_value = re.compile(r"\S") -def to_sfm_string(tree, encoding=None, errors="strict", unicode_fields=None): +def to_sfm_string(tree, encoding=None, errors='strict', unicode_fields=None): """ Return a string with a standard format representation of the toolbox data in tree (tree can be a toolbox database or a single record). @@ -289,12 +298,12 @@ def to_sfm_string(tree, encoding=None, errors="strict", unicode_fields=None): :type unicode_fields: dict(str) or set(str) :rtype: str """ - if tree.tag == "record": - root = Element("toolbox_data") + if tree.tag == 'record': + root = Element('toolbox_data') root.append(tree) tree = root - if tree.tag != "toolbox_data": + if tree.tag != 'toolbox_data': raise ValueError("not a toolbox_data element structure") if encoding is None and unicode_fields is not None: raise ValueError( @@ -302,29 +311,29 @@ def to_sfm_string(tree, encoding=None, errors="strict", unicode_fields=None): ) l = [] for rec in tree: - l.append("\n") + l.append('\n') for field in rec: mkr = field.tag value = field.text if encoding is not None: if unicode_fields is not None and mkr in unicode_fields: - cur_encoding = "utf8" + cur_encoding = 'utf8' else: cur_encoding = encoding if re.search(_is_value, value): l.append( - ("\\%s %s\n" % (mkr, value)).encode(cur_encoding, errors) + (u("\\%s %s\n") % (mkr, value)).encode(cur_encoding, errors) ) else: l.append( - ("\\%s%s\n" % (mkr, value)).encode(cur_encoding, errors) + (u("\\%s%s\n") % (mkr, value)).encode(cur_encoding, errors) ) else: if re.search(_is_value, value): l.append("\\%s %s\n" % (mkr, value)) else: l.append("\\%s%s\n" % (mkr, value)) - return "".join(l[1:]) + return ''.join(l[1:]) class ToolboxSettings(StandardFormat): @@ -333,7 +342,7 @@ class ToolboxSettings(StandardFormat): def __init__(self): super(ToolboxSettings, self).__init__() - def parse(self, encoding=None, errors="strict", **kwargs): + def parse(self, encoding=None, errors='strict', **kwargs): """ Return the contents of toolbox settings file with a nested structure. 
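The ``demo()`` at the end of this file exercises these readers; in isolation the same calls look like this (requires the ``toolbox`` corpus from ``nltk.download()``):

    >>> from nltk.toolbox import ToolboxData
    >>> from nltk.data import find
    >>> lexicon = ToolboxData(find('corpora/toolbox/rotokas.dic')).parse()
    >>> print(lexicon[3][0].tag)
    lx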
@@ -358,7 +367,7 @@ class ToolboxSettings(StandardFormat): if block == "+": builder.start(mkr, {}) builder.data(value) - elif block == "-": + elif block == '-': builder.end(mkr) else: builder.start(mkr, {}) @@ -367,7 +376,7 @@ class ToolboxSettings(StandardFormat): return builder.close() -def to_settings_string(tree, encoding=None, errors="strict", unicode_fields=None): +def to_settings_string(tree, encoding=None, errors='strict', unicode_fields=None): # write XML to file l = list() _to_settings_string( @@ -377,7 +386,7 @@ def to_settings_string(tree, encoding=None, errors="strict", unicode_fields=None errors=errors, unicode_fields=unicode_fields, ) - return "".join(l) + return ''.join(l) def _to_settings_string(node, l, **kwargs): @@ -386,17 +395,17 @@ def _to_settings_string(node, l, **kwargs): text = node.text if len(node) == 0: if text: - l.append("\\%s %s\n" % (tag, text)) + l.append('\\%s %s\n' % (tag, text)) else: - l.append("\\%s\n" % tag) + l.append('\\%s\n' % tag) else: if text: - l.append("\\+%s %s\n" % (tag, text)) + l.append('\\+%s %s\n' % (tag, text)) else: - l.append("\\+%s\n" % tag) + l.append('\\+%s\n' % tag) for n in node: _to_settings_string(n, l, **kwargs) - l.append("\\-%s\n" % tag) + l.append('\\-%s\n' % tag) return @@ -502,29 +511,29 @@ def demo(): # zip_path = find('corpora/toolbox.zip') # lexicon = ToolboxData(ZipFilePathPointer(zip_path, 'toolbox/rotokas.dic')).parse() - file_path = find("corpora/toolbox/rotokas.dic") + file_path = find('corpora/toolbox/rotokas.dic') lexicon = ToolboxData(file_path).parse() - print("first field in fourth record:") + print('first field in fourth record:') print(lexicon[3][0].tag) print(lexicon[3][0].text) - print("\nfields in sequential order:") - for field in islice(lexicon.find("record"), 10): + print('\nfields in sequential order:') + for field in islice(lexicon.find('record'), 10): print(field.tag, field.text) - print("\nlx fields:") - for field in islice(lexicon.findall("record/lx"), 10): + print('\nlx fields:') + for field in islice(lexicon.findall('record/lx'), 10): print(field.text) settings = ToolboxSettings() - file_path = find("corpora/toolbox/MDF/MDF_AltH.typ") + file_path = find('corpora/toolbox/MDF/MDF_AltH.typ') settings.open(file_path) # settings.open(ZipFilePathPointer(zip_path, entry='toolbox/MDF/MDF_AltH.typ')) - tree = settings.parse(unwrap=False, encoding="cp1252") - print(tree.find("expset/expMDF/rtfPageSetup/paperSize").text) + tree = settings.parse(unwrap=False, encoding='cp1252') + print(tree.find('expset/expMDF/rtfPageSetup/paperSize').text) settings_tree = ElementTree(tree) - print(to_settings_string(settings_tree).encode("utf8")) + print(to_settings_string(settings_tree).encode('utf8')) -if __name__ == "__main__": +if __name__ == '__main__': demo() diff --git a/nlp_resource_data/nltk/translate/__init__.py b/nlp_resource_data/nltk/translate/__init__.py index 21ddf8a..3a1b2e5 100644 --- a/nlp_resource_data/nltk/translate/__init__.py +++ b/nlp_resource_data/nltk/translate/__init__.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: Machine Translation # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Steven Bird , Tah Wei Hoon # URL: # For license information, see LICENSE.TXT @@ -20,6 +20,5 @@ from nltk.translate.ibm4 import IBMModel4 from nltk.translate.ibm5 import IBMModel5 from nltk.translate.bleu_score import sentence_bleu as bleu from nltk.translate.ribes_score import sentence_ribes as ribes -from nltk.translate.meteor_score import 
meteor_score as meteor from nltk.translate.metrics import alignment_error_rate from nltk.translate.stack_decoder import StackDecoder diff --git a/nlp_resource_data/nltk/translate/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/translate/__pycache__/__init__.cpython-37.pyc index 6e891423b7702e01d13ecd8baa8c999249874d83..0c445217001ac03e9bf6fa95ac62f5be27e57624 100644 GIT binary patch delta 106 zcmZqU*u>83#LLUY00gD%6XPdN1C`N(#v44Q9~fn|S8? zQTrNu@1ljk#gGm1@q%`6E3 DCFmKB delta 229 zcmdnQ-p0Y}#LLUY00eSL0r3|m@=7u;n5f>OD9Ol>!kEI7Bb6&1B@Jft=E&sAM#%!% ze8CKw{1Xp+=jF*wElJHUiZ4#iFG`)fg;9%9Zu5Oc8AfO6yquD3y^^BDyyBe1l2kop zbw#{D%c9sILQ!IvDvL6ci;MUsi!duG^8=NxWGE5@QefhjuYN{;ZmND(Vr52tL1Iz1 VzTRYCW^Wk@paK>q5k?*?5CE7~JB0uM diff --git a/nlp_resource_data/nltk/translate/__pycache__/api.cpython-37.pyc b/nlp_resource_data/nltk/translate/__pycache__/api.cpython-37.pyc index 51ca6ded2cdad3c97a9cb80b5ef01b87f672e763..66ec1c6aae1032c15916161bf2f95b26455ee281 100644 GIT binary patch delta 2855 zcma)8U2GIp6rMXfJ3IT+?Y7$%%5O_)VOv;A5gM_UU#SvXw_pOz!Gk zMp*@XG)C{UiTXgq_=21GWK8@EiH{Ova$kHPJP;FOLSnSU=s9<7yDWvUP0zh^?z!Lj zx!<`nSFQJEtA|@!ViNo+k@xjmgOYR`AC`V$m>h(E{{s-FWD;9)NS-)yUMBb~+o602 z#*nQz;e1%=6kBy7`3RA$$b@8uP4%i|s+M*obcN)jLKiVL&}mi-bXAg%n^7}%RmvyK zxS4>l#cVN?Fea_A*=nZDwxWDR&bI9`AlNg0_Z%e3SM`fSB5 z1SQYqsft@Fc&4S>rNE+w?N4P0kGY0pnL(v&Tky2Lyb#QFdagdAFBum+r)&hJnH!%| zwH?cHU7{%%7RPo~e7cK`MGlcY>{2Ax6G69xw!k1P>Y@!FlOV>}?~x-t+M|tHVd)^7 z)^e$42zEnDlcVeht-2qPC|c4qBFS}k7*~@|p45FS&<(p(bREkLCTf?WJIQto9cm+I zhdH^>v}G|mR4VYnFg4twMY}Mj^&r3k zE6#~8vgD+~$$3snP37ScJy1Q}^n%m1pIWyO!r5Z#4C!a5+j69ZU2hv%-;`#oo~3`j zsJ~VFvyBk4iT%}{9Y?1tL1pUI;;QdeXu;xYV9=r!JnUWf11K3|X8Pu+iV&jUA~_@j zS4-}Biw2M2;24;E$YwgGtLjgp1I;A=gbPWnYs0}#lM=^@x85D@p5Sd6= zP#gn7U?16{ZC2!1G@&FEEu<+iS&M7iwX_zaXvIc4eG;u*>%1T8YdB6$$_>B*6Z!QW z`@Y-hZte(JphrP`Q#-zHKqe{n+J=rk^vsFTgt8bLvrB$}eLVg%`*g$To+#Rd{W9DG z&_K^4RxgU?)0?H910^rH#ha{s<8?B@zS`KkNl+zp&RS@)r#Zm*h27tHXYWQtBOvY~ z3#Wu3coKbd7}p;3B>Lz!>)brjgP}d<{C=$!H_ws*QMHi3<3#6Vb~V61*u9>8T@&c; zB@kS}D$pK$?qUaeyM{55e6;NOCA=Pe9xgcsw8d%x;;uXFo!&2cPQ&aG0U}-Qu~S<< z0hTXrIW~9#o}Vc7f7pEAo_^63kAX+}{ayREZ-5Lfmv5=4Czo8+3n9q!BIp#t(*666 zE}zF#MyXfe7;eDYaMdW6E!SMSNmt7r%l9vPw|__VS==^`;sA<+D3FFcUa$?{cdTH} zGwBqnb0|c&^x#bNlIS2rYd9~FiwGFc{u=!I*hX5nB5P_QrD>WPOKPg7(x?P%V%hdC zmKzu%lkAOwwS!y1l#}@eJOOna)KMUyufRkiNkWgauLllIzXDH?=_CyF49*q$D$Wsx z>L^yppT~9AFbeia*(lNE3Tlebv$fo|J~@jy{C^Z&K`VHbM%{{|U+}2uH=GGJ^cMDU z=Ce^ek}1(6VtMHx4MZYb!_AHtH1FNYE)VWK8wVq(28aX*9x`m3$6jzv>*8~idKB){ z8C)!&Sbj5t2cZy4qDY;vp2l?0MrujPT|^^Ygm%`}4DBbYUO9L&yKD11(sJK9m;$D_ z4Prhm=;b+T_*Uadu#d8xAr=`oka}vOY^v-_#RyL;rgrAMs?i@u7%-^Pn&NjsFZ0QVUgXF*@oz; z*eYna%tfBi^@8UHM#=Sc{RF$bd;6-~$9hl)MS!A$Vg+d#j+S6yXsOhi0@My^ty-(l zXWDp7hp)7tLM`|#0FFyEgkmQMXe`gRE%<<|!WBi4&iS}&2j{Z&?=p&?qg?f7=D|k1 zUFZ?qE7*ZuK+mGUqpp_^BUvp(D8A#F726s|cKJxu6wo0dCqfCLRi%yu3^Q67{yLMP F{{Sdx9s2+P delta 2758 zcma)8O>7fa5Z<@D-mHI;I0*y-${!&CLlW~N2++t41W_m@1o6{0Kvx^TjbpHPw{O>> zsaoX#*NRH-Q1yz`18pzG*Zv8q>b2^Pr&1wRm5Pvh>Y++2wKH$?V~j!A^6dMYZ)U!k zdF$JAx16P8&CLk~K0UM3*6=Px`3)zl9~}l0@C#fJRH3A#%-~Zkk+M3Y66H08YE-|a zP~FjPYBM^_W7L4T;lyBW&?ai$Qf8WHoF?FD(j-m6Gw!IgnYPf@!iaiPok@UNnzn&j zn}kVHa$d=_KL(@9RECIzYnL4wR{UxQVMb3AEZr8&L50fcC=3QlO*rDDM? 
zJ8n3^`{GMS5SO7IOCzot8Ap%nJOK5eR(-PiAU;BNVo(`XsH1xXxlyOG93L+@uJfMH za%cHyvOE|aW*ZT`^H~#Y6Rb`1?~}uakqL&8Ms8`5TkFmu(mFfBv@GWM%(6~Z2UAbf zmoil02U>%rmvLzUj0GwPT@mENjKQAvPQ;k)797@rK5Yg8Cy;+Z0vV+s6+spRDb~>= znyk?9Eb4_9sw3$ggv{`ZZCA-QUTvEu39h&A+gcZ9B8oCtC|Dm<&$Samw(|2Gqd7FX z?o=jXOBjJyVR=Uwq0J!k&sw*F5GcFNzw7vMOhX7sxC{=9K^R4M!C~QZD)JB<`ILXN z;oMRy9yWdE`EYtsmTFW|Z3wyng)FEp9Bo6fAH^67M5*yi1}qC69YWv$gEM<{H4#tg zDc#geJ)xRObEny1CK#ULA8ibXQ6-)KXahCr6vVFqmY7&y-}2rq<*o+(!6sn$>*{A) zhSWn62Kv}46yLY^`F4>(XJFY1V%LE#C&o*~Ak2GZKlc;Yy2th#crF(BrD6zL$6>~L zP}C=Lj(KIsK+!GS=jHCZWRh>{>F<%1qg{6v>&~+`0OMzVspt0tTM&(ac#K6kB@eFn z$8xy$v?bYAzjD8KUmv>mf^>@N{oXk;B%{|>;k)s{f3NQXjGp5lR?naO-nKhX^cM$C?3#l4 zONIV7WPCG!IIwk4_Q?xOU`75_?cP2_cCS&mO6=KHa}gP`o=(Ere+qX84<293$rMB? zSvZCY$d52=-*;RJNV4o2F@O{<^FIcMmky)a5frbW7)LPyLL~DgI|#~7IPX#RIxb^H zL=7Yj%Jz~?g=jT7ld(vLG5>eq7qo*gH|wftq|&Bo8i^LuFb!rXoDWS-o`PMW72PnS zi6mA(-V{)$tRFYXwHj;*djghqD98Zs4u!N3c7k{9IC}0am_gJC$KJ-V&Sr3oFzhml z23&}w<=S?>B-<{&j+!RyO7)8!1L~-(&y{}EWpfe{cPnMUQR?oGLOaA=`edrbnKT##Fm5+t|< zoyU|b&1tohn52`i_A1+bgfzT=@U|PS{xIC39xlLerCdZJV0p`*XLjJ!@h;LKym^9> z9Tq(|u;x6Lt+r(siIE3&M)}8kG6NKmh#=4Cn&TzAJWK7|Y5vQe?3-ucoM>VWT=;>5 zH9U(L=m)!mTROIJCKgrjJT6B@a4!b`CRqJ_B{ZAT9G~5LX0#?6CRNv(iy>DcWm$R8 z4ecTXVV&U*_wH=i1;`I8qVQ3?i((x~jwJOCYz(bZZ|YFwn%Qc$GF;E3SOy-@OkSzv zBehsL4o5OD`!K N(z9j^z8hP#{{beg{C)rc diff --git a/nlp_resource_data/nltk/translate/__pycache__/bleu_score.cpython-37.pyc b/nlp_resource_data/nltk/translate/__pycache__/bleu_score.cpython-37.pyc index 4ab78a5ea802e9846fcd7fbc2dd8fd24a39cea7b..d1a3787f9b7d04afcab11db3c344fd089bedec55 100644 GIT binary patch delta 2849 zcma)8S!^3c7@pZ(+w1iaCvh%2B@s>27^1jofI^z4ZLUJvx+g8!rZuj|4Q_mOyl&It zD1uZ7fjGiQAR!^OsN#hO9?T1GAgZb=2p&*{WQhk33HrbrNQnFY$99fV)b48k`Dgz5 zXa3{+XZFTb`q4GIFyiqT1fGIpDf(HOkngcmdvsuSz?Z)cgb|Z4%9IpME2ct`S5vB~ z%2qQq+1gB-Z0)8US}o;B>!vQ}+EUK6VH%X2?@ZWEyFhAZ4z{4bqDrwdWlOtE7bQnY zrc)yc_pHHRqy%4>>P&em(kYTro+7M;xtRMRHNC8rd6*ZtI#$Pg7m4X(^{fHfde+GN z&^GKMtcf*WB&>N+F&mkBh=kfM1V;9ZKiQd&XO})~841y{&XQ-7`D8W|fsSJ|TgX@m z4y-*h!(-`u@+Xy$3FsTqOqQo(spN$OYEAH1+ybqopSw$bdc@=MLY#gd4Hxn}ES~f3 zt!u~NX+X$cg>ql%8?VppJ%_$#pxaUb?cbu6`U1fH0`(WG19? zG_h=x0V~_$7(xtKIyM)Toyw{;Pgycv@pMxB);Hj*2VID7B{ELlA-0BlJ$78);MFKt z;a(aRm&5+fE*R$ud{m4rb*8}&XxEb!C>r20ek1HEGU;sQp+xQ@owUOiZ!B||J z+njI1AXKEdQdu8L#nLBPY&ap*fk!=kFc#7(wgklE106lpy``GmyBnq)p^$Hk6Y+Sd zLK_s`U3zW6?cIkCauAXTSQEs~13-Y^j{M5r(mc7`Mi89FF0PHA3rRe%1u7n>7R6TX|0@IdYEDQ>AZ5KUZj{@a^gmHz&4A z?j=bh;-_6hRgOPiZ`Zg|*X|+OGlHIsA{j%nHqd1~8ck-BRy6u$ z>BT)CP!9$Kl6yZ+?Gj^=!AgjAK8_>3;^j!NOTrWpTGpcJ{LF1j@(@V=DBXy7XupJn zGyri?OE{_msN{+(fo+4)Hlgnyd|0MXtL_M+Jc48&5-F;xN<28y3G10G>-U$=?SGFB zuK}#e5vqI{m#g=`-Duo>qd0bOaNsVxR}@$8y}6@w{owoR)5}=8rNClPd6gid3DSGV zojfYrlbQJ0j8&<9S@vkae|H&pP<(YH%;GkX;)C!*_b@E^{Xpg>EmEY5bc`Gyo>t}^ z^LkM~O&Oh4xKV^#U{;0J!IUC7t@w$hUeb!%Wf~+cWZns*&LC(P9Yf@dQ96G#M;9a# zY^$7*@R2pr4%-1pWCDAEubu_S$&y*g*D_T2VU*t4wTE^>!18z^61{;oP%kwUpLq9J zP`ozP(wBqbB$;;$7va`xBTQ`};FmT;=Ef{4ewfGj)`dBaliw!TO#t*F8bTMJoK#>gU|VBS?UAk(&@(+?Rgx{z-n%Q+5|UYLG| z(nF%_#BSFzaCyDBaANa=*pJ9u)hb5bNo$iQkLa`F>l2SH*m-9yfR$o<1iKm%Ja>o+{C9qu}o9{gP2ER7}ExrL)<>`W^_j{7I~3eLJR{&0NL6+g+9b|RUa`y z3xU$>(~UIRXLME{@bU<#a}!8tb(OB%%(uaMZ79msLukvhXcZPEarnfgW9u5XMBo_M z0evpfheZ7H^rnRg9Kv&wA3$;t$&*Ny?cmecJ&t5$Gs)r6!+U^S12E_D2Gt#!V)W{5 zx=VNK&AJcXfSduFJ3thDvYPl}IY z-3#bZ*?IVUE-}t|mYc3j%9`!NAEKDWeW;9AYo&_1k;A)C8gM7u;R;V@Ss|4e#v?3` k=Z8k!aEVr4x8hb5>Vwxy8|?U_={5FaV_14t_v_RD0w07Y)Bpeg delta 2849 zcma(TOKenC^xpU0%)I$eKeU|=g;Ku5Zwj>{wU$z_2!&~-1ni?QPJ4%TV1C^9u!wIg zp~QtICgd)RF)D0Kj0wh!iMnEgqWD?3a4{xoVob!93s!9OoIBG_TP5&j-Z|&qbI-l! 
zf8Q5h(Yv>4-GJZkA@Ho47)gv}3He?053HDd6=oPQ2%}8NP=iXW7z$uDqh>WjqZOXR zaKN0Fab{hHOU^kmde&{Yfo^0vb6=%~hpk{9<^{^je9V897(NzYLBM_%Vs(H62MDWY z4Oa-Tnv#HqGIT%Ko> zne?SplyXIkMmn2t7w!k}z|)&ao7QALTj;wZ-i~xRUC7eKkCCxx1CWL~lSr4dgFrSI)tO zjR;V8z7jzgfT2tj@X%bDn)2MJ!kduUi~tqqP5@Kznoj{BeNO76x>7H`Y3roU(u1~O zT@Y0TFRIyeE~@ed(GqK4VmM1rRt(=K;<2t!5S9;uhw4$x%~y$Qv8{du2cD|3;%;nf z_jZ^u0+X3^p^##UNeq_B*TN)UhoEvO??hVa*#XpVVoT?0+EE(nJf*B(1ruNnI+yQ8 zu+*=-4d%wh_dCrndZNO`oyvM|CYhaJ$-dKKtm`TNHkgZQ71hGxy{^@ps%J|vdG>l( zGEBioIuOJkpbD*5`1;ZxUB18&+RK4S;-F0{4x#{n?nAnEwp34Uw*eTFxSg!0QJrcE zzI=n|9SGA2aeSaB*201;1cl&cVOGcLWpT8!2G%HxBLaoe1Qe79QDv$UBRVShI;9yy{P>Ff_(^< zI=8GR66svpN+iB6{d)Lg>PII)eAnR8wc?$4&td6So$tq)Eda_|l24gs=eZfgh6|E1 zkxR`~vKAB8&=Og5YS}@26L9XA)(rdU4w)QMEzFSl(4#11n>?rH;ue^#7m1OcZW&Ci zrUyR)EFMQ7SyfRCOxcx9S<`v8!(aMgW@2X;hS!_`U~62rwVcx;wmL_P+Qq{bwdow)PfqoZD@9jP zxAn8o_0K^K*cxCJu!|`+Ijc4jOS|r{wHq`-nn}@ZyJ4*}LTqZgddd0L6F@;4tEDeb zOw(DJN-mD&y~s$JgtEFK$dERB7TH{DgR%e{Z# zxu*gJQIl46ARprw^*)9LWtuhISS^Lj1#`zEL)ks#Lt8?yJ#mGo`26HEM?VF6x%M_( z)nQ_`TX1pD(JS=go0d`y=#^?fT}(aatu2k6Z17zCd3yJ3HCtAL*rJr_#-WB_06^6D7{0D~*H8=TPJRLe z%QW+lta)-AJ>4h^#U0Yb`-#21Pk_XKTurV+t=I~!W{yp;XYIS_%kb= z`k;7iVgtjPmftwM11Y@8Io`i=-~UqyJ`b{ab|I7M!yah11HkK9r#^fYpOa8RJtGoY KdR}kT$NvR|;VLu$ diff --git a/nlp_resource_data/nltk/translate/__pycache__/chrf_score.cpython-37.pyc b/nlp_resource_data/nltk/translate/__pycache__/chrf_score.cpython-37.pyc index 431f99e6d7ea023aa6b1dfcedc105f01e15e85fd..6325651ff2f630ba1eb00379a23cd20de10c6f09 100644 GIT binary patch delta 752 zcmZuuy>HV{5chNJ*zrf2CIMRVQ9+=^LTXt+NTeCaf`kwQUE%>N^>dJg9fz~c01_-+ z8KSo`ATcqbioXGgnaNWB0A?1{i91Jv2=SiuySv}Lcb{JlzaQH7ZQGFWY+b5E1@HXGz z4P;Dyfp79AY8F?XNY2IOn}Nv+orf>OG)(57KU}si)i5tmDW4>0pg12N$mtG{w07`Zj$kzpP*r&BF8+5G)|^gEW1HL|QhsDt|KTDvf&Qd>tU_F=k)pFR2)2rT0Vj=|7>)jbRj~nV-aq z(V6(x9h(XzsJsdn&=Fg`9o83jd;JED2{s5;6BV;wf9FqEWeT`P?&}1*1OxH0H)#JY zpC$+2la7J^%wN7?iJJPRXGiEK&_5Idyq5l}H}lPQZXH?9xkgy&H(qD`m7E6idI NrpuOM9O0TK>$^0e{?dESP#C&& zVyiqMegP9g6$5{OUw{}aW~BawF7TeCLWH=Jp7-nCk9+RE?S0;}AK133;MzKP>OMNN zJ1?H0!4#@6!qk9-Dpf;`YDC&vpi>>C9+bi|Eu-%mt1|PH&#CDj^2P9iX}cdbb`FHb!`jRB?bW8ka3GQI7P8Nmv`^om**V;=y z@N}-do+q#&wGw#IA%|V5m%Sw65z9;Sl!s#3qDf8QQw}^rQm8pQPU3XN1s7mr3P^h4 z&x6ovvr(FO5 diff --git a/nlp_resource_data/nltk/translate/__pycache__/gale_church.cpython-37.pyc b/nlp_resource_data/nltk/translate/__pycache__/gale_church.cpython-37.pyc index 0be5c764eb247d2936e8a5ba46e9d9f54e4739ca..57322b4d1617392e8ebee2525876601dc890b132 100644 GIT binary patch delta 1855 zcmZuxJ!~9R5Z<@Dx3}Iq@14(=`0o;kQQ|Dbc76nw62~El6XOIDSttUwSuXe9nTzjk zH}m!&4j>na1O*h<78Gd$qM)F=h6*ZZXh1a4(jX*2G>D2K6*F%GXF1u`&YPY2X5Re0 zcmMP^r`>bI!;XeeFTWl9Fum=LZ+(S-T=O-~xZY+R-PalMM%(a>%^GrM+wA20oRa3+ zxsK)Mnf992Jz{FY-f-YerqRnSp1-U4L)_*=xH{b71zhuQXkGnvO&IqJy!a{eiz^x* z=I&jMyJGl`;h%ANiI)*r;v>9*YgxEW#z%SeF7rnezlZNdzQV`&IO<3F1fRsU%8gew zZ)*F&=S7dDHgCP%lC7}Ye~-P(3d5Mx!{zy&g!#lZF0xv3%{azN$p^;!YdO@d|m8!P3p-+o5M!F8O=todX3lNm(mpH+JFETnzPwXe1vx&#?Lamrh_TQS4U~ z4x`>rmR8toN)hAlEd3T`*n@@$r0B+S&=GhVl^-RmZfEN;7jP7HXg{ay<2tO1_=)x( zBS1CN%!;VW_NbS`sUdqEIEWCa5uq7@5hDALq$9q7a2aRs6b;`6fDm90Da@Czt{l($ zXc2mp$P^OKQ5mVQ;224dCsFCSqpHN_V>$w;*>mgd)_OOhHyuGs#x6IpzlBjlQ z3(KZl8L5TiA?h7?1#!OnRvq{*HwYzNHing`4pIavuOPF;%U_cwBD9=i!c|N3b8c)?7^vF-&o?!)?)SdC zeDAVzC6lo={5!F>>Q0aEJ43r);~Up18fRQ@unOB5LCa|9O`~Ek@|q2!X;v(zy{)w- zOiiS=Y$DSAHR@8!x6nT#`C;?k5#hDF7RV$_wfNfi18dB;>Yos=lUC3skr~}e)|sl zhz<6l_Jh@_4ZkU-Hocto$Vtz9eZxfiLvym;s+Fu{A*121Lk``s{g&6*7Ow#K z5L>l|@BQEUwpt(a!rB5RhNMAbI?FSM!7;QUD~s0cMfw;pgp#fvKn}E(|Ip47=W#zd zkfZ!bP(HN8h0Ye0Mf-#SgjM_x`w-G89VH~cv|%zBXx~ar8fS@K(qp}RE@r`zN*#31 
[Portion of the GIT binary patch data omitted: byte-level deltas for the compiled bytecode caches under nlp_resource_data/nltk/translate/__pycache__/ (ibm1, ibm2, ibm4, ibm_model, and the deleted meteor_score .cpython-37.pyc, among others). These blobs are regenerated automatically from the .py sources patched below and contain nothing human-readable.]
zi3d-l|AjZQUOgF)dNzI5M1BHzv1^D1m`uL@s`~4ynyQ|k>tEN?i|Mqj$Y*3@r5PTj zD+g_9FvV9GVKgKW^(jHFhN`d1RSUI9_w~r|4e2j2lUawvH(8mbm@S#b2AFfG_$gLl z88#?AHp{Y{^bD{nyT%IA%c{xLT} z%Kh)TQ);N_cKEl##K1+xpTZQW^~a04S#_jB=+J7v3;@-_XzlI>Z3Y$o$Q{X33{aEc zO$M@1|YO{@VW~lD8j5T zo@_#1E<%@zC>Eq8$c8W=hjXWCmvbHHzUkulXW(mO_9RxcRQUhhGd-h5BRBe*xjl3y4ztg z_9QtZx2%kTapYhkVA}w@0+<*I_Y(XMMahJ<5Or8D3?8PW6=O#=>JW=&ReRpPW4ZRK IRko-90+Q^mqW}N^ diff --git a/nlp_resource_data/nltk/translate/__pycache__/stack_decoder.cpython-37.pyc b/nlp_resource_data/nltk/translate/__pycache__/stack_decoder.cpython-37.pyc index ae9377fb27c627d10259c8779338dcf2ee246a8f..fddf2599a01890fe2d93fed27c9b50259e1da30c 100644 GIT binary patch delta 34 ocmX@n#<-`Ak=u!vmx}=iO4%oF+5aK5V&Fl03i_# Axc~qF diff --git a/nlp_resource_data/nltk/translate/api.py b/nlp_resource_data/nltk/translate/api.py index 9efcbde..b889410 100644 --- a/nlp_resource_data/nltk/translate/api.py +++ b/nlp_resource_data/nltk/translate/api.py @@ -1,6 +1,6 @@ # Natural Language Toolkit: API for alignment and translation objects # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Will Zhang # Guan Gui # Steven Bird @@ -8,10 +8,14 @@ # URL: # For license information, see LICENSE.TXT +from __future__ import print_function, unicode_literals import subprocess from collections import namedtuple +from nltk.compat import python_2_unicode_compatible + +@python_2_unicode_compatible class AlignedSent(object): """ Return an aligned sentence object, which encapsulates two sentences @@ -85,8 +89,8 @@ class AlignedSent(object): """ Dot representation of the aligned sentence """ - s = "graph align {\n" - s += "node[shape=plaintext]\n" + s = 'graph align {\n' + s += 'node[shape=plaintext]\n' # Declare node for w in self._words: @@ -114,10 +118,10 @@ class AlignedSent(object): ) # Put it in the same rank - s += "{rank = same; %s}\n" % (" ".join('"%s_source"' % w for w in self._words)) - s += "{rank = same; %s}\n" % (" ".join('"%s_target"' % w for w in self._mots)) + s += '{rank = same; %s}\n' % (' '.join('"%s_source"' % w for w in self._words)) + s += '{rank = same; %s}\n' % (' '.join('"%s_target"' % w for w in self._mots)) - s += "}" + s += '}' return s @@ -125,20 +129,20 @@ class AlignedSent(object): """ Ipython magic : show SVG representation of this ``AlignedSent``. """ - dot_string = self._to_dot().encode("utf8") - output_format = "svg" + dot_string = self._to_dot().encode('utf8') + output_format = 'svg' try: process = subprocess.Popen( - ["dot", "-T%s" % output_format], + ['dot', '-T%s' % output_format], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, ) except OSError: - raise Exception("Cannot find the dot binary from Graphviz package") + raise Exception('Cannot find the dot binary from Graphviz package') out, err = process.communicate(dot_string) - return out.decode("utf8") + return out.decode('utf8') def __str__(self): """ @@ -159,6 +163,7 @@ class AlignedSent(object): return AlignedSent(self._mots, self._words, self._alignment.invert()) +@python_2_unicode_compatible class Alignment(frozenset): """ A storage class for representing alignment between two sequences, s1, s2. 
@@ -288,7 +293,7 @@ def _check_alignment(num_words, num_mots, alignment): raise IndexError("Alignment is outside boundary of mots") -PhraseTableEntry = namedtuple("PhraseTableEntry", ["trg_phrase", "log_prob"]) +PhraseTableEntry = namedtuple('PhraseTableEntry', ['trg_phrase', 'log_prob']) class PhraseTable(object): diff --git a/nlp_resource_data/nltk/translate/bleu_score.py b/nlp_resource_data/nltk/translate/bleu_score.py index a6a79a1..4617203 100644 --- a/nlp_resource_data/nltk/translate/bleu_score.py +++ b/nlp_resource_data/nltk/translate/bleu_score.py @@ -1,22 +1,29 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: BLEU Score # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Authors: Chin Yee Lee, Hengfeng Li, Ruxin Hou, Calvin Tanujaya Lim # Contributors: Björn Mattsson, Dmitrijs Milajevs, Liling Tan # URL: # For license information, see LICENSE.TXT """BLEU score implementation.""" +from __future__ import division import math import sys -from fractions import Fraction +import fractions import warnings from collections import Counter from nltk.util import ngrams +try: + fractions.Fraction(0, 1000, _normalize=False) + from fractions import Fraction +except TypeError: + from nltk.compat import Fraction + def sentence_bleu( references, @@ -467,23 +474,23 @@ class SmoothingFunction: ... 'Party', 'commands'] >>> chencherry = SmoothingFunction() - >>> print(sentence_bleu([reference1], hypothesis1)) # doctest: +ELLIPSIS + >>> print (sentence_bleu([reference1], hypothesis1)) # doctest: +ELLIPSIS 0.4118... - >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method0)) # doctest: +ELLIPSIS + >>> print (sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method0)) # doctest: +ELLIPSIS 0.4118... - >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method1)) # doctest: +ELLIPSIS + >>> print (sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method1)) # doctest: +ELLIPSIS 0.4118... - >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method2)) # doctest: +ELLIPSIS + >>> print (sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method2)) # doctest: +ELLIPSIS 0.4489... - >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method3)) # doctest: +ELLIPSIS + >>> print (sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method3)) # doctest: +ELLIPSIS 0.4118... - >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method4)) # doctest: +ELLIPSIS + >>> print (sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method4)) # doctest: +ELLIPSIS 0.4118... - >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method5)) # doctest: +ELLIPSIS + >>> print (sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method5)) # doctest: +ELLIPSIS 0.4905... - >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method6)) # doctest: +ELLIPSIS + >>> print (sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method6)) # doctest: +ELLIPSIS 0.4135... - >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method7)) # doctest: +ELLIPSIS + >>> print (sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method7)) # doctest: +ELLIPSIS 0.4905... 
:param epsilon: the epsilon value use in method 1 @@ -567,7 +574,7 @@ class SmoothingFunction: incvnt += 1 return p_n - def method4(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs): + def method4(self, p_n, references, hypothesis, hyp_len, *args, **kwargs): """ Smoothing method 4: Shorter translations may have inflated precision values due to having @@ -575,23 +582,21 @@ class SmoothingFunction: smaller smoothed counts. Instead of scaling to 1/(2^k), Chen and Cherry suggests dividing by 1/ln(len(T)), where T is the length of the translation. """ - hyp_len = hyp_len if hyp_len else len(hypothesis) for i, p_i in enumerate(p_n): if p_i.numerator == 0 and hyp_len != 0: incvnt = i + 1 * self.k / math.log( hyp_len ) # Note that this K is different from the K from NIST. - p_n[i] = incvnt / p_i.denominator + p_n[i] = 1 / incvnt return p_n - def method5(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs): + def method5(self, p_n, references, hypothesis, hyp_len, *args, **kwargs): """ Smoothing method 5: The matched counts for similar values of n should be similar. To a calculate the n-gram matched count, it averages the n−1, n and n+1 gram matched counts. """ - hyp_len = hyp_len if hyp_len else len(hypothesis) m = {} # Requires an precision value for an addition ngram order. p_n_plus1 = p_n + [modified_precision(references, hypothesis, 5)] @@ -601,7 +606,7 @@ class SmoothingFunction: m[i] = p_n[i] return p_n - def method6(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs): + def method6(self, p_n, references, hypothesis, hyp_len, *args, **kwargs): """ Smoothing method 6: Interpolates the maximum likelihood estimate of the precision *p_n* with @@ -610,7 +615,6 @@ class SmoothingFunction: Gao and He (2013) Training MRF-Based Phrase Translation Models using Gradient Ascent. In NAACL. """ - hyp_len = hyp_len if hyp_len else len(hypothesis) # This smoothing only works when p_1 and p_2 is non-zero. # Raise an error with an appropriate message when the input is too short # to use this smoothing technique. @@ -628,12 +632,13 @@ class SmoothingFunction: p_n[i] = (m + self.alpha * pi0) / (l + self.alpha) return p_n - def method7(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs): + def method7(self, p_n, references, hypothesis, hyp_len, *args, **kwargs): """ - Smoothing method 7: - Interpolates methods 5 and 6. + Smoothing method 6: + Interpolates the maximum likelihood estimate of the precision *p_n* with + a prior estimate *pi0*. The prior is estimated by assuming that the ratio + between pn and pn−1 will be the same as that between pn−1 and pn−2. 
""" - hyp_len = hyp_len if hyp_len else len(hypothesis) p_n = self.method4(p_n, references, hypothesis, hyp_len) p_n = self.method5(p_n, references, hypothesis, hyp_len) return p_n diff --git a/nlp_resource_data/nltk/translate/chrf_score.py b/nlp_resource_data/nltk/translate/chrf_score.py index ef5fb90..f77a026 100644 --- a/nlp_resource_data/nltk/translate/chrf_score.py +++ b/nlp_resource_data/nltk/translate/chrf_score.py @@ -1,13 +1,14 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: ChrF score # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Authors: Maja Popovic # Contributors: Liling Tan, Aleš Tamchyna (Memsource) # URL: # For license information, see LICENSE.TXT """ ChrF score implementation """ +from __future__ import division from collections import Counter, defaultdict import re @@ -102,10 +103,10 @@ def sentence_chrf( def _preprocess(sent, ignore_whitespace): if type(sent) != str: # turn list of tokens into a string - sent = " ".join(sent) + sent = ' '.join(sent) if ignore_whitespace: - sent = re.sub(r"\s+", "", sent) + sent = re.sub(r'\s+', '', sent) return sent diff --git a/nlp_resource_data/nltk/translate/gale_church.py b/nlp_resource_data/nltk/translate/gale_church.py index 80aa4c1..582951c 100644 --- a/nlp_resource_data/nltk/translate/gale_church.py +++ b/nlp_resource_data/nltk/translate/gale_church.py @@ -2,7 +2,7 @@ # Natural Language Toolkit: Gale-Church Aligner # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Torsten Marek # Contributor: Cassidy Laidlaw, Liling Tan # URL: @@ -17,6 +17,7 @@ http://aclweb.org/anthology/J93-1004.pdf """ +from __future__ import division import math try: @@ -71,7 +72,7 @@ except ImportError: try: return math.log(1 - norm_cdf(x)) except ValueError: - return float("-inf") + return float('-inf') LOG2 = math.log(2) @@ -146,7 +147,7 @@ def align_log_prob(i, j, source_sents, target_sents, alignment, params): m * params.VARIANCE_CHARACTERS ) except ZeroDivisionError: - return float("-inf") + return float('-inf') return -(LOG2 + norm_logsf(abs(delta)) + math.log(params.PRIORS[alignment])) @@ -178,7 +179,7 @@ def align_blocks(source_sents_lens, target_sents_lens, params=LanguageIndependen for i in range(len(source_sents_lens) + 1): for j in range(len(target_sents_lens) + 1): - min_dist = float("inf") + min_dist = float('inf') min_align = None for a in alignment_types: prev_i = -1 - a[0] @@ -192,7 +193,7 @@ def align_blocks(source_sents_lens, target_sents_lens, params=LanguageIndependen min_dist = p min_align = a - if min_dist == float("inf"): + if min_dist == float('inf'): min_dist = 0 backlinks[(i, j)] = min_align @@ -264,3 +265,11 @@ def parse_token_stream(stream, soft_delimiter, hard_delimiter): for block_it in split_at(stream, hard_delimiter) ] + +# Code for test files in nltk_contrib/align/data/*.tok +# import sys +# from contextlib import nested +# with nested(open(sys.argv[1], "r"), open(sys.argv[2], "r")) as (s, t): +# source = parse_token_stream((l.strip() for l in s), ".EOS", ".EOP") +# target = parse_token_stream((l.strip() for l in t), ".EOS", ".EOP") +# print align_texts(source, target) diff --git a/nlp_resource_data/nltk/translate/gdfa.py b/nlp_resource_data/nltk/translate/gdfa.py index bc0e91b..bdea805 100644 --- a/nlp_resource_data/nltk/translate/gdfa.py +++ b/nlp_resource_data/nltk/translate/gdfa.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: GDFA word alignment symmetrization # -# Copyright (C) 2001-2020 NLTK Project +# 
Copyright (C) 2001-2019 NLTK Project # Authors: Liling Tan # URL: # For license information, see LICENSE.TXT @@ -67,8 +67,8 @@ def grow_diag_final_and(srclen, trglen, e2f, f2e): """ # Converts pharaoh text format into list of tuples. - e2f = [tuple(map(int, a.split("-"))) for a in e2f.split()] - f2e = [tuple(map(int, a.split("-"))) for a in f2e.split()] + e2f = [tuple(map(int, a.split('-'))) for a in e2f.split()] + f2e = [tuple(map(int, a.split('-'))) for a in f2e.split()] neighbors = [(-1, 0), (0, -1), (1, 0), (0, 1), (-1, -1), (-1, 1), (1, -1), (1, 1)] alignment = set(e2f).intersection(set(f2e)) # Find the intersection. @@ -77,8 +77,8 @@ def grow_diag_final_and(srclen, trglen, e2f, f2e): # *aligned* is used to check if neighbors are aligned in grow_diag() aligned = defaultdict(set) for i, j in alignment: - aligned["e"].add(i) - aligned["f"].add(j) + aligned['e'].add(i) + aligned['f'].add(j) def grow_diag(): """ @@ -105,8 +105,8 @@ def grow_diag_final_and(srclen, trglen, e2f, f2e): e_new not in aligned and f_new not in aligned ) and neighbor in union: alignment.add(neighbor) - aligned["e"].add(e_new) - aligned["f"].add(f_new) + aligned['e'].add(e_new) + aligned['f'].add(f_new) prev_len += 1 no_new_points = False # iterate until no new points added @@ -130,8 +130,8 @@ def grow_diag_final_and(srclen, trglen, e2f, f2e): and (e_new, f_new) in union ): alignment.add((e_new, f_new)) - aligned["e"].add(e_new) - aligned["f"].add(f_new) + aligned['e'].add(e_new) + aligned['f'].add(f_new) grow_diag() final_and(e2f) diff --git a/nlp_resource_data/nltk/translate/gleu_score.py b/nlp_resource_data/nltk/translate/gleu_score.py index 9fe7214..43c3e99 100644 --- a/nlp_resource_data/nltk/translate/gleu_score.py +++ b/nlp_resource_data/nltk/translate/gleu_score.py @@ -1,14 +1,14 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: GLEU Score # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Authors: # Contributors: Mike Schuster, Michael Wayne Goodman, Liling Tan # URL: # For license information, see LICENSE.TXT """ GLEU score implementation. """ - +from __future__ import division from collections import Counter from nltk.util import ngrams, everygrams diff --git a/nlp_resource_data/nltk/translate/ibm1.py b/nlp_resource_data/nltk/translate/ibm1.py index 013f5e4..ff243fd 100644 --- a/nlp_resource_data/nltk/translate/ibm1.py +++ b/nlp_resource_data/nltk/translate/ibm1.py @@ -63,6 +63,7 @@ Translation: Parameter Estimation. Computational Linguistics, 19 (2), 263-311. """ +from __future__ import division from collections import defaultdict from nltk.translate import AlignedSent from nltk.translate import Alignment @@ -132,7 +133,7 @@ class IBMModel1(IBMModel): self.set_uniform_probabilities(sentence_aligned_corpus) else: # Set user-defined probabilities - self.translation_table = probability_tables["translation_table"] + self.translation_table = probability_tables['translation_table'] for n in range(0, iterations): self.train(sentence_aligned_corpus) diff --git a/nlp_resource_data/nltk/translate/ibm2.py b/nlp_resource_data/nltk/translate/ibm2.py index a806d41..e235f59 100644 --- a/nlp_resource_data/nltk/translate/ibm2.py +++ b/nlp_resource_data/nltk/translate/ibm2.py @@ -46,6 +46,8 @@ Translation: Parameter Estimation. Computational Linguistics, 19 (2), 263-311. 
""" +from __future__ import division + import warnings from collections import defaultdict @@ -129,8 +131,8 @@ class IBMModel2(IBMModel): self.set_uniform_probabilities(sentence_aligned_corpus) else: # Set user-defined probabilities - self.translation_table = probability_tables["translation_table"] - self.alignment_table = probability_tables["alignment_table"] + self.translation_table = probability_tables['translation_table'] + self.alignment_table = probability_tables['alignment_table'] for n in range(0, iterations): self.train(sentence_aligned_corpus) @@ -161,7 +163,7 @@ class IBMModel2(IBMModel): counts = Model2Counts() for aligned_sentence in parallel_corpus: src_sentence = [None] + aligned_sentence.mots - trg_sentence = ["UNUSED"] + aligned_sentence.words # 1-indexed + trg_sentence = ['UNUSED'] + aligned_sentence.words # 1-indexed l = len(aligned_sentence.mots) m = len(aligned_sentence.words) diff --git a/nlp_resource_data/nltk/translate/ibm3.py b/nlp_resource_data/nltk/translate/ibm3.py index ed491f9..2c7c618 100644 --- a/nlp_resource_data/nltk/translate/ibm3.py +++ b/nlp_resource_data/nltk/translate/ibm3.py @@ -73,6 +73,8 @@ Translation: Parameter Estimation. Computational Linguistics, 19 (2), 263-311. """ +from __future__ import division + import warnings from collections import defaultdict from math import factorial @@ -169,11 +171,11 @@ class IBMModel3(IBMModel): self.set_uniform_probabilities(sentence_aligned_corpus) else: # Set user-defined probabilities - self.translation_table = probability_tables["translation_table"] - self.alignment_table = probability_tables["alignment_table"] - self.fertility_table = probability_tables["fertility_table"] - self.p1 = probability_tables["p1"] - self.distortion_table = probability_tables["distortion_table"] + self.translation_table = probability_tables['translation_table'] + self.alignment_table = probability_tables['alignment_table'] + self.fertility_table = probability_tables['fertility_table'] + self.p1 = probability_tables['p1'] + self.distortion_table = probability_tables['distortion_table'] for n in range(0, iterations): self.train(sentence_aligned_corpus) diff --git a/nlp_resource_data/nltk/translate/ibm4.py b/nlp_resource_data/nltk/translate/ibm4.py index fc6c295..323dd4d 100644 --- a/nlp_resource_data/nltk/translate/ibm4.py +++ b/nlp_resource_data/nltk/translate/ibm4.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: IBM Model 4 # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Tah Wei Hoon # URL: # For license information, see LICENSE.TXT @@ -101,6 +101,8 @@ Translation: Parameter Estimation. Computational Linguistics, 19 (2), 263-311. 
""" +from __future__ import division + import warnings from collections import defaultdict from math import factorial @@ -220,13 +222,13 @@ class IBMModel4(IBMModel): self.set_uniform_probabilities(sentence_aligned_corpus) else: # Set user-defined probabilities - self.translation_table = probability_tables["translation_table"] - self.alignment_table = probability_tables["alignment_table"] - self.fertility_table = probability_tables["fertility_table"] - self.p1 = probability_tables["p1"] - self.head_distortion_table = probability_tables["head_distortion_table"] + self.translation_table = probability_tables['translation_table'] + self.alignment_table = probability_tables['alignment_table'] + self.fertility_table = probability_tables['fertility_table'] + self.p1 = probability_tables['p1'] + self.head_distortion_table = probability_tables['head_distortion_table'] self.non_head_distortion_table = probability_tables[ - "non_head_distortion_table" + 'non_head_distortion_table' ] for n in range(0, iterations): diff --git a/nlp_resource_data/nltk/translate/ibm5.py b/nlp_resource_data/nltk/translate/ibm5.py index 88a64f2..b1b44e7 100644 --- a/nlp_resource_data/nltk/translate/ibm5.py +++ b/nlp_resource_data/nltk/translate/ibm5.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: IBM Model 5 # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Tah Wei Hoon # URL: # For license information, see LICENSE.TXT @@ -111,6 +111,8 @@ Translation: Parameter Estimation. Computational Linguistics, 19 (2), 263-311. """ +from __future__ import division + import warnings from collections import defaultdict from math import factorial @@ -236,16 +238,16 @@ class IBMModel5(IBMModel): self.set_uniform_probabilities(sentence_aligned_corpus) else: # Set user-defined probabilities - self.translation_table = probability_tables["translation_table"] - self.alignment_table = probability_tables["alignment_table"] - self.fertility_table = probability_tables["fertility_table"] - self.p1 = probability_tables["p1"] - self.head_distortion_table = probability_tables["head_distortion_table"] + self.translation_table = probability_tables['translation_table'] + self.alignment_table = probability_tables['alignment_table'] + self.fertility_table = probability_tables['fertility_table'] + self.p1 = probability_tables['p1'] + self.head_distortion_table = probability_tables['head_distortion_table'] self.non_head_distortion_table = probability_tables[ - "non_head_distortion_table" + 'non_head_distortion_table' ] - self.head_vacancy_table = probability_tables["head_vacancy_table"] - self.non_head_vacancy_table = probability_tables["non_head_vacancy_table"] + self.head_vacancy_table = probability_tables['head_vacancy_table'] + self.non_head_vacancy_table = probability_tables['non_head_vacancy_table'] for n in range(0, iterations): self.train(sentence_aligned_corpus) diff --git a/nlp_resource_data/nltk/translate/ibm_model.py b/nlp_resource_data/nltk/translate/ibm_model.py index 3b9b913..24f6928 100644 --- a/nlp_resource_data/nltk/translate/ibm_model.py +++ b/nlp_resource_data/nltk/translate/ibm_model.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: IBM Model Core # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Tah Wei Hoon # URL: # For license information, see LICENSE.TXT @@ -37,7 +37,7 @@ Robert L. Mercer. 1993. The Mathematics of Statistical Machine Translation: Parameter Estimation. 
Computational Linguistics, 19 (2), 263-311. """ - +from __future__ import division from bisect import insort_left from collections import defaultdict from copy import deepcopy @@ -201,7 +201,7 @@ class IBMModel(object): :type i_pegged: int """ src_sentence = [None] + sentence_pair.mots - trg_sentence = ["UNUSED"] + sentence_pair.words # 1-indexed + trg_sentence = ['UNUSED'] + sentence_pair.words # 1-indexed l = len(src_sentence) - 1 # exclude NULL m = len(trg_sentence) - 1 diff --git a/nlp_resource_data/nltk/translate/meteor_score.py b/nlp_resource_data/nltk/translate/meteor_score.py deleted file mode 100644 index 008836f..0000000 --- a/nlp_resource_data/nltk/translate/meteor_score.py +++ /dev/null @@ -1,434 +0,0 @@ -# -*- coding: utf-8 -*- -# Natural Language Toolkit: Machine Translation -# -# Copyright (C) 2001-2020 NLTK Project -# Author: Uday Krishna -# URL: -# For license information, see LICENSE.TXT - - -from nltk.stem.porter import PorterStemmer -from nltk.corpus import wordnet -from itertools import chain, product - - -def _generate_enums(hypothesis, reference, preprocess=str.lower): - """ - Takes in string inputs for hypothesis and reference and returns - enumerated word lists for each of them - - :param hypothesis: hypothesis string - :type hypothesis: str - :param reference: reference string - :type reference: str - :preprocess: preprocessing method (default str.lower) - :type preprocess: method - :return: enumerated words list - :rtype: list of 2D tuples, list of 2D tuples - """ - hypothesis_list = list(enumerate(preprocess(hypothesis).split())) - reference_list = list(enumerate(preprocess(reference).split())) - return hypothesis_list, reference_list - - -def exact_match(hypothesis, reference): - """ - matches exact words in hypothesis and reference - and returns a word mapping based on the enumerated - word id between hypothesis and reference - - :param hypothesis: hypothesis string - :type hypothesis: str - :param reference: reference string - :type reference: str - :return: enumerated matched tuples, enumerated unmatched hypothesis tuples, - enumerated unmatched reference tuples - :rtype: list of 2D tuples, list of 2D tuples, list of 2D tuples - """ - hypothesis_list, reference_list = _generate_enums(hypothesis, reference) - return _match_enums(hypothesis_list, reference_list) - - -def _match_enums(enum_hypothesis_list, enum_reference_list): - """ - matches exact words in hypothesis and reference and returns - a word mapping between enum_hypothesis_list and enum_reference_list - based on the enumerated word id. 
- - :param enum_hypothesis_list: enumerated hypothesis list - :type enum_hypothesis_list: list of tuples - :param enum_reference_list: enumerated reference list - :type enum_reference_list: list of 2D tuples - :return: enumerated matched tuples, enumerated unmatched hypothesis tuples, - enumerated unmatched reference tuples - :rtype: list of 2D tuples, list of 2D tuples, list of 2D tuples - """ - word_match = [] - for i in range(len(enum_hypothesis_list))[::-1]: - for j in range(len(enum_reference_list))[::-1]: - if enum_hypothesis_list[i][1] == enum_reference_list[j][1]: - word_match.append( - (enum_hypothesis_list[i][0], enum_reference_list[j][0]) - ) - (enum_hypothesis_list.pop(i)[1], enum_reference_list.pop(j)[1]) - break - return word_match, enum_hypothesis_list, enum_reference_list - - -def _enum_stem_match( - enum_hypothesis_list, enum_reference_list, stemmer=PorterStemmer() -): - """ - Stems each word and matches them in hypothesis and reference - and returns a word mapping between enum_hypothesis_list and - enum_reference_list based on the enumerated word id. The function also - returns a enumerated list of unmatched words for hypothesis and reference. - - :param enum_hypothesis_list: - :type enum_hypothesis_list: - :param enum_reference_list: - :type enum_reference_list: - :param stemmer: nltk.stem.api.StemmerI object (default PorterStemmer()) - :type stemmer: nltk.stem.api.StemmerI or any class that implements a stem method - :return: enumerated matched tuples, enumerated unmatched hypothesis tuples, - enumerated unmatched reference tuples - :rtype: list of 2D tuples, list of 2D tuples, list of 2D tuples - """ - stemmed_enum_list1 = [ - (word_pair[0], stemmer.stem(word_pair[1])) for word_pair in enum_hypothesis_list - ] - - stemmed_enum_list2 = [ - (word_pair[0], stemmer.stem(word_pair[1])) for word_pair in enum_reference_list - ] - - word_match, enum_unmat_hypo_list, enum_unmat_ref_list = _match_enums( - stemmed_enum_list1, stemmed_enum_list2 - ) - - enum_unmat_hypo_list = ( - list(zip(*enum_unmat_hypo_list)) if len(enum_unmat_hypo_list) > 0 else [] - ) - - enum_unmat_ref_list = ( - list(zip(*enum_unmat_ref_list)) if len(enum_unmat_ref_list) > 0 else [] - ) - - enum_hypothesis_list = list( - filter(lambda x: x[0] not in enum_unmat_hypo_list, enum_hypothesis_list) - ) - - enum_reference_list = list( - filter(lambda x: x[0] not in enum_unmat_ref_list, enum_reference_list) - ) - - return word_match, enum_hypothesis_list, enum_reference_list - - -def stem_match(hypothesis, reference, stemmer=PorterStemmer()): - """ - Stems each word and matches them in hypothesis and reference - and returns a word mapping between hypothesis and reference - - :param hypothesis: - :type hypothesis: - :param reference: - :type reference: - :param stemmer: nltk.stem.api.StemmerI object (default PorterStemmer()) - :type stemmer: nltk.stem.api.StemmerI or any class that - implements a stem method - :return: enumerated matched tuples, enumerated unmatched hypothesis tuples, - enumerated unmatched reference tuples - :rtype: list of 2D tuples, list of 2D tuples, list of 2D tuples - """ - enum_hypothesis_list, enum_reference_list = _generate_enums(hypothesis, reference) - return _enum_stem_match(enum_hypothesis_list, enum_reference_list, stemmer=stemmer) - - -def _enum_wordnetsyn_match(enum_hypothesis_list, enum_reference_list, wordnet=wordnet): - """ - Matches each word in reference to a word in hypothesis - if any synonym of a hypothesis word is the exact match - to the reference word. 
- - :param enum_hypothesis_list: enumerated hypothesis list - :param enum_reference_list: enumerated reference list - :param wordnet: a wordnet corpus reader object (default nltk.corpus.wordnet) - :type wordnet: WordNetCorpusReader - :return: list of matched tuples, unmatched hypothesis list, unmatched reference list - :rtype: list of tuples, list of tuples, list of tuples - - """ - word_match = [] - for i in range(len(enum_hypothesis_list))[::-1]: - hypothesis_syns = set( - chain( - *[ - [ - lemma.name() - for lemma in synset.lemmas() - if lemma.name().find("_") < 0 - ] - for synset in wordnet.synsets(enum_hypothesis_list[i][1]) - ] - ) - ).union({enum_hypothesis_list[i][1]}) - for j in range(len(enum_reference_list))[::-1]: - if enum_reference_list[j][1] in hypothesis_syns: - word_match.append( - (enum_hypothesis_list[i][0], enum_reference_list[j][0]) - ) - enum_hypothesis_list.pop(i), enum_reference_list.pop(j) - break - return word_match, enum_hypothesis_list, enum_reference_list - - -def wordnetsyn_match(hypothesis, reference, wordnet=wordnet): - """ - Matches each word in reference to a word in hypothesis if any synonym - of a hypothesis word is the exact match to the reference word. - - :param hypothesis: hypothesis string - :param reference: reference string - :param wordnet: a wordnet corpus reader object (default nltk.corpus.wordnet) - :type wordnet: WordNetCorpusReader - :return: list of mapped tuples - :rtype: list of tuples - """ - enum_hypothesis_list, enum_reference_list = _generate_enums(hypothesis, reference) - return _enum_wordnetsyn_match( - enum_hypothesis_list, enum_reference_list, wordnet=wordnet - ) - - -def _enum_allign_words( - enum_hypothesis_list, enum_reference_list, stemmer=PorterStemmer(), wordnet=wordnet -): - """ - Aligns/matches words in the hypothesis to reference by sequentially - applying exact match, stemmed match and wordnet based synonym match. - in case there are multiple matches the match which has the least number - of crossing is chosen. Takes enumerated list as input instead of - string input - - :param enum_hypothesis_list: enumerated hypothesis list - :param enum_reference_list: enumerated reference list - :param stemmer: nltk.stem.api.StemmerI object (default PorterStemmer()) - :type stemmer: nltk.stem.api.StemmerI or any class that implements a stem method - :param wordnet: a wordnet corpus reader object (default nltk.corpus.wordnet) - :type wordnet: WordNetCorpusReader - :return: sorted list of matched tuples, unmatched hypothesis list, - unmatched reference list - :rtype: list of tuples, list of tuples, list of tuples - """ - exact_matches, enum_hypothesis_list, enum_reference_list = _match_enums( - enum_hypothesis_list, enum_reference_list - ) - - stem_matches, enum_hypothesis_list, enum_reference_list = _enum_stem_match( - enum_hypothesis_list, enum_reference_list, stemmer=stemmer - ) - - wns_matches, enum_hypothesis_list, enum_reference_list = _enum_wordnetsyn_match( - enum_hypothesis_list, enum_reference_list, wordnet=wordnet - ) - - return ( - sorted( - exact_matches + stem_matches + wns_matches, key=lambda wordpair: wordpair[0] - ), - enum_hypothesis_list, - enum_reference_list, - ) - - -def allign_words(hypothesis, reference, stemmer=PorterStemmer(), wordnet=wordnet): - """ - Aligns/matches words in the hypothesis to reference by sequentially - applying exact match, stemmed match and wordnet based synonym match. - In case there are multiple matches the match which has the least number - of crossing is chosen. 
- - :param hypothesis: hypothesis string - :param reference: reference string - :param stemmer: nltk.stem.api.StemmerI object (default PorterStemmer()) - :type stemmer: nltk.stem.api.StemmerI or any class that implements a stem method - :param wordnet: a wordnet corpus reader object (default nltk.corpus.wordnet) - :type wordnet: WordNetCorpusReader - :return: sorted list of matched tuples, unmatched hypothesis list, unmatched reference list - :rtype: list of tuples, list of tuples, list of tuples - """ - enum_hypothesis_list, enum_reference_list = _generate_enums(hypothesis, reference) - return _enum_allign_words( - enum_hypothesis_list, enum_reference_list, stemmer=stemmer, wordnet=wordnet - ) - - -def _count_chunks(matches): - """ - Counts the fewest possible number of chunks such that matched unigrams - of each chunk are adjacent to each other. This is used to caluclate the - fragmentation part of the metric. - - :param matches: list containing a mapping of matched words (output of allign_words) - :return: Number of chunks a sentence is divided into post allignment - :rtype: int - """ - i = 0 - chunks = 1 - while i < len(matches) - 1: - if (matches[i + 1][0] == matches[i][0] + 1) and ( - matches[i + 1][1] == matches[i][1] + 1 - ): - i += 1 - continue - i += 1 - chunks += 1 - return chunks - - -def single_meteor_score( - reference, - hypothesis, - preprocess=str.lower, - stemmer=PorterStemmer(), - wordnet=wordnet, - alpha=0.9, - beta=3, - gamma=0.5, -): - """ - Calculates METEOR score for single hypothesis and reference as per - "Meteor: An Automatic Metric for MT Evaluation with HighLevels of - Correlation with Human Judgments" by Alon Lavie and Abhaya Agarwal, - in Proceedings of ACL. - http://www.cs.cmu.edu/~alavie/METEOR/pdf/Lavie-Agarwal-2007-METEOR.pdf - - - >>> hypothesis1 = 'It is a guide to action which ensures that the military always obeys the commands of the party' - - >>> reference1 = 'It is a guide to action that ensures that the military will forever heed Party commands' - - - >>> round(single_meteor_score(reference1, hypothesis1),4) - 0.7398 - - If there is no words match during the alignment the method returns the - score as 0. We can safely return a zero instead of raising a - division by zero error as no match usually implies a bad translation. - - >>> round(meteor_score('this is a cat', 'non matching hypothesis'),4) - 0.0 - - :param references: reference sentences - :type references: list(str) - :param hypothesis: a hypothesis sentence - :type hypothesis: str - :param preprocess: preprocessing function (default str.lower) - :type preprocess: method - :param stemmer: nltk.stem.api.StemmerI object (default PorterStemmer()) - :type stemmer: nltk.stem.api.StemmerI or any class that implements a stem method - :param wordnet: a wordnet corpus reader object (default nltk.corpus.wordnet) - :type wordnet: WordNetCorpusReader - :param alpha: parameter for controlling relative weights of precision and recall. - :type alpha: float - :param beta: parameter for controlling shape of penalty as a - function of as a function of fragmentation. - :type beta: float - :param gamma: relative weight assigned to fragmentation penality. - :type gamma: float - :return: The sentence-level METEOR score. 
- :rtype: float - """ - enum_hypothesis, enum_reference = _generate_enums( - hypothesis, reference, preprocess=preprocess - ) - translation_length = len(enum_hypothesis) - reference_length = len(enum_reference) - matches, _, _ = _enum_allign_words(enum_hypothesis, enum_reference, stemmer=stemmer) - matches_count = len(matches) - try: - precision = float(matches_count) / translation_length - recall = float(matches_count) / reference_length - fmean = (precision * recall) / (alpha * precision + (1 - alpha) * recall) - chunk_count = float(_count_chunks(matches)) - frag_frac = chunk_count / matches_count - except ZeroDivisionError: - return 0.0 - penalty = gamma * frag_frac ** beta - return (1 - penalty) * fmean - - -def meteor_score( - references, - hypothesis, - preprocess=str.lower, - stemmer=PorterStemmer(), - wordnet=wordnet, - alpha=0.9, - beta=3, - gamma=0.5, -): - """ - Calculates METEOR score for hypothesis with multiple references as - described in "Meteor: An Automatic Metric for MT Evaluation with - HighLevels of Correlation with Human Judgments" by Alon Lavie and - Abhaya Agarwal, in Proceedings of ACL. - http://www.cs.cmu.edu/~alavie/METEOR/pdf/Lavie-Agarwal-2007-METEOR.pdf - - - In case of multiple references the best score is chosen. This method - iterates over single_meteor_score and picks the best pair among all - the references for a given hypothesis - - >>> hypothesis1 = 'It is a guide to action which ensures that the military always obeys the commands of the party' - >>> hypothesis2 = 'It is to insure the troops forever hearing the activity guidebook that party direct' - - >>> reference1 = 'It is a guide to action that ensures that the military will forever heed Party commands' - >>> reference2 = 'It is the guiding principle which guarantees the military forces always being under the command of the Party' - >>> reference3 = 'It is the practical guide for the army always to heed the directions of the party' - - >>> round(meteor_score([reference1, reference2, reference3], hypothesis1),4) - 0.7398 - - If there is no words match during the alignment the method returns the - score as 0. We can safely return a zero instead of raising a - division by zero error as no match usually implies a bad translation. - - >>> round(meteor_score(['this is a cat'], 'non matching hypothesis'),4) - 0.0 - - :param references: reference sentences - :type references: list(str) - :param hypothesis: a hypothesis sentence - :type hypothesis: str - :param preprocess: preprocessing function (default str.lower) - :type preprocess: method - :param stemmer: nltk.stem.api.StemmerI object (default PorterStemmer()) - :type stemmer: nltk.stem.api.StemmerI or any class that implements a stem method - :param wordnet: a wordnet corpus reader object (default nltk.corpus.wordnet) - :type wordnet: WordNetCorpusReader - :param alpha: parameter for controlling relative weights of precision and recall. - :type alpha: float - :param beta: parameter for controlling shape of penalty as a function - of as a function of fragmentation. - :type beta: float - :param gamma: relative weight assigned to fragmentation penality. - :type gamma: float - :return: The sentence-level METEOR score. 
- :rtype: float - """ - return max( - [ - single_meteor_score( - reference, - hypothesis, - stemmer=stemmer, - wordnet=wordnet, - alpha=alpha, - beta=beta, - gamma=gamma, - ) - for reference in references - ] - ) diff --git a/nlp_resource_data/nltk/translate/metrics.py b/nlp_resource_data/nltk/translate/metrics.py index d11addb..a984f96 100644 --- a/nlp_resource_data/nltk/translate/metrics.py +++ b/nlp_resource_data/nltk/translate/metrics.py @@ -1,11 +1,12 @@ # Natural Language Toolkit: Translation metrics # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Will Zhang # Guan Gui # Steven Bird # URL: # For license information, see LICENSE.TXT +from __future__ import division def alignment_error_rate(reference, hypothesis, possible=None): diff --git a/nlp_resource_data/nltk/translate/nist_score.py b/nlp_resource_data/nltk/translate/nist_score.py index ca9ac2b..57b2074 100644 --- a/nlp_resource_data/nltk/translate/nist_score.py +++ b/nlp_resource_data/nltk/translate/nist_score.py @@ -1,13 +1,14 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: NIST Score # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Authors: # Contributors: # URL: # For license information, see LICENSE.TXT """NIST score implementation.""" +from __future__ import division import math import fractions diff --git a/nlp_resource_data/nltk/translate/phrase_based.py b/nlp_resource_data/nltk/translate/phrase_based.py index a50887e..df2ba2d 100644 --- a/nlp_resource_data/nltk/translate/phrase_based.py +++ b/nlp_resource_data/nltk/translate/phrase_based.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: Phrase Extraction Algorithm # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Authors: Liling Tan, Fredrik Hedman, Petra Barancikova # URL: # For license information, see LICENSE.TXT @@ -41,11 +41,11 @@ def extract( :type f_start: int :param f_start: Starting index of the possible foreign language phrases :type f_end: int - :param f_end: End index of the possible foreign language phrases + :param f_end: Starting index of the possible foreign language phrases :type e_start: int :param e_start: Starting index of the possible source language phrases :type e_end: int - :param e_end: End index of the possible source language phrases + :param e_end: Starting index of the possible source language phrases :type srctext: list :param srctext: The source language tokens, a list of string. :type trgtext: list @@ -75,10 +75,10 @@ def extract( trg_phrase = " ".join(trgtext[fs : fe + 1]) # Include more data for later ordering. phrases.add( - ((e_start, e_end + 1), (fs, fe + 1), src_phrase, trg_phrase) + ((e_start, e_end + 1), (f_start, f_end + 1), src_phrase, trg_phrase) ) fe += 1 - if fe in f_aligned or fe >= trglen: + if fe in f_aligned or fe == trglen: break fs -= 1 if fs in f_aligned or fs < 0: @@ -111,20 +111,20 @@ def phrase_extraction(srctext, trgtext, alignment, max_phrase_length=0): ... 
((0, 1), (0, 1), 'michael', 'michael') ((0, 2), (0, 4), 'michael assumes', 'michael geht davon aus') - ((0, 2), (0, 5), 'michael assumes', 'michael geht davon aus ,') + ((0, 2), (0, 4), 'michael assumes', 'michael geht davon aus ,') ((0, 3), (0, 6), 'michael assumes that', 'michael geht davon aus , dass') ((0, 4), (0, 7), 'michael assumes that he', 'michael geht davon aus , dass er') ((0, 9), (0, 10), 'michael assumes that he will stay in the house', 'michael geht davon aus , dass er im haus bleibt') ((1, 2), (1, 4), 'assumes', 'geht davon aus') - ((1, 2), (1, 5), 'assumes', 'geht davon aus ,') + ((1, 2), (1, 4), 'assumes', 'geht davon aus ,') ((1, 3), (1, 6), 'assumes that', 'geht davon aus , dass') ((1, 4), (1, 7), 'assumes that he', 'geht davon aus , dass er') ((1, 9), (1, 10), 'assumes that he will stay in the house', 'geht davon aus , dass er im haus bleibt') - ((2, 3), (4, 6), 'that', ', dass') + ((2, 3), (5, 6), 'that', ', dass') ((2, 3), (5, 6), 'that', 'dass') - ((2, 4), (4, 7), 'that he', ', dass er') + ((2, 4), (5, 7), 'that he', ', dass er') ((2, 4), (5, 7), 'that he', 'dass er') - ((2, 9), (4, 10), 'that he will stay in the house', ', dass er im haus bleibt') + ((2, 9), (5, 10), 'that he will stay in the house', ', dass er im haus bleibt') ((2, 9), (5, 10), 'that he will stay in the house', 'dass er im haus bleibt') ((3, 4), (6, 7), 'he', 'er') ((3, 9), (6, 10), 'he will stay in the house', 'er im haus bleibt') @@ -138,7 +138,7 @@ def phrase_extraction(srctext, trgtext, alignment, max_phrase_length=0): :param srctext: The sentence string from the source language. :type trgtext: str :param trgtext: The sentence string from the target language. - :type alignment: list(tuple) + :type alignment: str :param alignment: The word alignment outputs as list of tuples, where the first elements of tuples are the source words' indices and second elements are the target words' indices. This is also the output diff --git a/nlp_resource_data/nltk/translate/ribes_score.py b/nlp_resource_data/nltk/translate/ribes_score.py index 912084f..fac42b0 100644 --- a/nlp_resource_data/nltk/translate/ribes_score.py +++ b/nlp_resource_data/nltk/translate/ribes_score.py @@ -1,13 +1,13 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: RIBES Score # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Contributors: Katsuhito Sudoh, Liling Tan, Kasramvd, J.F.Sebastian # Mark Byers, ekhumoro, P. Ortiz # URL: # For license information, see LICENSE.TXT """ RIBES score implementation """ - +from __future__ import division from itertools import islice import math diff --git a/nlp_resource_data/nltk/translate/stack_decoder.py b/nlp_resource_data/nltk/translate/stack_decoder.py index af0ce7e..2b4194a 100644 --- a/nlp_resource_data/nltk/translate/stack_decoder.py +++ b/nlp_resource_data/nltk/translate/stack_decoder.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: Stack decoder # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Tah Wei Hoon # URL: # For license information, see LICENSE.TXT @@ -188,9 +188,9 @@ class StackDecoder(object): if not stacks[sentence_length]: warnings.warn( - "Unable to translate all words. " - "The source sentence contains words not in " - "the phrase table" + 'Unable to translate all words. 
' + 'The source sentence contains words not in ' + 'the phrase table' ) # Instead of returning empty output, perhaps a partial # translation could be returned @@ -238,7 +238,7 @@ class StackDecoder(object): subsequence covering positions 2, 3, and 4. :rtype: dict(int: (dict(int): float)) """ - scores = defaultdict(lambda: defaultdict(lambda: float("-inf"))) + scores = defaultdict(lambda: defaultdict(lambda: float('-inf'))) for seq_length in range(1, len(src_sentence) + 1): for start in range(0, len(src_sentence) - seq_length + 1): end = start + seq_length @@ -466,7 +466,7 @@ class _Stack(object): self.items = [] if beam_threshold == 0.0: - self.__log_beam_threshold = float("-inf") + self.__log_beam_threshold = float('-inf') else: self.__log_beam_threshold = log(beam_threshold) diff --git a/nlp_resource_data/nltk/tree.py b/nlp_resource_data/nltk/tree.py index 1614c45..9f79355 100644 --- a/nlp_resource_data/nltk/tree.py +++ b/nlp_resource_data/nltk/tree.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: Text Trees # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Edward Loper # Steven Bird # Peter Ljunglöf @@ -13,15 +13,17 @@ Class for representing hierarchical language structures, such as syntax trees and morphological trees. """ +from __future__ import print_function, unicode_literals import re -import sys from abc import ABCMeta, abstractmethod +from six import string_types, add_metaclass from nltk.grammar import Production, Nonterminal from nltk.probability import ProbabilisticMixIn from nltk.util import slice_bounds +from nltk.compat import python_2_unicode_compatible, unicode_repr from nltk.internals import raise_unorderable_types # TODO: add LabelledTree (can be used for dependency trees) @@ -31,7 +33,7 @@ from nltk.internals import raise_unorderable_types ###################################################################### - +@python_2_unicode_compatible class Tree(list): """ A Tree represents a hierarchical grouping of leaves and subtrees. 
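A minimal usage sketch of the Tree API this file defines, assuming the patched nltk.tree module is importable; the bracketed string and labels below are illustrative and not taken from this patch.

# Illustrative sketch only; the example sentence is made up.
from nltk.tree import Tree

t = Tree.fromstring("(S (NP (DT the) (NN dog)) (VP (VBD barked)))")
print(t.label())    # 'S'
print(t.leaves())   # ['the', 'dog', 'barked']
print(t[0])         # first child: (NP (DT the) (NN dog))
t[0].insert(1, Tree.fromstring("(JJ big)"))   # in-place edit, as in demo() below
print(t.pformat(margin=40))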
@@ -101,7 +103,7 @@ class Tree(list): raise TypeError( "%s: Expected a node value and child list " % type(self).__name__ ) - elif isinstance(children, str): + elif isinstance(children, string_types): raise TypeError( "%s() argument 2 should be a list, not a " "string" % type(self).__name__ @@ -142,16 +144,16 @@ class Tree(list): # //////////////////////////////////////////////////////////// def __mul__(self, v): - raise TypeError("Tree does not support multiplication") + raise TypeError('Tree does not support multiplication') def __rmul__(self, v): - raise TypeError("Tree does not support multiplication") + raise TypeError('Tree does not support multiplication') def __add__(self, v): - raise TypeError("Tree does not support addition") + raise TypeError('Tree does not support addition') def __radd__(self, v): - raise TypeError("Tree does not support addition") + raise TypeError('Tree does not support addition') # //////////////////////////////////////////////////////////// # Indexing (with support for tree positions) @@ -178,7 +180,7 @@ class Tree(list): return list.__setitem__(self, index, value) elif isinstance(index, (list, tuple)): if len(index) == 0: - raise IndexError("The tree position () may not be " "assigned to.") + raise IndexError('The tree position () may not be ' 'assigned to.') elif len(index) == 1: self[index[0]] = value else: @@ -194,7 +196,7 @@ class Tree(list): return list.__delitem__(self, index) elif isinstance(index, (list, tuple)): if len(index) == 0: - raise IndexError("The tree position () may not be deleted.") + raise IndexError('The tree position () may not be deleted.') elif len(index) == 1: del self[index[0]] else: @@ -308,7 +310,7 @@ class Tree(list): max_child_height = max(max_child_height, 1) return 1 + max_child_height - def treepositions(self, order="preorder"): + def treepositions(self, order='preorder'): """ >>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))") >>> t.treepositions() # doctest: +ELLIPSIS @@ -322,7 +324,7 @@ class Tree(list): ``leaves``. """ positions = [] - if order in ("preorder", "bothorder"): + if order in ('preorder', 'bothorder'): positions.append(()) for i, child in enumerate(self): if isinstance(child, Tree): @@ -330,7 +332,7 @@ class Tree(list): positions.extend((i,) + p for p in childpos) else: positions.append((i,)) - if order in ("postorder", "bothorder"): + if order in ('postorder', 'bothorder'): positions.append(()) return positions @@ -372,9 +374,9 @@ class Tree(list): :rtype: list(Production) """ - if not isinstance(self._label, str): + if not isinstance(self._label, string_types): raise TypeError( - "Productions can only be generated from trees having node labels that are strings" + 'Productions can only be generated from trees having node labels that are strings' ) prods = [Production(Nonterminal(self._label), _child_names(self))] @@ -413,7 +415,7 @@ class Tree(list): leaves, or if ``index<0``. 
""" if index < 0: - raise IndexError("index must be non-negative") + raise IndexError('index must be non-negative') stack = [(self, ())] while stack: @@ -427,7 +429,7 @@ class Tree(list): for i in range(len(value) - 1, -1, -1): stack.append((value[i], treepos + (i,))) - raise IndexError("index must be less than or equal to len(self)") + raise IndexError('index must be less than or equal to len(self)') def treeposition_spanning_leaves(self, start, end): """ @@ -436,7 +438,7 @@ class Tree(list): :raise ValueError: if ``end <= start`` """ if end <= start: - raise ValueError("end must be greater than start") + raise ValueError('end must be greater than start') # Find the tree positions of the start & end leaves, and # take the longest common subsequence. start_treepos = self.leaf_treeposition(start) @@ -554,12 +556,6 @@ class Tree(list): else: return tree - def __copy__(self): - return self.copy() - - def __deepcopy__(self, memo): - return self.copy(deep=True) - def copy(self, deep=False): if not deep: return type(self)(self._label, self) @@ -575,7 +571,7 @@ class Tree(list): newcopy = frozen_class.convert(self) else: newcopy = self.copy(deep=True) - for pos in newcopy.treepositions("leaves"): + for pos in newcopy.treepositions('leaves'): newcopy[pos] = leaf_freezer(newcopy[pos]) newcopy = frozen_class.convert(newcopy) hash(newcopy) # Make sure the leaves are hashable. @@ -589,7 +585,7 @@ class Tree(list): def fromstring( cls, s, - brackets="()", + brackets='()', read_node=None, read_leaf=None, node_pattern=None, @@ -645,19 +641,19 @@ class Tree(list): then it will return a tree of that type. :rtype: Tree """ - if not isinstance(brackets, str) or len(brackets) != 2: - raise TypeError("brackets must be a length-2 string") - if re.search("\s", brackets): - raise TypeError("whitespace brackets not allowed") + if not isinstance(brackets, string_types) or len(brackets) != 2: + raise TypeError('brackets must be a length-2 string') + if re.search('\s', brackets): + raise TypeError('whitespace brackets not allowed') # Construct a regexp that will tokenize the string. open_b, close_b = brackets open_pattern, close_pattern = (re.escape(open_b), re.escape(close_b)) if node_pattern is None: - node_pattern = "[^\s%s%s]+" % (open_pattern, close_pattern) + node_pattern = '[^\s%s%s]+' % (open_pattern, close_pattern) if leaf_pattern is None: - leaf_pattern = "[^\s%s%s]+" % (open_pattern, close_pattern) + leaf_pattern = '[^\s%s%s]+' % (open_pattern, close_pattern) token_re = re.compile( - "%s\s*(%s)?|%s|(%s)" + '%s\s*(%s)?|%s|(%s)' % (open_pattern, node_pattern, close_pattern, leaf_pattern) ) # Walk through each token, updating a stack of trees. @@ -667,7 +663,7 @@ class Tree(list): # Beginning of a tree/subtree if token[0] == open_b: if len(stack) == 1 and len(stack[0][1]) > 0: - cls._parse_error(s, match, "end-of-string") + cls._parse_error(s, match, 'end-of-string') label = token[1:].lstrip() if read_node is not None: label = read_node(label) @@ -678,7 +674,7 @@ class Tree(list): if len(stack[0][1]) == 0: cls._parse_error(s, match, open_b) else: - cls._parse_error(s, match, "end-of-string") + cls._parse_error(s, match, 'end-of-string') label, children = stack.pop() stack[-1][1].append(cls(label, children)) # Leaf node @@ -691,9 +687,9 @@ class Tree(list): # check that we got exactly one complete tree. 
if len(stack) > 1: - cls._parse_error(s, "end-of-string", close_b) + cls._parse_error(s, 'end-of-string', close_b) elif len(stack[0][1]) == 0: - cls._parse_error(s, "end-of-string", open_b) + cls._parse_error(s, 'end-of-string', open_b) else: assert stack[0][0] is None assert len(stack[0][1]) == 1 @@ -701,7 +697,7 @@ class Tree(list): # If the tree has an extra level with node='', then get rid of # it. E.g.: "((S (NP ...) (VP ...)))" - if remove_empty_top_bracketing and tree._label == "" and len(tree) == 1: + if remove_empty_top_bracketing and tree._label == '' and len(tree) == 1: tree = tree[0] # return the tree. return tree @@ -715,26 +711,26 @@ class Tree(list): :param expecting: what we expected to see instead. """ # Construct a basic error message - if match == "end-of-string": - pos, token = len(s), "end-of-string" + if match == 'end-of-string': + pos, token = len(s), 'end-of-string' else: pos, token = match.start(), match.group() - msg = "%s.read(): expected %r but got %r\n%sat index %d." % ( + msg = '%s.read(): expected %r but got %r\n%sat index %d.' % ( cls.__name__, expecting, token, - " " * 12, + ' ' * 12, pos, ) # Add a display showing the error token itsels: - s = s.replace("\n", " ").replace("\t", " ") + s = s.replace('\n', ' ').replace('\t', ' ') offset = pos if len(s) > pos + 10: - s = s[: pos + 10] + "..." + s = s[: pos + 10] + '...' if pos > 10: - s = "..." + s[pos - 10 :] + s = '...' + s[pos - 10 :] offset = 13 - msg += '\n%s"%s"\n%s^' % (" " * 16, s, " " * (17 + offset)) + msg += '\n%s"%s"\n%s^' % (' ' * 16, s, ' ' * (17 + offset)) raise ValueError(msg) # //////////////////////////////////////////////////////////// @@ -760,10 +756,10 @@ class Tree(list): print(TreePrettyPrinter(self, sentence, highlight).text(**kwargs), file=stream) def __repr__(self): - childstr = ", ".join(repr(c) for c in self) - return "%s(%s, [%s])" % ( + childstr = ", ".join(unicode_repr(c) for c in self) + return '%s(%s, [%s])' % ( type(self).__name__, - repr(self._label), + unicode_repr(self._label), childstr, ) @@ -786,37 +782,26 @@ class Tree(list): _canvas_frame.add_widget(widget) x, y, w, h = widget.bbox() # print_to_file uses scrollregion to set the width and height of the pdf. 
- _canvas_frame.canvas()["scrollregion"] = (0, 0, w, h) + _canvas_frame.canvas()['scrollregion'] = (0, 0, w, h) with tempfile.NamedTemporaryFile() as file: - in_path = "{0:}.ps".format(file.name) - out_path = "{0:}.png".format(file.name) + in_path = '{0:}.ps'.format(file.name) + out_path = '{0:}.png'.format(file.name) _canvas_frame.print_to_file(in_path) _canvas_frame.destroy_widget(widget) - try: - subprocess.call( - [ - find_binary( - "gs", - binary_names=["gswin32c.exe", "gswin64c.exe"], - env_vars=["PATH"], - verbose=False, - ) - ] - + "-q -dEPSCrop -sDEVICE=png16m -r90 -dTextAlphaBits=4 -dGraphicsAlphaBits=4 -dSAFER -dBATCH -dNOPAUSE -sOutputFile={0:} {1:}".format( - out_path, in_path - ).split() - ) - except LookupError: - pre_error_message = str( - "The Ghostscript executable isn't found.\n" - "See http://web.mit.edu/ghostscript/www/Install.htm\n" - "If you're using a Mac, you can try installing\n" - "https://docs.brew.sh/Installation then `brew install ghostscript`" - ) - print(pre_error_message, file=sys.stderr) - raise LookupError - - with open(out_path, "rb") as sr: + subprocess.call( + [ + find_binary( + 'gs', + binary_names=['gswin32c.exe', 'gswin64c.exe'], + env_vars=['PATH'], + verbose=False, + ) + ] + + '-q -dEPSCrop -sDEVICE=png16m -r90 -dTextAlphaBits=4 -dGraphicsAlphaBits=4 -dSAFER -dBATCH -dNOPAUSE -sOutputFile={0:} {1:}'.format( + out_path, in_path + ).split() + ) + with open(out_path, 'rb') as sr: res = sr.read() os.remove(in_path) os.remove(out_path) @@ -837,7 +822,7 @@ class Tree(list): stream = None print(self.pformat(**kwargs), file=stream) - def pformat(self, margin=70, indent=0, nodesep="", parens="()", quotes=False): + def pformat(self, margin=70, indent=0, nodesep='', parens='()', quotes=False): """ :return: A pretty-printed string representation of this tree. :rtype: str @@ -858,23 +843,23 @@ class Tree(list): return s # If it doesn't fit on one line, then write it on multi-lines. - if isinstance(self._label, str): - s = "%s%s%s" % (parens[0], self._label, nodesep) + if isinstance(self._label, string_types): + s = '%s%s%s' % (parens[0], self._label, nodesep) else: - s = "%s%s%s" % (parens[0], repr(self._label), nodesep) + s = '%s%s%s' % (parens[0], unicode_repr(self._label), nodesep) for child in self: if isinstance(child, Tree): s += ( - "\n" - + " " * (indent + 2) + '\n' + + ' ' * (indent + 2) + child.pformat(margin, indent + 2, nodesep, parens, quotes) ) elif isinstance(child, tuple): - s += "\n" + " " * (indent + 2) + "/".join(child) - elif isinstance(child, str) and not quotes: - s += "\n" + " " * (indent + 2) + "%s" % child + s += '\n' + ' ' * (indent + 2) + "/".join(child) + elif isinstance(child, string_types) and not quotes: + s += '\n' + ' ' * (indent + 2) + '%s' % child else: - s += "\n" + " " * (indent + 2) + repr(child) + s += '\n' + ' ' * (indent + 2) + unicode_repr(child) return s + parens[1] def pformat_latex_qtree(self): @@ -895,10 +880,10 @@ class Tree(list): :return: A latex qtree representation of this tree. 
:rtype: str """ - reserved_chars = re.compile("([#\$%&~_\{\}])") + reserved_chars = re.compile('([#\$%&~_\{\}])') - pformat = self.pformat(indent=6, nodesep="", parens=("[.", " ]")) - return r"\Tree " + re.sub(reserved_chars, r"\\\1", pformat) + pformat = self.pformat(indent=6, nodesep='', parens=('[.', ' ]')) + return r'\Tree ' + re.sub(reserved_chars, r'\\\1', pformat) def _pformat_flat(self, nodesep, parens, quotes): childstrs = [] @@ -907,12 +892,12 @@ class Tree(list): childstrs.append(child._pformat_flat(nodesep, parens, quotes)) elif isinstance(child, tuple): childstrs.append("/".join(child)) - elif isinstance(child, str) and not quotes: - childstrs.append("%s" % child) + elif isinstance(child, string_types) and not quotes: + childstrs.append('%s' % child) else: - childstrs.append(repr(child)) - if isinstance(self._label, str): - return "%s%s%s %s%s" % ( + childstrs.append(unicode_repr(child)) + if isinstance(self._label, string_types): + return '%s%s%s %s%s' % ( parens[0], self._label, nodesep, @@ -920,9 +905,9 @@ class Tree(list): parens[1], ) else: - return "%s%s%s %s%s" % ( + return '%s%s%s %s%s' % ( parens[0], - repr(self._label), + unicode_repr(self._label), nodesep, " ".join(childstrs), parens[1], @@ -942,40 +927,40 @@ class ImmutableTree(Tree): ) def __setitem__(self, index, value): - raise ValueError("%s may not be modified" % type(self).__name__) + raise ValueError('%s may not be modified' % type(self).__name__) def __setslice__(self, i, j, value): - raise ValueError("%s may not be modified" % type(self).__name__) + raise ValueError('%s may not be modified' % type(self).__name__) def __delitem__(self, index): - raise ValueError("%s may not be modified" % type(self).__name__) + raise ValueError('%s may not be modified' % type(self).__name__) def __delslice__(self, i, j): - raise ValueError("%s may not be modified" % type(self).__name__) + raise ValueError('%s may not be modified' % type(self).__name__) def __iadd__(self, other): - raise ValueError("%s may not be modified" % type(self).__name__) + raise ValueError('%s may not be modified' % type(self).__name__) def __imul__(self, other): - raise ValueError("%s may not be modified" % type(self).__name__) + raise ValueError('%s may not be modified' % type(self).__name__) def append(self, v): - raise ValueError("%s may not be modified" % type(self).__name__) + raise ValueError('%s may not be modified' % type(self).__name__) def extend(self, v): - raise ValueError("%s may not be modified" % type(self).__name__) + raise ValueError('%s may not be modified' % type(self).__name__) def pop(self, v=None): - raise ValueError("%s may not be modified" % type(self).__name__) + raise ValueError('%s may not be modified' % type(self).__name__) def remove(self, v): - raise ValueError("%s may not be modified" % type(self).__name__) + raise ValueError('%s may not be modified' % type(self).__name__) def reverse(self): - raise ValueError("%s may not be modified" % type(self).__name__) + raise ValueError('%s may not be modified' % type(self).__name__) def sort(self): - raise ValueError("%s may not be modified" % type(self).__name__) + raise ValueError('%s may not be modified' % type(self).__name__) def __hash__(self): return self._hash @@ -985,15 +970,16 @@ class ImmutableTree(Tree): Set the node label. This will only succeed the first time the node label is set, which should occur in ImmutableTree.__init__(). 
""" - if hasattr(self, "_label"): - raise ValueError("%s may not be modified" % type(self).__name__) + if hasattr(self, '_label'): + raise ValueError('%s may not be modified' % type(self).__name__) self._label = value ###################################################################### ## Parented trees ###################################################################### -class AbstractParentedTree(Tree, metaclass=ABCMeta): +@add_metaclass(ABCMeta) +class AbstractParentedTree(Tree): """ An abstract base class for a ``Tree`` that automatically maintains pointers to parent nodes. These parent pointers are updated @@ -1094,7 +1080,7 @@ class AbstractParentedTree(Tree, metaclass=ABCMeta): if index < 0: index += len(self) if index < 0: - raise IndexError("index out of range") + raise IndexError('index out of range') # Clear the child's parent pointer. if isinstance(self[index], Tree): self._delparent(self[index], index) @@ -1104,7 +1090,7 @@ class AbstractParentedTree(Tree, metaclass=ABCMeta): elif isinstance(index, (list, tuple)): # del ptree[()] if len(index) == 0: - raise IndexError("The tree position () may not be deleted.") + raise IndexError('The tree position () may not be deleted.') # del ptree[(i,)] elif len(index) == 1: del self[index[0]] @@ -1148,7 +1134,7 @@ class AbstractParentedTree(Tree, metaclass=ABCMeta): if index < 0: index += len(self) if index < 0: - raise IndexError("index out of range") + raise IndexError('index out of range') # if the value is not changing, do nothing. if value is self[index]: return @@ -1164,7 +1150,7 @@ class AbstractParentedTree(Tree, metaclass=ABCMeta): elif isinstance(index, (list, tuple)): # ptree[()] = value if len(index) == 0: - raise IndexError("The tree position () may not be assigned to.") + raise IndexError('The tree position () may not be assigned to.') # ptree[(i,)] = value elif len(index) == 1: self[index[0]] = value @@ -1206,7 +1192,7 @@ class AbstractParentedTree(Tree, metaclass=ABCMeta): if index < 0: index += len(self) if index < 0: - raise IndexError("index out of range") + raise IndexError('index out of range') if isinstance(self[index], Tree): self._delparent(self[index], index) return super(AbstractParentedTree, self).pop(index) @@ -1225,7 +1211,7 @@ class AbstractParentedTree(Tree, metaclass=ABCMeta): # __getitem__ etc., but use max(0, start) and max(0, stop) because # because negative indices are already handled *before* # __getslice__ is called; and we don't want to double-count them. - if hasattr(list, "__getslice__"): + if hasattr(list, '__getslice__'): def __getslice__(self, start, stop): return self.__getitem__(slice(max(0, start), max(0, stop))) @@ -1293,7 +1279,7 @@ class ParentedTree(AbstractParentedTree): for i, child in enumerate(self._parent): if child is self: return i - assert False, "expected to find self in self._parent!" + assert False, 'expected to find self in self._parent!' def left_sibling(self): """The left sibling of this tree, or None if it has none.""" @@ -1347,12 +1333,12 @@ class ParentedTree(AbstractParentedTree): # If the child's type is incorrect, then complain. if not isinstance(child, ParentedTree): raise TypeError( - "Can not insert a non-ParentedTree " + "into a ParentedTree" + 'Can not insert a non-ParentedTree ' + 'into a ParentedTree' ) # If child already has a parent, then complain. 
if child._parent is not None: - raise ValueError("Can not insert a subtree that already " "has a parent.") + raise ValueError('Can not insert a subtree that already ' 'has a parent.') # Set child's parent pointer & index. if not dry_run: @@ -1526,7 +1512,7 @@ class MultiParentedTree(AbstractParentedTree): # If the child's type is incorrect, then complain. if not isinstance(child, MultiParentedTree): raise TypeError( - "Can not insert a non-MultiParentedTree " + "into a MultiParentedTree" + 'Can not insert a non-MultiParentedTree ' + 'into a MultiParentedTree' ) # Add self as a parent pointer if it's not already listed. @@ -1551,7 +1537,7 @@ class ImmutableMultiParentedTree(ImmutableTree, MultiParentedTree): ###################################################################### - +@python_2_unicode_compatible class ProbabilisticTree(Tree, ProbabilisticMixIn): def __init__(self, node, children=None, **prob_kwargs): Tree.__init__(self, node, children) @@ -1562,10 +1548,10 @@ class ProbabilisticTree(Tree, ProbabilisticMixIn): return ImmutableProbabilisticTree def __repr__(self): - return "%s (p=%r)" % (Tree.__repr__(self), self.prob()) + return '%s (p=%r)' % (Tree.unicode_repr(self), self.prob()) def __str__(self): - return "%s (p=%.6g)" % (self.pformat(margin=60), self.prob()) + return '%s (p=%.6g)' % (self.pformat(margin=60), self.prob()) def copy(self, deep=False): if not deep: @@ -1604,7 +1590,7 @@ class ProbabilisticTree(Tree, ProbabilisticMixIn): return self.__class__.__name__ < other.__class__.__name__ - +@python_2_unicode_compatible class ImmutableProbabilisticTree(ImmutableTree, ProbabilisticMixIn): def __init__(self, node, children=None, **prob_kwargs): ImmutableTree.__init__(self, node, children) @@ -1616,10 +1602,10 @@ class ImmutableProbabilisticTree(ImmutableTree, ProbabilisticMixIn): return ImmutableProbabilisticTree def __repr__(self): - return "%s [%s]" % (Tree.__repr__(self), self.prob()) + return '%s [%s]' % (Tree.unicode_repr(self), self.prob()) def __str__(self): - return "%s [%s]" % (self.pformat(margin=60), self.prob()) + return '%s [%s]' % (self.pformat(margin=60), self.prob()) def copy(self, deep=False): if not deep: @@ -1672,21 +1658,21 @@ def sinica_parse(s): :param s: The string to be converted :type s: str """ - tokens = re.split(r"([()| ])", s) + tokens = re.split(r'([()| ])', s) for i in range(len(tokens)): - if tokens[i] == "(": + if tokens[i] == '(': tokens[i - 1], tokens[i] = ( tokens[i], tokens[i - 1], ) # pull nonterminal inside parens - elif ":" in tokens[i]: - fields = tokens[i].split(":") + elif ':' in tokens[i]: + fields = tokens[i].split(':') if len(fields) == 2: # non-terminal tokens[i] = fields[1] else: tokens[i] = "(" + fields[-2] + " " + fields[-1] + ")" - elif tokens[i] == "|": - tokens[i] = "" + elif tokens[i] == '|': + tokens[i] = '' treebank_string = " ".join(tokens) return Tree.fromstring(treebank_string, remove_empty_top_bracketing=True) @@ -1713,7 +1699,7 @@ def demo(): from nltk import Tree, ProbabilisticTree # Demonstrate tree parsing. - s = "(S (NP (DT the) (NN cat)) (VP (VBD ate) (NP (DT a) (NN cookie))))" + s = '(S (NP (DT the) (NN cat)) (VP (VBD ate) (NP (DT a) (NN cookie))))' t = Tree.fromstring(s) print("Convert bracketed string into tree:") print(t) @@ -1731,10 +1717,10 @@ def demo(): # Demonstrate tree modification. 
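A short usage sketch of this class, mirroring the test() function further down in this file; the example tree itself is illustrative.

# Illustrative sketch, patterned on test() below.
from nltk.tree import Tree
from nltk.treeprettyprinter import TreePrettyPrinter

tree = Tree.fromstring("(S (NP (DT the) (NN cat)) (VP (VBD sat)))")
pp = TreePrettyPrinter(tree)
print(pp.text())                                # plain ASCII drawing
print(pp.text(unicodelines=True, nodedist=2))   # box-drawing characters, wider spacing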
the_cat = t[0] - the_cat.insert(1, Tree.fromstring("(JJ big)")) + the_cat.insert(1, Tree.fromstring('(JJ big)')) print("Tree modification:") print(t) - t[1, 1, 1] = Tree.fromstring("(NN cake)") + t[1, 1, 1] = Tree.fromstring('(NN cake)') print(t) print() @@ -1748,7 +1734,7 @@ def demo(): print() # Demonstrate probabilistic trees. - pt = ProbabilisticTree("x", ["y", "z"], prob=0.5) + pt = ProbabilisticTree('x', ['y', 'z'], prob=0.5) print("Probabilistic Tree:") print(pt) print() @@ -1770,20 +1756,20 @@ def demo(): print() # Demonstrate tree nodes containing objects other than strings - t.set_label(("test", 3)) + t.set_label(('test', 3)) print(t) __all__ = [ - "ImmutableProbabilisticTree", - "ImmutableTree", - "ProbabilisticMixIn", - "ProbabilisticTree", - "Tree", - "bracket_parse", - "sinica_parse", - "ParentedTree", - "MultiParentedTree", - "ImmutableParentedTree", - "ImmutableMultiParentedTree", + 'ImmutableProbabilisticTree', + 'ImmutableTree', + 'ProbabilisticMixIn', + 'ProbabilisticTree', + 'Tree', + 'bracket_parse', + 'sinica_parse', + 'ParentedTree', + 'MultiParentedTree', + 'ImmutableParentedTree', + 'ImmutableMultiParentedTree', ] diff --git a/nlp_resource_data/nltk/treeprettyprinter.py b/nlp_resource_data/nltk/treeprettyprinter.py index 50b0bb0..260f431 100644 --- a/nlp_resource_data/nltk/treeprettyprinter.py +++ b/nlp_resource_data/nltk/treeprettyprinter.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: ASCII visualization of NLTK trees # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Andreas van Cranenburgh # Peter Ljunglöf # URL: @@ -18,29 +18,30 @@ Graph Algorithms and Applications, 10(2) 141--157 (2006)149. http://jgaa.info/accepted/2006/EschbachGuentherBecker2006.10.2.pdf """ +from __future__ import division, print_function, unicode_literals + import re -try: - from html import escape -except ImportError: - from cgi import escape +from cgi import escape from collections import defaultdict from operator import itemgetter from nltk.util import OrderedDict +from nltk.compat import python_2_unicode_compatible from nltk.tree import Tree ANSICOLOR = { - "black": 30, - "red": 31, - "green": 32, - "yellow": 33, - "blue": 34, - "magenta": 35, - "cyan": 36, - "white": 37, + 'black': 30, + 'red': 31, + 'green': 32, + 'yellow': 33, + 'blue': 34, + 'magenta': 35, + 'cyan': 36, + 'white': 37, } +@python_2_unicode_compatible class TreePrettyPrinter(object): """ Pretty-print a tree in text format, either as ASCII or Unicode. 
@@ -92,8 +93,8 @@ class TreePrettyPrinter(object): if not isinstance(b, Tree): a[n] = len(sentence) if type(b) == tuple: - b = "/".join(b) - sentence.append("%s" % b) + b = '/'.join(b) + sentence.append('%s' % b) self.nodes, self.coords, self.edges, self.highlight = self.nodecoords( tree, sentence, highlight ) @@ -102,7 +103,7 @@ class TreePrettyPrinter(object): return self.text() def __repr__(self): - return "" % len(self.nodes) + return '' % len(self.nodes) @staticmethod def nodecoords(tree, sentence, highlight): @@ -190,27 +191,27 @@ class TreePrettyPrinter(object): i += scale j -= scale raise ValueError( - "could not find a free cell for:\n%s\n%s" - "min=%d; max=%d" % (tree[m], minidx, maxidx, dumpmatrix()) + 'could not find a free cell for:\n%s\n%s' + 'min=%d; max=%d' % (tree[m], minidx, maxidx, dumpmatrix()) ) def dumpmatrix(): """Dump matrix contents for debugging purposes.""" - return "\n".join( - "%2d: %s" % (n, " ".join(("%2r" % i)[:2] for i in row)) + return '\n'.join( + '%2d: %s' % (n, ' '.join(('%2r' % i)[:2] for i in row)) for n, row in enumerate(matrix) ) leaves = tree.leaves() if not all(isinstance(n, int) for n in leaves): - raise ValueError("All leaves must be integer indices.") + raise ValueError('All leaves must be integer indices.') if len(leaves) != len(set(leaves)): - raise ValueError("Indices must occur at most once.") + raise ValueError('Indices must occur at most once.') if not all(0 <= n < len(sentence) for n in leaves): raise ValueError( - "All leaves must be in the interval 0..n " - "with n=len(sentence)\ntokens: %d indices: " - "%r\nsentence: %s" % (len(sentence), tree.leaves(), sentence) + 'All leaves must be in the interval 0..n ' + 'with n=len(sentence)\ntokens: %d indices: ' + '%r\nsentence: %s' % (len(sentence), tree.leaves(), sentence) ) vertline, corner = -1, -2 # constants tree = tree.copy(True) @@ -248,7 +249,7 @@ class TreePrettyPrinter(object): matrix[0][i] = ids[m] nodes[ids[m]] = sentence[tree[m]] if nodes[ids[m]] is None: - nodes[ids[m]] = "..." + nodes[ids[m]] = '...' highlighted_nodes.discard(ids[m]) positions.remove(m) childcols[m[:-1]].add((0, i)) @@ -334,9 +335,9 @@ class TreePrettyPrinter(object): unicodelines=False, html=False, ansi=False, - nodecolor="blue", - leafcolor="red", - funccolor="green", + nodecolor='blue', + leafcolor='red', + funccolor='green', abbreviate=None, maxwidth=16, ): @@ -358,28 +359,28 @@ class TreePrettyPrinter(object): if abbreviate == True: abbreviate = 5 if unicodelines: - horzline = "\u2500" - leftcorner = "\u250c" - rightcorner = "\u2510" - vertline = " \u2502 " - tee = horzline + "\u252C" + horzline - bottom = horzline + "\u2534" + horzline - cross = horzline + "\u253c" + horzline - ellipsis = "\u2026" + horzline = '\u2500' + leftcorner = '\u250c' + rightcorner = '\u2510' + vertline = ' \u2502 ' + tee = horzline + '\u252C' + horzline + bottom = horzline + '\u2534' + horzline + cross = horzline + '\u253c' + horzline + ellipsis = '\u2026' else: - horzline = "_" - leftcorner = rightcorner = " " - vertline = " | " + horzline = '_' + leftcorner = rightcorner = ' ' + vertline = ' | ' tee = 3 * horzline - cross = bottom = "_|_" - ellipsis = "." + cross = bottom = '_|_' + ellipsis = '.' 
def crosscell(cur, x=vertline): """Overwrite center of this cell with a vertical branch.""" splitl = len(cur) - len(cur) // 2 - len(x) // 2 - 1 lst = list(cur) lst[splitl : splitl + len(x)] = list(x) - return "".join(lst) + return ''.join(lst) result = [] matrix = defaultdict(dict) @@ -391,7 +392,7 @@ class TreePrettyPrinter(object): childcols = defaultdict(set) labels = {} wrapre = re.compile( - "(.{%d,%d}\\b\\W*|.{%d})" % (maxwidth - 4, maxwidth, maxwidth) + '(.{%d,%d}\\b\\W*|.{%d})' % (maxwidth - 4, maxwidth, maxwidth) ) # collect labels and coordinates for a in self.nodes: @@ -406,8 +407,8 @@ class TreePrettyPrinter(object): if abbreviate and len(label) > abbreviate: label = label[:abbreviate] + ellipsis if maxwidth and len(label) > maxwidth: - label = wrapre.sub(r"\1\n", label).strip() - label = label.split("\n") + label = wrapre.sub(r'\1\n', label).strip() + label = label.split('\n') maxnodeheight[row] = max(maxnodeheight[row], len(label)) maxnodewith[column] = max(maxnodewith[column], max(map(len, label))) labels[a] = label @@ -420,10 +421,10 @@ class TreePrettyPrinter(object): # bottom up level order traversal for row in sorted(matrix, reverse=True): noderows = [ - ["".center(maxnodewith[col]) for col in range(maxcol + 1)] + [''.center(maxnodewith[col]) for col in range(maxcol + 1)] for _ in range(maxnodeheight[row]) ] - branchrow = ["".center(maxnodewith[col]) for col in range(maxcol + 1)] + branchrow = [''.center(maxnodewith[col]) for col in range(maxcol + 1)] for col in matrix[row]: n = matrix[row][col] node = self.nodes[n] @@ -433,10 +434,10 @@ class TreePrettyPrinter(object): if n in minchildcol and minchildcol[n] < maxchildcol[n]: i, j = minchildcol[n], maxchildcol[n] a, b = (maxnodewith[i] + 1) // 2 - 1, maxnodewith[j] // 2 - branchrow[i] = ((" " * a) + leftcorner).ljust( + branchrow[i] = ((' ' * a) + leftcorner).ljust( maxnodewith[i], horzline ) - branchrow[j] = (rightcorner + (" " * b)).rjust( + branchrow[j] = (rightcorner + (' ' * b)).rjust( maxnodewith[j], horzline ) for i in range(minchildcol[n] + 1, maxchildcol[n]): @@ -453,22 +454,22 @@ class TreePrettyPrinter(object): branchrow[col] = crosscell(branchrow[col]) text = [a.center(maxnodewith[col]) for a in text] color = nodecolor if isinstance(node, Tree) else leafcolor - if isinstance(node, Tree) and node.label().startswith("-"): + if isinstance(node, Tree) and node.label().startswith('-'): color = funccolor if html: - text = [escape(a, quote=False) for a in text] + text = [escape(a) for a in text] if n in self.highlight: - text = ["%s" % (color, a) for a in text] + text = ['%s' % (color, a) for a in text] elif ansi and n in self.highlight: - text = ["\x1b[%d;1m%s\x1b[0m" % (ANSICOLOR[color], a) for a in text] + text = ['\x1b[%d;1m%s\x1b[0m' % (ANSICOLOR[color], a) for a in text] for x in range(maxnodeheight[row]): # draw vertical lines in partially filled multiline node # labels, but only if it's not a frontier node. 
noderows[x][col] = ( text[x] if x < len(text) - else (vertline if childcols[n] else " ").center( - maxnodewith[col], " " + else (vertline if childcols[n] else ' ').center( + maxnodewith[col], ' ' ) ) # for each column, if there is a node below us which has a parent @@ -481,16 +482,16 @@ class TreePrettyPrinter(object): for noderow in noderows: noderow[col] = crosscell(noderow[col]) branchrow = [ - a + ((a[-1] if a[-1] != " " else b[0]) * nodedist) - for a, b in zip(branchrow, branchrow[1:] + [" "]) + a + ((a[-1] if a[-1] != ' ' else b[0]) * nodedist) + for a, b in zip(branchrow, branchrow[1:] + [' ']) ] - result.append("".join(branchrow)) + result.append(''.join(branchrow)) result.extend( - (" " * nodedist).join(noderow) for noderow in reversed(noderows) + (' ' * nodedist).join(noderow) for noderow in reversed(noderows) ) - return "\n".join(reversed(result)) + "\n" + return '\n'.join(reversed(result)) + '\n' - def svg(self, nodecolor="blue", leafcolor="red", funccolor="green"): + def svg(self, nodecolor='blue', leafcolor='red', funccolor='green'): """ :return: SVG representation of a tree. """ @@ -563,10 +564,10 @@ class TreePrettyPrinter(object): y = row * vscale + vstart if n in self.highlight: color = nodecolor if isinstance(node, Tree) else leafcolor - if isinstance(node, Tree) and node.label().startswith("-"): + if isinstance(node, Tree) and node.label().startswith('-'): color = funccolor else: - color = "black" + color = 'black' result += [ '\t%s' @@ -575,12 +576,12 @@ class TreePrettyPrinter(object): fontsize, x, y, - escape(node.label() if isinstance(node, Tree) else node, quote=False), + escape(node.label() if isinstance(node, Tree) else node), ) ] - result += [""] - return "\n".join(result) + result += [''] + return '\n'.join(result) def test(): @@ -588,7 +589,7 @@ def test(): def print_tree(n, tree, sentence=None, ansi=True, **xargs): print() - print('{0}: "{1}"'.format(n, " ".join(sentence or tree.leaves()))) + print('{0}: "{1}"'.format(n, ' '.join(sentence or tree.leaves()))) print(tree) print() drawtree = TreePrettyPrinter(tree, sentence) @@ -603,23 +604,23 @@ def test(): tree = treebank.parsed_sents()[n] print_tree(n, tree, nodedist=2, maxwidth=8) print() - print("ASCII version:") + print('ASCII version:') print(TreePrettyPrinter(tree).text(nodedist=2)) tree = Tree.fromstring( - "(top (punct 8) (smain (noun 0) (verb 1) (inf (verb 5) (inf (verb 6) " - "(conj (inf (pp (prep 2) (np (det 3) (noun 4))) (verb 7)) (inf (verb 9)) " - "(vg 10) (inf (verb 11)))))) (punct 12))", + '(top (punct 8) (smain (noun 0) (verb 1) (inf (verb 5) (inf (verb 6) ' + '(conj (inf (pp (prep 2) (np (det 3) (noun 4))) (verb 7)) (inf (verb 9)) ' + '(vg 10) (inf (verb 11)))))) (punct 12))', read_leaf=int, ) sentence = ( - "Ze had met haar moeder kunnen gaan winkelen ," - " zwemmen of terrassen .".split() + 'Ze had met haar moeder kunnen gaan winkelen ,' + ' zwemmen of terrassen .'.split() ) - print_tree("Discontinuous tree", tree, sentence, nodedist=2) + print_tree('Discontinuous tree', tree, sentence, nodedist=2) -__all__ = ["TreePrettyPrinter"] +__all__ = ['TreePrettyPrinter'] -if __name__ == "__main__": +if __name__ == '__main__': test() diff --git a/nlp_resource_data/nltk/treetransforms.py b/nlp_resource_data/nltk/treetransforms.py index 0c422f6..c64ac70 100644 --- a/nlp_resource_data/nltk/treetransforms.py +++ b/nlp_resource_data/nltk/treetransforms.py @@ -106,6 +106,7 @@ The following is a short tutorial on the available transformations. 
C D C D """ +from __future__ import print_function from nltk.tree import Tree @@ -331,7 +332,7 @@ def demo(): draw_trees(t, collapsedTree, cnfTree, parentTree, original) -if __name__ == "__main__": +if __name__ == '__main__': demo() __all__ = ["chomsky_normal_form", "un_chomsky_normal_form", "collapse_unary"] diff --git a/nlp_resource_data/nltk/twitter/__init__.py b/nlp_resource_data/nltk/twitter/__init__.py index 1666e2c..2d848e0 100644 --- a/nlp_resource_data/nltk/twitter/__init__.py +++ b/nlp_resource_data/nltk/twitter/__init__.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: Twitter # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Ewan Klein # URL: # For license information, see LICENSE.TXT diff --git a/nlp_resource_data/nltk/twitter/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/twitter/__pycache__/__init__.cpython-37.pyc index 053ad6a13d3c8c8054766ebcd2c56139410a8a27..59d96cd004ced118174a6d8095a4f6208e5c4d35 100644 GIT binary patch delta 47 ycmZo*`@+iY#LLUY00gD%6E|`{Wn_$+{E=~r03T4K2t@p{*DozD(x1GLDINd~Wehj~ delta 59 zcmeyu+Q7!`#LLUY00eSL0UNoWGBVzp{E=~r3_nn$2t@qy*3Zb#P1Vmztjx$SNG!_M K*PA?*DINgHI}rW= diff --git a/nlp_resource_data/nltk/twitter/__pycache__/api.cpython-37.pyc b/nlp_resource_data/nltk/twitter/__pycache__/api.cpython-37.pyc index b098ccce80b5c41ee4268793c6b44d709deb915e..d7f7688bb167f04eae720720a0dd5ca9f55732ed 100644 GIT binary patch delta 1139 zcmZ`&J8#rL5cb-8Yv0+vI|4zu03i@wCy@lDh@uGb6t5-{kTgeOIky|diO-JKjs)SN zM7mIbL=kNpp-@p#MRW*iYC1Rc6#M`xW)_J91eW&m?94YiJCFT3x_7F2!}lu&eCzgZ zP#f5-4v`nT=O;qeV#MHtm*R4#(yC~g#cs!IdBnJ5aGN_j26u$rvs%9Jdq%4&DuVV1 z*vh;Dwu-3rtR89ALE~}{G@hsf`w%xD8clz9Z*$rzSiTV@OKDNgqmJMr&cnjtVJ>jn zB!wSx9&|(=w&O6%Kw&M+-#oZZ-kEL{?)-gray^ri%BR=JMKwp`O5vdAMw$rqk)9lL zfh6k)NdFCLxGe(=DN-7wD5>k@=)tAZG#T^IcnqQMg${)i1W^*@L2y+)XK$yD|Kgnh zsY#@E{L9DXY0yrSJWAHZoLaWW?++uH$wTNpfN&V$2*L=$Nre6>TGIDzfMFU|#sD0f znw6SF9o}4(olj)=;Is48WT=xPYTms-nA&t37y1dELepu4egJ(oMt-C|xUZUMXqq~g zfv}5jg6b|K?U%_EW~P6;77RH5`({mSQaq+d?|1FRjTa$qTlW%1)R|-BJ5zN6)A) z)tRY33s%(I%c8v+;IIa9)QR#fI3*heFsMf;sS`)ttIcjYD?4h6;+w$wAN*R=Bv;mL`Z&N5BF|icr$# zEP5<$kXmjM=c^O#w6g}OV>$HOkYmVV>EeoL=X%m~pnu)0a4vUJz8;G!KKNuf%+z$C S2-PE|XL?rs7_*=vBR>FiLgdN- delta 1076 zcmZ`&&1(}u6yMpM%+6*vAFWCgTWf3m$R652Pc?{04=wgk3rY_`4BO0>Bu#e1?Dk;Y zBD5lS=ppV&5NS_>;Hl`J;lZ18(VHOt1)iKY1yf25%r9@=`@OGuZ{JS;n#wQca#>CO zCO6g^-$n-c66z0b%`IDXt4HbtY0DbRF!DlU#5ebide+bGYjs;{O-7}b`j}YW3#Z)41>5G zZ2R}qSLW=K5|9Zmg7*l(D8L1P34kjA!zfBpg^x+YygU&!N(t7p1r9nF39IMFUbE{( zQEJlfsC4|3KGO*p`9#{XE67X-cDXX_Yzj=%0K-fw36!v&ezo7ZU~qNSl)!Qr-WjSO zEiYhQKSC9XSAri0?79YsC7Ykszj*wZu5m zJ2=jC2r~}QE(RvACF*|*M=K{j>W;QFzNhSu(=WMlIf+e14rxM?OoES$eT3vXCR)RK zoqxZQ8lMA#&+BknLcj^u^^iv?|Cs-12#_vJd^&; zFW9G^Ev*(Vjqb>h5oFM?gD@(hY`R{kJ%?ehuDWQ+wf;YGLz4J)fExg_0K@o7QnR@# z4Quj5u;3KS1t>YhAsqB>UagS8&=H6I7KXRcYI0qQTxEc+IR?l8z!s^GfnNhjEek|C eFSVd|dLi5H`gft42&%z4m0gyM;y5uzC;tKv-O-o; diff --git a/nlp_resource_data/nltk/twitter/__pycache__/common.cpython-37.pyc b/nlp_resource_data/nltk/twitter/__pycache__/common.cpython-37.pyc index f04e698a42b8ab783b3d5c4d4dedd74292d807dd..9769110a88e24b0e48e11c1eee66d58fbd3bffee 100644 GIT binary patch delta 1842 zcmb7E&2QX96!+L;ujBp5CYvVv*)9p7nkG&8sOW~Y2`TCa0#s3j3Kz-U?syxgyY@2n zW|eBA9EgxOR3gk3q^5rXCX)c zzCHi<;`T~9ZAdV>iCz1r7bWQtZk`MUh-H|;7XVDMBu1FrB5m1{3DQbSv6OhF7L8Zc zQemYnt(~wEAfsA3(^z6hvPPKBMqo9V!IH2hS&F51mLx01GHetWX;xu5HnBsj3_H$_ zu}Ppt*$Fnq@<3%-ffYe&jFng!)^Vm>m(1$!k2@8UM41kEz0huUy+-Kz-h05(8h*P| z5BG17?@2NR{y#oH-wpV@IIUbM74T7607<9BATr644B)uTGh$cy^r{LcrW(=p!1s6( 
zHXMT#DR-UCNZqKnx(-K^14tu<8nZA19Y7*FNuf1W+@hDw*uaqdC`i4A^4ftsC45x; zOmAJD00!LbyPio8@8;vc<~f8ROZbchm^`VL&d#a;CKH z0XLM^318?d{CFs@D}AWCKIzL$zE8KPmkp7l9CAYXK<(2#vZZ>npz}^BKR^k(Cxf&R zOVdw)2CG`19_T4sY9F+kG8B}0Z^fv>xXK)FJ9L|y)q1DHeJC%8RrTWa52}MVSMRtR zj#mxWoa(jBaLxCs8x9Yk^kVHu?uDkzP4J(;1Q3xPux^_PjvW;#*FJnTqId4OorwA! z$BPsQ1jp*J(yN}^@R@T1BD?BvQ{yMm$Wu%F6mI7bP<|+^dtsgUoGM*;9)#lD@fVQ7n&enc9P`gHD^bRdA8d!d-E>jYnlkMpOBNVG&`_rNS;wmJTjk zLH1h+v9)tRJrbW6Pmpu_Ul(g+HWq{42R@iyMnQCEC<+|C_(vQo*Z+^gzv5webp<~= zUPP!M#0gFS6=_YMx9cH~%K)=&X7~jZ!K4h{GmF%`s8!Br@h_GWex+EbA{QNhCQLQh z{wW?-YTHPe3V#mIrvWaRJ{^$Z?h{abtqNenbcSDtY+b{W$U>5FZjbz9B_+yxi_3SFYZzpcyA XEsCDgbzOs5GG>i}L?mJ?>0|!^%ua5i delta 1985 zcmb7FOK;p%6!x{RJ&xy@N!mOnY21|3x=AK!Pzxz}5d@(~REVmTFd*a3bsA?Lj;@^y z6HS#xBD%@~Swbx7PpD?W4)_6VSYnB85JEzNHLZliId;-%Co5*=e&_o5+;h)+{$2cT zX@4!7)dcvQyZ52>$*Y3!8$O(TC~Q_?`QHFA!4w!_VvU$2rlKjvR5B%?QjOMSQzinV zrov>V913QNrI-p-WqnKo+Q-r?15{&KmIIn*?@X{F8#p9p#*x|J0bx2j!_FR(17c>` zc{ap`;a!eRvJo~4yF4qgF*sOY0A~*-fwR%x>AOO>f)g)!A}*MS8C^gpL&e1eFqvh@?PrfJKpK!z1a-o9MhDM|8{g z8XRN9`v4-T>a-$x*RJh29Mva~!5yBs0?Stc1fr4*YNO$&blr#*^vLHxY98g4V|hw= zF8qmpzCHj9xY=_X2JPO>`+?1i2tAtezxjxQI;BKjC9P9t!GR#!#T zSFsz+We}{0x`9*g)megLtKkpI3@L?wDTm^la`-s^s57X3AQ87?r#~VWGnTg#Y*fmBNc-@U-5TMbOPfdOcq5=)F3e$ zq=nA{XK&_?@95sP)3|KtV7m_T<>XFaFc&f()LOdR@B_zYkA$llbt6X>@7_U*fRqCjuD!6xVqT(4o*^om!9Lh^lhDjS4Pf(}T;B_RAVf`Ndk z?scU2LO?9klC^_}Am@t!f)rM>rTrVQYfF4M5VlAgS_&;=;t_p78-)NlQkN5m-%4%z zh&+(na+_{owbNUX*bEG@Ye__!294# z0=|z0D-cQs#R8;&Pj-IEy{|HiM0Do96Ih!NV~1NCu2W-H&GiF*0p7)_=da)n1JAKA zIaU(S0z|o_R;<8-gEfa=1g0@yrh}&A1b)K16h0i9x!M!v*iE{Oage$h|pYTb^{yztvhSjka%%bB$V1ExQdEZ!i{oj$NEo2ihGE{BEL>I>$-H z=MbP*J`?y7ZsVRqTVAj?(Is;UC3q)jU% OBWrnq2t->^`~L%qQ+=D1q&n=z>0IOT_>>wiFEGy&OPsQ=KB5f z-_pbfx%M9a`9jqNB}md0rU&Ow@_DL8X1OVe~< zr=>h5S{}ernx%s~1W*CcF z(;y8#36vDA!Y1h4X5cB|b(VXq5amIhRaTU{No|r%B7REVS$<1lQ}hVS3_)rH`T5St zGI-M9b)|aRxE55D_J`zFaIi}Jbc369p|3aVHDNmS_V1y)M7_AsboqjJN4-kUdf%vv zXTd_L26zSzog6{F9pdj^t|R$-TkW@8O4q*|!* zj`l%fg3Ni7iH}+xhhdIe%~f<5kG1@-gg&#i~IW+YdWNQ*8KZ%`#bC^oPoI zl!fEgIAe7g%mFW*=7d#(?cv8k@=sD2+m;jK6?e;A7ktXQnEX;^1H%jpZqzo{s{7L7 z^I+7?Xx+3RJ?O$olwk$@7?K4fdp(!!9+a*QB=iu}4kmOOBz~xFd1mUX))WBP5gbOR z*mJFArLtLHcl34c*t$vi z1Xy*~YTe5c6YoN{M3&oH_6q3^lphAaYv8N5 zW&6e%b%h}y6_=mF=$Jxx+5UT~xpl{~oUH?6{Td8A&5U*TY(?>AbDzG63*xJsHJAZo zRQr>NVbqIAjsp4L&|W%U%sm6GeM1{0{BnCSze~uB_e)`pOnZM8CaR<8Zx~6h-+Lv? 
z4)fRXcF|iZ4qcLqQwFR>sL-ha+r2mJS5U97^MjUYn*tjzOq!Y4y2IDa4+&?}?f0CmIlKX{CNpSvlYH-|QkC3n|5hpl z(iJd#sG9U#<-JGzj29~(ZiUcmuS#uzJ!lwp+W9n0>h2Zm$6?Z3!1pdTsu3*=m#}!f zLbd3|Hsw$|?c|7|I40VkV*Wq`@B4Cu{O$c%p2*w=Wk0Ixwqe$Fo!|1Fmq%JhKnS1n z33MbsZNgvfh|ZztRpiNw8>!q?nD!&OPMw;r`*9t9SVWUE@ESlXoU9*{KQ@kIxg392 z_%$F}qHc-JV>QQa7=lA<1{5tRMcqf(do+5eCA)#~NNysLH3#2VUDd)Xm#$pB&Ql=OLioCLI={C$lEX)F)Q`SqJG5!Br3ygz TZ6G0%3`9wiL<8RZ*o}Vye0cV@ delta 2287 zcmaJ>&2Jk;6yI6f>-A^s_#<)Sq)wXBY}zLMpbf1^)vp}rH#AkNRjbv;Gj6uo^_tl= zNvJ9#wcHSDwI@y-iUddqkbJ1PaNvYE^o*9jpnm~~UU+Y895+IEC^3N6{6*i`j8U z)m0~ z9PQ+v5P$`MhiQ?P1S$erpks7gpc0@(Izg)fl>trCDPd#`_DASmS`#PZfR55>IwMd8 z&@sA?&I+^#(0=-s(3*h#0Xir4)eakF1y*DwR%T;toK@H!Ho>a*Q~D&Eq%xI1l`E1y z#r9HdLw-v58p}VHh_qE?Qx~OsX}!j3h@X-VPu`Z;G@WPJF^G@g{$OuqSzM`nQ__x` z??)slxw2(CEt`2Y-|hZDE=9Vj*gZ0OeR17&*y00od);lBd}DDTXe!sqQt-2~bV`Pr zX)+&yg^wc1A%pZ7dDw^Xj~HKbJ@z&LFA7A86jfdb7UC}tp|H^4LNu|>JfCrDZl6bC z6@{Zol7o*|gE{plZAU@ugjURBe!F!AHH3L38{yO4Wa2Ez?gM71cs^&Q!)Jp#$<@vz zu=o^^)mJf~S|n5&u5G)Ud_SJd0=cF|xq<*YGF1Fc#(a;b0BA&){mq`oI}YUiP0I(r z>$d5624#+W2V}i*AW}>rD#^lMB?^S0W{FIch(fpqhp&o(ov#ERq#ky9_r`bv*c=@T zRnG2Pm<}FCsCc3_T12=Q#$2=QuZK$Awpi2O?NOm8?7Lo_Gu9M7IbfxiQt%0r$`^t2 zciK0%Z8yPd-lo;?`Al#t{k8B$BVo+vW_{zDHslsR2ugjA8kP<2L9&h_4j?p;TWL%k>!ahCC?}{*Dwp&wiB~+|&zO4Q`LrN(0NoCWFsL#tWan zX(vtia`(kZhAa*2==rwCxM4LLuF>GGV_1~eK&#(s4VOEnuf_X$3|{Asp52wmlI^-1 z?Uu2&ZJ;TOisF8OH=`srTvB}4aw_5Hx_165xrx<*u{;3FHLyQuv!Qybo^SeK)8ofL zO1RbUw*M|`kS>fDz5&r*>Jt5~x)3ZCzPyC@g$idaW6Og73-`ZztF=3037j47b0o#q}5*LgNhr)z$TF${}{z^AWG&=;*ME z!H?An`7`*fI-R=>N1lM!Q+}UDx)wjG8x)DO)bbaP@T4_BR&#Z!O397Y`-xh_@OaNiNz- zqy=AVUO*zcVj6cE5`6SHJ~S}nK$el5LvjJh3X;o6R*_)1dgR=dD|+nW%GK*Pc^Vl} mtWYn-*dp`8xdI%j?>a7R+w4pYkarhIR8~nErZLc`R{sI6QrsZ` diff --git a/nlp_resource_data/nltk/twitter/__pycache__/twitterclient.cpython-37.pyc b/nlp_resource_data/nltk/twitter/__pycache__/twitterclient.cpython-37.pyc index 40849cc514ea83d9c21085880b97be891e443774..a44a3ade820c2479c064c91d103bcb997a72c982 100644 GIT binary patch delta 1430 zcmZ{kO>7%Q6vsRM*aojni;~91#HkY}ZZ~lfJ82rXandAho#LiO6;TOkigA`r>cn52 zU86Q3K|+xbmm+v4q!v}8CvIptAr8PDICp_75#oRZ9JnBu|Ex%RpqBj8%)Iyh^X9#o z{bP53SDg!m0*Az}b@+qN)cbzr!br>HBlZ{m_r1)q3(wqTtk-@N2r_n(epYYK^$1@_ zS*uhngY$%#@IIg)=mwGkn!=yOcmNm#QUZ>*X0G!S^mV$O4bkVparGoj5*qy(oKxeN z_X0!qY$%|xaaw4PbWVtshvIK~1e8OZ!6Qps?TfJ=#s%mYRGz3ZJ6hK^zyWv<^8$FJ1+G>o!P z5sZR>4@lC3SnTK%Sz>XXzKzWshqNOS!~V`;Zdklxn9EAd&CV_8C3~hj>=E@?ipSW5 z{bu|l54%D~6Ipc$Lh_Wa(#wg(CAUCgQS`#Cfe>&ExBy%R zE&ogmRurZ4CB?NPQf2m_)+40|M8T<~F(_y;SpA4bdi*nndXwrNAFDWcfj|K;F zGQkprUI$(ja6;=_m1aqA)SBY#rbtdYK|=FBdAI0X>Wm_7DcViN(>I_X6Ls$rrmq1} z0C{U6_kzlD57Kw3@F3!s18Kk6)|Y@`aEq%_$vL9US&EhWX`gJ{cfh@XJy(y9h;IhRT?d;*|K;WM9@*K%`Jtj z-{V78VBY``#>@8XL_h1sNw*ScS*7~g*r{FD0ZAUCfzu(9g{doo48R*oP?$!NT#Lc!*QHxvW~wx zyH(p*E>aI1q2h7j&=Uw%J)olf2Pm8?;JOtjBtYUug#;&fZx%(RQZ4zXnRy@czGi>D z_2ibe90>Fr!(U`)>uc?magwS05G*(!^JRMP%R`Pj>fcCPy)#)j=jJ$}Z< zz|n3jN72qvuNXCp3o(Kl5hIKeB7`_XSH+98)(PW;1j6yw%{}o7JWK`I1l;q_Xk#Rj zI0e7?m$f0<4-+QfK{{k#2>hY2X;=z|`m%WX1^!JBLOmuJY0W|-SU`pEf)y5rvEC}1 zhfjLXtFt6LW&hm!J7Wp>zAwh|@T6}X(JkDS-Tqj%)>Uo7n{>Ak|UY@C0LCW-gXN|clyN9 zdQ%o#XW}gqXd+2QDMFesMFoSmH z&u-b`rJ7MUa9DcDo}v~df}08aGZA9z5Sl!gkuxu|KT03<@_BqX&#msdEMgIUoQ$y| z{5iSAw(V@P&x4bHXK@H>>G7AIwS_O!BPX}f^IaTnQ^iG>*XQXvF1F#P^bRY-($s0T zZnvjO3M;|mY?!UV-`VicGv(hyL$ObAQ=%7aug#ud5lZ3kcst!mwx>GDs@ZI`GacRU zD(}IKxrISlfH>;uUG5l5N77{#ewdraH>k`jo?+5+eAHvd*KF~IsM3W81oyiFi8E2P zLi=5K^USvz`4sX29YAE^^8p!K7aht6wR_?aTZA@&zLgGqnrntKv{IWp URbH_~k!0J1GP>=D%-NZL0o(LOUH||9 diff --git a/nlp_resource_data/nltk/twitter/__pycache__/util.cpython-37.pyc 
b/nlp_resource_data/nltk/twitter/__pycache__/util.cpython-37.pyc index 8433890c2f6a558b3ee80815f1b112c70f43a485..91b85406cef384b24d87bfbd19ae555a7cfb41ec 100644 GIT binary patch delta 981 zcmZ8fO>5L(5YC&V`ObFjhh?#fTA`-Own&ApEu{#x7wJXx5R{Pan|9OgCSj7GMWF|K zE-VrJ0iFc$vcEt)i)VB5?4kEPIg=pL=I~@@-e+c>8Rq=vhgCz-Cl*jN`?c4olSJ9;IH+ zfcqFtieHBNV4g@cQmG`>N=r4_NBw>VR0ggR+(2;Dx~)h)gdD6m3MS3%2Fv)5sk1pp3urIf`-ut1E9!pf%4x#xGgL7IBV z38J^7qsBvaQfr{pi7@r|($c`4`w`DmEJ>>HETtstU9M*wq~lBKSo zJ=lW>FXj0I1WyHT`v>$lc*{k4_TarBIFlgQF6_K{^S=4!o5_5d|2=0vv29br-}Pq0 z`*CDXZ#VG5C0F8%%RT0@{FGf8Po<~yRaa#aw|R+chs-T)N?hm0p~Q_qd#|`UuktcC z@x|a3ZXHT)naeLEXYA<1=F!R5MFq)0B~wCkEP;W?>1Mqh^@DoXAGV{|hrN1D9JBSY zNqkEPEU6j;Q~Y7inI@jf-fTx<^)hiFEc!^6LwP1n;*SVYU&DAa3&xxIYt0; z+wTp6b$|%KrhppzX*)B*2>Q9FcB4)R6>*|JV3xRQ%#KeWtY9caiw)!E#j|xmN6hl* zo$-Y&S?D@)CfDv{Y|mkkr{{1|cv@cF4Z`4c3~N2{%p5KhaB!%|5~ZHcfZG@i#V^yn zJ3}DqCDY;z#N(#@vU<3@B&?_ zgFIGD^IU%#zRu!5p}3)_vSFDkppAysEUVQ+1+rk( z=~mtW5fw7a#ayQ-P$yHcpqJ_CekV->fJM3~X|xA72ys?eHtuuo`K?xvBpzada5#Ef zz0VE`4U}5yBz`l=Oq{tN@q{?Bg5~uC+eCFV!Z diff --git a/nlp_resource_data/nltk/twitter/api.py b/nlp_resource_data/nltk/twitter/api.py index 1533ad2..2cce2b7 100644 --- a/nlp_resource_data/nltk/twitter/api.py +++ b/nlp_resource_data/nltk/twitter/api.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: Twitter API # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Ewan Klein # Lorenzo Rubio # URL: @@ -14,7 +14,11 @@ handling. import time as _time from abc import ABCMeta, abstractmethod -from datetime import tzinfo, timedelta, timezone, datetime +from datetime import tzinfo, timedelta, datetime + +from six import add_metaclass + +from nltk.compat import UTC class LocalTimezoneOffsetWithUTC(tzinfo): @@ -47,7 +51,8 @@ class LocalTimezoneOffsetWithUTC(tzinfo): LOCAL = LocalTimezoneOffsetWithUTC() -class BasicTweetHandler(metaclass=ABCMeta): +@add_metaclass(ABCMeta) +class BasicTweetHandler(object): """ Minimal implementation of `TweetHandler`. @@ -124,9 +129,9 @@ class TweetHandlerI(BasicTweetHandler): Validate date limits. """ if self.upper_date_limit or self.lower_date_limit: - date_fmt = "%a %b %d %H:%M:%S +0000 %Y" - tweet_date = datetime.strptime(data["created_at"], date_fmt).replace( - tzinfo=timezone.utc + date_fmt = '%a %b %d %H:%M:%S +0000 %Y' + tweet_date = datetime.strptime(data['created_at'], date_fmt).replace( + tzinfo=UTC ) if (self.upper_date_limit and tweet_date > self.upper_date_limit) or ( self.lower_date_limit and tweet_date < self.lower_date_limit diff --git a/nlp_resource_data/nltk/twitter/common.py b/nlp_resource_data/nltk/twitter/common.py index e4b3182..a06f08f 100644 --- a/nlp_resource_data/nltk/twitter/common.py +++ b/nlp_resource_data/nltk/twitter/common.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: Twitter client # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Ewan Klein # Lorenzo Rubio # URL: @@ -11,11 +11,13 @@ Utility functions for the :module:`twitterclient` module which do not require the `twython` library to have been installed. """ +from __future__ import print_function + import csv import gzip import json -from nltk.internals import deprecated +from nltk import compat HIER_SEPARATOR = "." @@ -34,7 +36,7 @@ def extract_fields(tweet, fields): _add_field_to_out(tweet, field, out) except TypeError: raise RuntimeError( - "Fatal error when extracting fields. Cannot find field ", field + 'Fatal error when extracting fields. 
Cannot find field ', field ) return out @@ -72,7 +74,7 @@ def _get_entity_recursive(json, entity): # structure that contain other Twitter objects. See: # https://dev.twitter.com/overview/api/entities-in-twitter-objects - if key == "entities" or key == "extended_entities": + if key == 'entities' or key == 'extended_entities': candidate = _get_entity_recursive(value, entity) if candidate is not None: return candidate @@ -88,7 +90,7 @@ def _get_entity_recursive(json, entity): def json2csv( - fp, outfile, fields, encoding="utf8", errors="replace", gzip_compress=False + fp, outfile, fields, encoding='utf8', errors='replace', gzip_compress=False ): """ Extract selected fields from a file of line-separated JSON tweets and @@ -113,7 +115,7 @@ def json2csv( are 'id_str' for the tweetID and 'text' for the text of the tweet. See\ for a full list of fields.\ e. g.: ['id_str'], ['id', 'text', 'favorite_count', 'retweet_count']\ - Additionally, it allows IDs from other Twitter objects, e. g.,\ + Additonally, it allows IDs from other Twitter objects, e. g.,\ ['id', 'text', 'user.id', 'user.followers_count', 'user.friends_count'] :param error: Behaviour for encoding errors, see\ @@ -121,7 +123,7 @@ def json2csv( :param gzip_compress: if `True`, output files are compressed with gzip """ - (writer, outf) = _outf_writer(outfile, encoding, errors, gzip_compress) + (writer, outf) = outf_writer_compat(outfile, encoding, errors, gzip_compress) # write the list of fields as header writer.writerow(fields) # process the file @@ -132,18 +134,22 @@ def json2csv( outf.close() -@deprecated("Use open() and csv.writer() directly instead.") def outf_writer_compat(outfile, encoding, errors, gzip_compress=False): - """Get a CSV writer with optional compression.""" - return _outf_writer(outfile, encoding, errors, gzip_compress) - - -def _outf_writer(outfile, encoding, errors, gzip_compress=False): - if gzip_compress: - outf = gzip.open(outfile, "wt", encoding=encoding, errors=errors) + """ + Identify appropriate CSV writer given the Python version + """ + if compat.PY3: + if gzip_compress: + outf = gzip.open(outfile, 'wt', encoding=encoding, errors=errors) + else: + outf = open(outfile, 'w', encoding=encoding, errors=errors) + writer = csv.writer(outf) else: - outf = open(outfile, "w", encoding=encoding, errors=errors) - writer = csv.writer(outf) + if gzip_compress: + outf = gzip.open(outfile, 'wb') + else: + outf = open(outfile, 'wb') + writer = compat.UnicodeWriter(outf, encoding=encoding, errors=errors) return (writer, outf) @@ -153,8 +159,8 @@ def json2csv_entities( main_fields, entity_type, entity_fields, - encoding="utf8", - errors="replace", + encoding='utf8', + errors='replace', gzip_compress=False, ): """ @@ -197,7 +203,7 @@ def json2csv_entities( :param gzip_compress: if `True`, ouput files are compressed with gzip """ - (writer, outf) = _outf_writer(outfile, encoding, errors, gzip_compress) + (writer, outf) = outf_writer_compat(outfile, encoding, errors, gzip_compress) header = get_header_field_list(main_fields, entity_type, entity_fields) writer.writerow(header) for line in tweets_file: diff --git a/nlp_resource_data/nltk/twitter/twitter_demo.py b/nlp_resource_data/nltk/twitter/twitter_demo.py index a241c07..967728b 100644 --- a/nlp_resource_data/nltk/twitter/twitter_demo.py +++ b/nlp_resource_data/nltk/twitter/twitter_demo.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: Twitter client # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Ewan Klein # 
Lorenzo Rubio # URL: @@ -30,11 +30,13 @@ For documentation about the Twitter APIs, see `The Streaming APIs Overview For error codes see Twitter's `Error Codes and Responses ` """ +from __future__ import print_function import datetime from functools import wraps import json -from io import StringIO + +from nltk.compat import StringIO from nltk.twitter import ( Query, @@ -46,7 +48,7 @@ from nltk.twitter import ( ) -SPACER = "###################################" +SPACER = '###################################' def verbose(func): @@ -79,10 +81,10 @@ def setup(): """ global USERIDS, FIELDS - USERIDS = ["759251", "612473", "15108702", "6017542", "2673523800"] + USERIDS = ['759251', '612473', '15108702', '6017542', '2673523800'] # UserIDs corresponding to\ # @CNN, @BBCNews, @ReutersLive, @BreakingNews, @AJELive - FIELDS = ["id_str"] + FIELDS = ['id_str'] @verbose @@ -92,18 +94,18 @@ def twitterclass_demo(): """ tw = Twitter() print("Track from the public stream\n") - tw.tweets(keywords="love, hate", limit=10) # public stream + tw.tweets(keywords='love, hate', limit=10) # public stream print(SPACER) print("Search past Tweets\n") tw = Twitter() - tw.tweets(keywords="love, hate", stream=False, limit=10) # search past tweets + tw.tweets(keywords='love, hate', stream=False, limit=10) # search past tweets print(SPACER) print( "Follow two accounts in the public stream" + " -- be prepared to wait a few minutes\n" ) tw = Twitter() - tw.tweets(follow=["759251", "6017542"], stream=True, limit=5) # public stream + tw.tweets(follow=['759251', '6017542'], stream=True, limit=5) # public stream @verbose @@ -129,18 +131,18 @@ def tracktoscreen_demo(track="taylor swift", limit=10): @verbose -def search_demo(keywords="nltk"): +def search_demo(keywords='nltk'): """ Use the REST API to search for past tweets containing a given keyword. """ oauth = credsfromfile() client = Query(**oauth) for tweet in client.search_tweets(keywords=keywords, limit=10): - print(tweet["text"]) + print(tweet['text']) @verbose -def tweets_by_user_demo(user="NLTK_org", count=200): +def tweets_by_user_demo(user='NLTK_org', count=200): """ Use the REST API to search for past tweets by a given user. 
""" @@ -159,9 +161,9 @@ def lookup_by_userid_demo(): client = Query(**oauth) user_info = client.user_info_from_id(USERIDS) for info in user_info: - name = info["screen_name"] - followers = info["followers_count"] - following = info["friends_count"] + name = info['screen_name'] + followers = info['followers_count'] + following = info['friends_count'] print("{0}, followers: {1}, following: {2}".format(name, followers, following)) @@ -209,7 +211,7 @@ def limit_by_time_demo(keywords="nltk"): print("Cutoff date: {}\n".format(dt_date)) for tweet in client.search_tweets(keywords=keywords): - print("{} ".format(tweet["created_at"]), end="") + print("{} ".format(tweet['created_at']), end='') client.handler.handle(tweet) @@ -269,12 +271,12 @@ def expand_tweetids_demo(): hydrated = client.expand_tweetids(ids_f) for tweet in hydrated: - id_str = tweet["id_str"] - print("id: {}".format(id_str)) - text = tweet["text"] - if text.startswith("@null"): + id_str = tweet['id_str'] + print('id: {}'.format(id_str)) + text = tweet['text'] + if text.startswith('@null'): text = "[Tweet not available]" - print(text + "\n") + print(text + '\n') ALL = [ diff --git a/nlp_resource_data/nltk/twitter/twitterclient.py b/nlp_resource_data/nltk/twitter/twitterclient.py index a2af7af..9f79198 100644 --- a/nlp_resource_data/nltk/twitter/twitterclient.py +++ b/nlp_resource_data/nltk/twitter/twitterclient.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: Twitter client # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Ewan Klein # Lorenzo Rubio # URL: @@ -69,7 +69,7 @@ class Streamer(TwythonStreamer): """ if self.do_continue: if self.handler is not None: - if "text" in data: + if 'text' in data: self.handler.counter += 1 self.handler.handle(data) self.do_continue = self.handler.do_continue() @@ -104,7 +104,7 @@ class Streamer(TwythonStreamer): print("Error (stream will continue): {0}".format(e)) continue - def filter(self, track="", follow="", lang="en"): + def filter(self, track='', follow='', lang='en'): """ Wrapper for 'statuses / filter' API call """ @@ -112,7 +112,7 @@ class Streamer(TwythonStreamer): # Stream in an endless loop until limit is reached try: - if track == "" and follow == "": + if track == '' and follow == '': msg = "Please supply a value for 'track', 'follow'" raise ValueError(msg) self.statuses.filter(track=track, follow=follow, lang=lang) @@ -167,7 +167,7 @@ class Query(Twython): return itertools.chain.from_iterable(chunked_tweets) - def _search_tweets(self, keywords, limit=100, lang="en"): + def _search_tweets(self, keywords, limit=100, lang='en'): """ Assumes that the handler has been informed. Fetches Tweets from search_tweets generator output and passses them to handler @@ -191,7 +191,7 @@ class Query(Twython): self, keywords, limit=100, - lang="en", + lang='en', max_id=None, retries_after_twython_exception=0, ): @@ -199,7 +199,7 @@ class Query(Twython): Call the REST API ``'search/tweets'`` endpoint with some plausible defaults. See `the Twitter search documentation `_ for more information - about admissible search parameters. + about admissable search parameters. 
:param str keywords: A list of query terms to search for, written as\ a comma-separated string @@ -220,16 +220,16 @@ class Query(Twython): self.handler.max_id = max_id else: results = self.search( - q=keywords, count=min(100, limit), lang=lang, result_type="recent" + q=keywords, count=min(100, limit), lang=lang, result_type='recent' ) - count = len(results["statuses"]) + count = len(results['statuses']) if count == 0: print("No Tweets available through REST API for those keywords") return count_from_query = count - self.handler.max_id = results["statuses"][count - 1]["id"] - 1 + self.handler.max_id = results['statuses'][count - 1]['id'] - 1 - for result in results["statuses"]: + for result in results['statuses']: yield result self.handler.counter += 1 if self.handler.do_continue() == False: @@ -246,7 +246,7 @@ class Query(Twython): count=mcount, lang=lang, max_id=self.handler.max_id, - result_type="recent", + result_type='recent', ) except TwythonRateLimitError as e: print("Waiting for 15 minutes -{0}".format(e)) @@ -258,7 +258,7 @@ class Query(Twython): raise e retries += 1 - count = len(results["statuses"]) + count = len(results['statuses']) if count == 0: print("No more Tweets available through rest api") return @@ -267,9 +267,9 @@ class Query(Twython): # results['search_metadata']['next_results'], but as part of a # query and difficult to fetch. This is doing the equivalent # (last tweet id minus one) - self.handler.max_id = results["statuses"][count - 1]["id"] - 1 + self.handler.max_id = results['statuses'][count - 1]['id'] - 1 - for result in results["statuses"]: + for result in results['statuses']: yield result self.handler.counter += 1 if self.handler.do_continue() == False: @@ -286,7 +286,7 @@ class Query(Twython): """ return [self.show_user(user_id=userid) for userid in userids] - def user_tweets(self, screen_name, limit, include_rts="false"): + def user_tweets(self, screen_name, limit, include_rts='false'): """ Return a collection of the most recent Tweets posted by the user @@ -315,13 +315,13 @@ class Twitter(object): def tweets( self, - keywords="", - follow="", + keywords='', + follow='', to_screen=True, stream=True, limit=100, date_limit=None, - lang="en", + lang='en', repeat=False, gzip_compress=False, ): @@ -398,13 +398,13 @@ class Twitter(object): if stream: self.streamer.register(handler) - if keywords == "" and follow == "": + if keywords == '' and follow == '': self.streamer.sample() else: self.streamer.filter(track=keywords, follow=follow, lang=lang) else: self.query.register(handler) - if keywords == "": + if keywords == '': raise ValueError("Please supply at least one keyword to search for.") else: self.query._search_tweets(keywords, limit=limit, lang=lang) @@ -423,7 +423,7 @@ class TweetViewer(TweetHandlerI): :rtype: bool :param data: Tweet object returned by Twitter API """ - text = data["text"] + text = data['text'] print(text) self.check_date_limit(data) @@ -431,7 +431,7 @@ class TweetViewer(TweetHandlerI): return def on_finish(self): - print("Written {0} Tweets".format(self.counter)) + print('Written {0} Tweets'.format(self.counter)) class TweetWriter(TweetHandlerI): @@ -444,8 +444,8 @@ class TweetWriter(TweetHandlerI): limit=2000, upper_date_limit=None, lower_date_limit=None, - fprefix="tweets", - subdir="twitter-files", + fprefix='tweets', + subdir='twitter-files', repeat=False, gzip_compress=False, ): @@ -497,13 +497,13 @@ class TweetWriter(TweetHandlerI): os.mkdir(subdir) fname = os.path.join(subdir, fprefix) - fmt = "%Y%m%d-%H%M%S" + fmt = '%Y%m%d-%H%M%S' 
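For orientation, the ``search_tweets`` and ``tweets`` entry points touched in the hunks above are normally driven as in the short sketch below. This is only illustrative: the keywords, user IDs and limits are the sample values used by the demo module itself, and a working credentials file (see nlp_resource_data/nltk/twitter/util.py further down) is assumed.

    from nltk.twitter import Query, Twitter, credsfromfile

    oauth = credsfromfile()                  # reads credentials.txt, as set up below
    client = Query(**oauth)
    for tweet in client.search_tweets(keywords='nltk', limit=10, lang='en'):
        print(tweet['text'])                 # each result is a Tweet dict

    tw = Twitter()
    tw.tweets(keywords='love, hate', stream=False, limit=10)         # REST search
    tw.tweets(follow=['759251', '6017542'], stream=True, limit=5)    # public stream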
timestamp = datetime.datetime.now().strftime(fmt) if self.gzip_compress: - suffix = ".gz" + suffix = '.gz' else: - suffix = "" - outfile = "{0}.{1}.json{2}".format(fname, timestamp, suffix) + suffix = '' + outfile = '{0}.{1}.json{2}'.format(fname, timestamp, suffix) return outfile def handle(self, data): @@ -515,14 +515,14 @@ class TweetWriter(TweetHandlerI): """ if self.startingup: if self.gzip_compress: - self.output = gzip.open(self.fname, "w") + self.output = gzip.open(self.fname, 'w') else: - self.output = open(self.fname, "w") - print("Writing to {0}".format(self.fname)) + self.output = open(self.fname, 'w') + print('Writing to {0}'.format(self.fname)) json_data = json.dumps(data) if self.gzip_compress: - self.output.write((json_data + "\n").encode("utf-8")) + self.output.write((json_data + "\n").encode('utf-8')) else: self.output.write(json_data + "\n") @@ -533,7 +533,7 @@ class TweetWriter(TweetHandlerI): self.startingup = False def on_finish(self): - print("Written {0} Tweets".format(self.counter)) + print('Written {0} Tweets'.format(self.counter)) if self.output: self.output.close() diff --git a/nlp_resource_data/nltk/twitter/util.py b/nlp_resource_data/nltk/twitter/util.py index 1d859f9..888ed75 100644 --- a/nlp_resource_data/nltk/twitter/util.py +++ b/nlp_resource_data/nltk/twitter/util.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Natural Language Toolkit: Twitter client # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Ewan Klein # Lorenzo Rubio # URL: @@ -11,6 +11,8 @@ Authentication utilities to accompany :module:`twitterclient`. """ +from __future__ import print_function + import os import pprint from twython import Twython @@ -31,12 +33,12 @@ class Authenticate(object): """ def __init__(self): - self.creds_file = "credentials.txt" + self.creds_file = 'credentials.txt' self.creds_fullpath = None self.oauth = {} try: - self.twitter_dir = os.environ["TWITTER"] + self.twitter_dir = os.environ['TWITTER'] self.creds_subdir = self.twitter_dir except KeyError: self.twitter_dir = None @@ -84,15 +86,15 @@ class Authenticate(object): ) if not os.path.isfile(self.creds_fullpath): - raise OSError("Cannot find file {}".format(self.creds_fullpath)) + raise OSError('Cannot find file {}'.format(self.creds_fullpath)) with open(self.creds_fullpath) as infile: if verbose: - print("Reading credentials file {}".format(self.creds_fullpath)) + print('Reading credentials file {}'.format(self.creds_fullpath)) for line in infile: - if "=" in line: - name, value = line.split("=", 1) + if '=' in line: + name, value = line.split('=', 1) self.oauth[name.strip()] = value.strip() self._validate_creds_file(verbose=verbose) @@ -102,16 +104,16 @@ class Authenticate(object): def _validate_creds_file(self, verbose=False): """Check validity of a credentials file.""" oauth1 = False - oauth1_keys = ["app_key", "app_secret", "oauth_token", "oauth_token_secret"] + oauth1_keys = ['app_key', 'app_secret', 'oauth_token', 'oauth_token_secret'] oauth2 = False - oauth2_keys = ["app_key", "app_secret", "access_token"] + oauth2_keys = ['app_key', 'app_secret', 'access_token'] if all(k in self.oauth for k in oauth1_keys): oauth1 = True elif all(k in self.oauth for k in oauth2_keys): oauth2 = True if not (oauth1 or oauth2): - msg = "Missing or incorrect entries in {}\n".format(self.creds_file) + msg = 'Missing or incorrect entries in {}\n'.format(self.creds_file) msg += pprint.pformat(self.oauth) raise ValueError(msg) elif verbose: @@ -125,15 +127,15 @@ def 
add_access_token(creds_file=None): """ if creds_file is None: path = os.path.dirname(__file__) - creds_file = os.path.join(path, "credentials2.txt") + creds_file = os.path.join(path, 'credentials2.txt') oauth2 = credsfromfile(creds_file=creds_file) - app_key = oauth2["app_key"] - app_secret = oauth2["app_secret"] + app_key = oauth2['app_key'] + app_secret = oauth2['app_secret'] twitter = Twython(app_key, app_secret, oauth_version=2) access_token = twitter.obtain_access_token() - tok = "access_token={}\n".format(access_token) - with open(creds_file, "a") as infile: + tok = 'access_token={}\n'.format(access_token) + with open(creds_file, 'a') as infile: print(tok, file=infile) diff --git a/nlp_resource_data/nltk/util.py b/nlp_resource_data/nltk/util.py index baff54e..b4c5b00 100644 --- a/nlp_resource_data/nltk/util.py +++ b/nlp_resource_data/nltk/util.py @@ -1,9 +1,10 @@ # Natural Language Toolkit: Utility functions # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # Author: Steven Bird # URL: # For license information, see LICENSE.TXT +from __future__ import print_function import sys import inspect @@ -15,12 +16,13 @@ import pydoc import bisect import os -from itertools import islice, chain, combinations, tee +from itertools import islice, chain, combinations from pprint import pprint from collections import defaultdict, deque from sys import version_info -from urllib.request import ( +from six import class_types, string_types, text_type +from six.moves.urllib.request import ( build_opener, install_opener, getproxies, @@ -32,6 +34,7 @@ from urllib.request import ( from nltk.internals import slice_bounds, raise_unorderable_types from nltk.collections import * +from nltk.compat import python_2_unicode_compatible ###################################################################### @@ -39,34 +42,37 @@ from nltk.collections import * ###################################################################### -def usage(obj, selfname="self"): +def usage(obj, selfname='self'): str(obj) # In case it's lazy, this will load it. 
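The ``Authenticate`` changes above keep the same credential workflow: a plain ``name=value`` file called ``credentials.txt``, looked up in the directory named by the ``TWITTER`` environment variable and validated against either the OAuth 1 key set (``app_key``, ``app_secret``, ``oauth_token``, ``oauth_token_secret``) or the OAuth 2 set (``app_key``, ``app_secret``, ``access_token``). A minimal sketch with placeholder values:

    import os
    from nltk.twitter import credsfromfile

    # credentials.txt (placeholder values), one name=value pair per line:
    #   app_key=YOUR_APP_KEY
    #   app_secret=YOUR_APP_SECRET
    #   oauth_token=YOUR_ACCESS_TOKEN
    #   oauth_token_secret=YOUR_ACCESS_TOKEN_SECRET

    os.environ['TWITTER'] = '/path/to/twitter-files'   # hypothetical directory holding credentials.txt
    oauth = credsfromfile()                            # -> dict of the name=value entries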
- if not isinstance(obj, type): + if not isinstance(obj, class_types): obj = obj.__class__ - print("%s supports the following operations:" % obj.__name__) + print('%s supports the following operations:' % obj.__name__) for (name, method) in sorted(pydoc.allmethods(obj).items()): - if name.startswith("_"): + if name.startswith('_'): continue - if getattr(method, "__deprecated__", False): + if getattr(method, '__deprecated__', False): continue - getargspec = inspect.getfullargspec + if sys.version_info[0] >= 3: + getargspec = inspect.getfullargspec + else: + getargspec = inspect.getargspec args, varargs, varkw, defaults = getargspec(method)[:4] if ( args - and args[0] == "self" + and args[0] == 'self' and (defaults is None or len(args) > len(defaults)) ): args = args[1:] - name = "%s.%s" % (selfname, name) + name = '%s.%s' % (selfname, name) argspec = inspect.formatargspec(args, varargs, varkw, defaults) print( textwrap.fill( - "%s%s" % (name, argspec), - initial_indent=" - ", - subsequent_indent=" " * (len(name) + 5), + '%s%s' % (name, argspec), + initial_indent=' - ', + subsequent_indent=' ' * (len(name) + 5), ) ) @@ -89,7 +95,7 @@ def in_idle(): """ import sys - return sys.stdin.__class__.__name__ in ("PyShell", "RPCProxy") + return sys.stdin.__class__.__name__ in ('PyShell', 'RPCProxy') ########################################################################## @@ -120,7 +126,7 @@ def print_string(s, width=70): :param width: the display width :type width: int """ - print("\n".join(textwrap.wrap(s, width=width))) + print('\n'.join(textwrap.wrap(s, width=width))) def tokenwrap(tokens, separator=" ", width=70): @@ -134,7 +140,7 @@ def tokenwrap(tokens, separator=" ", width=70): :param width: the display width (default=70) :type width: int """ - return "\n".join(textwrap.wrap(separator.join(tokens), width=width)) + return '\n'.join(textwrap.wrap(separator.join(tokens), width=width)) ########################################################################## @@ -196,10 +202,10 @@ def re_show(regexp, string, left="{", right="}"): # recipe from David Mertz def filestring(f): - if hasattr(f, "read"): + if hasattr(f, 'read'): return f.read() - elif isinstance(f, str): - with open(f, "r") as infile: + elif isinstance(f, string_types): + with open(f, 'r') as infile: return infile.read() else: raise ValueError("Must be called with a filename or file-like object") @@ -253,7 +259,7 @@ def guess_encoding(data): """ successful_encoding = None # we make 'utf-8' the first encoding - encodings = ["utf-8"] + encodings = ['utf-8'] # # next we add anything we can learn from the locale try: @@ -270,14 +276,14 @@ def guess_encoding(data): pass # # we try 'latin-1' last - encodings.append("latin-1") + encodings.append('latin-1') for enc in encodings: # some of the locale calls # may have returned None if not enc: continue try: - decoded = str(data, enc) + decoded = text_type(data, enc) successful_encoding = enc except (UnicodeError, LookupError): @@ -286,9 +292,9 @@ def guess_encoding(data): break if not successful_encoding: raise UnicodeError( - "Unable to decode input data. " - "Tried the following encodings: %s." - % ", ".join([repr(enc) for enc in encodings if enc]) + 'Unable to decode input data. ' + 'Tried the following encodings: %s.' 
+ % ', '.join([repr(enc) for enc in encodings if enc]) ) else: return (decoded, successful_encoding) @@ -313,7 +319,7 @@ def unique_list(xs): def invert_dict(d): inverted_dict = defaultdict(list) for key in d: - if hasattr(d[key], "__iter__"): + if hasattr(d[key], '__iter__'): for term in d[key]: inverted_dict[term].append(key) else: @@ -622,7 +628,7 @@ def skipgrams(sequence, n, k, **kwargs): """ # Pads the sequence as desired by **kwargs. - if "pad_left" in kwargs or "pad_right" in kwargs: + if 'pad_left' in kwargs or 'pad_right' in kwargs: sequence = pad_sequence(sequence, n, **kwargs) # Note when iterating through the ngrams, the pad_right here is not @@ -654,12 +660,12 @@ def binary_search_file(file, key, cache={}, cacheDepth=-1): :param key: the identifier we are searching for. """ - key = key + " " + key = key + ' ' keylen = len(key) start = 0 currentDepth = 0 - if hasattr(file, "name"): + if hasattr(file, 'name'): end = os.stat(file.name).st_size - 1 else: file.seek(0, 2) @@ -717,7 +723,7 @@ def binary_search_file(file, key, cache={}, cacheDepth=-1): ###################################################################### -def set_proxy(proxy, user=None, password=""): +def set_proxy(proxy, user=None, password=''): """ Set the HTTP proxy for Python to download through. @@ -730,15 +736,17 @@ def set_proxy(proxy, user=None, password=""): authentication. :param password: The password to authenticate with. """ + from nltk import compat + if proxy is None: # Try and find the system proxy settings try: - proxy = getproxies()["http"] + proxy = getproxies()['http'] except KeyError: - raise ValueError("Could not detect default proxy settings") + raise ValueError('Could not detect default proxy settings') # Set up the proxy handler - proxy_handler = ProxyHandler({"https": proxy, "http": proxy}) + proxy_handler = ProxyHandler({'https': proxy, 'http': proxy}) opener = build_opener(proxy_handler) if user is not None: @@ -817,29 +825,3 @@ def choose(n, k): return ntok // ktok else: return 0 - - -###################################################################### -# Iteration utilities -###################################################################### - - -def pairwise(iterable): - """s -> (s0,s1), (s1,s2), (s2, s3), ...""" - a, b = tee(iterable) - next(b, None) - return zip(a, b) - -###################################################################### -# Parallization. -###################################################################### - - -def parallelize_preprocess(func, iterator, processes, progress_bar=False): - from tqdm import tqdm - from joblib import Parallel, delayed - - iterator = tqdm(iterator) if progress_bar else iterator - if processes <= 1: - return map(func, iterator) - return Parallel(n_jobs=processes)(delayed(func)(line) for line in iterator) diff --git a/nlp_resource_data/nltk/wsd.py b/nlp_resource_data/nltk/wsd.py index ed9599c..611f649 100644 --- a/nlp_resource_data/nltk/wsd.py +++ b/nlp_resource_data/nltk/wsd.py @@ -3,7 +3,7 @@ # Authors: Liling Tan , # Dmitrijs Milajevs # -# Copyright (C) 2001-2020 NLTK Project +# Copyright (C) 2001-2019 NLTK Project # URL: # For license information, see LICENSE.TXT -- 2.7.4
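Besides the quoting changes, the final util.py hunk removes the ``pairwise`` and ``parallelize_preprocess`` helpers that only exist in later NLTK releases; callers that still need ``pairwise`` can inline the removed two-liner (``a, b = tee(iterable); next(b, None); return zip(a, b)``). The utility functions that remain and are touched above can be exercised as in this sketch; the file name is a placeholder and the sentence is just sample data:

    from nltk.util import guess_encoding, skipgrams, choose

    # guess_encoding() tries utf-8, then the locale encodings, then latin-1,
    # and returns (decoded_text, encoding_that_worked).
    raw = open('corpus.txt', 'rb').read()          # hypothetical input file
    text, enc = guess_encoding(raw)

    # skipgrams(sequence, n, k): n-grams whose items may skip up to k tokens.
    tokens = "Insurgents killed in ongoing fighting".split()
    pairs = list(skipgrams(tokens, 2, 2))

    print(choose(52, 5))                           # binomial coefficient n-choose-k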