Remove unnecessary files in nltk

author jay.ho.park <jay.ho.park@samsung.com>

Sat, 24 Oct 2020 04:06:24 +0000 (13:06 +0900)

committer jay.ho.park <jay.ho.park@samsung.com>

Sat, 24 Oct 2020 04:06:24 +0000 (13:06 +0900)
author jay.ho.park <jay.ho.park@samsung.com>
Sat, 24 Oct 2020 04:06:24 +0000 (13:06 +0900)
committer jay.ho.park <jay.ho.park@samsung.com>
Sat, 24 Oct 2020 04:06:24 +0000 (13:06 +0900)
diff --git a/nlp_resource_data/nltk/test/__init__.py b/nlp_resource_data/nltk/test/__init__.py

deleted file mode 100644 (file)

index 107774e..0000000
--- a/nlp_resource_data/nltk/test/__init__.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# Natural Language Toolkit: Unit Tests
-#
-# Copyright (C) 2001-2019 NLTK Project
-# Author: Edward Loper <edloper@gmail.com>
-# URL: <http://nltk.org/>
-# For license information, see LICENSE.TXT
-
-"""
-Unit tests for the NLTK modules.  These tests are intended to ensure
-that source code changes don't accidentally introduce bugs.
-For instructions, please see:
-
-../../web/dev/local_testing.rst
-
-https://github.com/nltk/nltk/blob/develop/web/dev/local_testing.rst
-
-
-"""
diff --git a/nlp_resource_data/nltk/test/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/__init__.cpython-37.pyc

deleted file mode 100644 (file)

index 319c440..0000000

Binary files a/nlp_resource_data/nltk/test/__pycache__/__init__.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/__pycache__/all.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/all.cpython-37.pyc

deleted file mode 100644 (file)

index ee3607d..0000000

Binary files a/nlp_resource_data/nltk/test/__pycache__/all.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/__pycache__/childes_fixt.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/childes_fixt.cpython-37.pyc

deleted file mode 100644 (file)

index 7c11dc1..0000000

Binary files a/nlp_resource_data/nltk/test/__pycache__/childes_fixt.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/__pycache__/classify_fixt.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/classify_fixt.cpython-37.pyc

deleted file mode 100644 (file)

index d35f0e4..0000000

Binary files a/nlp_resource_data/nltk/test/__pycache__/classify_fixt.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/__pycache__/compat_fixt.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/compat_fixt.cpython-37.pyc

deleted file mode 100644 (file)

index 0dfa362..0000000

Binary files a/nlp_resource_data/nltk/test/__pycache__/compat_fixt.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/__pycache__/corpus_fixt.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/corpus_fixt.cpython-37.pyc

deleted file mode 100644 (file)

index 4da5b4e..0000000

Binary files a/nlp_resource_data/nltk/test/__pycache__/corpus_fixt.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/__pycache__/discourse_fixt.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/discourse_fixt.cpython-37.pyc

deleted file mode 100644 (file)

index f6b3b4f..0000000

Binary files a/nlp_resource_data/nltk/test/__pycache__/discourse_fixt.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/__pycache__/doctest_nose_plugin.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/doctest_nose_plugin.cpython-37.pyc

deleted file mode 100644 (file)

index 4e9a518..0000000

Binary files a/nlp_resource_data/nltk/test/__pycache__/doctest_nose_plugin.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/__pycache__/gensim_fixt.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/gensim_fixt.cpython-37.pyc

deleted file mode 100644 (file)

index 08ccf6c..0000000

Binary files a/nlp_resource_data/nltk/test/__pycache__/gensim_fixt.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/__pycache__/gluesemantics_malt_fixt.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/gluesemantics_malt_fixt.cpython-37.pyc

deleted file mode 100644 (file)

index ff389c2..0000000

Binary files a/nlp_resource_data/nltk/test/__pycache__/gluesemantics_malt_fixt.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/__pycache__/inference_fixt.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/inference_fixt.cpython-37.pyc

deleted file mode 100644 (file)

index 3850b9a..0000000

Binary files a/nlp_resource_data/nltk/test/__pycache__/inference_fixt.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/__pycache__/nonmonotonic_fixt.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/nonmonotonic_fixt.cpython-37.pyc

deleted file mode 100644 (file)

index 4171a7e..0000000

Binary files a/nlp_resource_data/nltk/test/__pycache__/nonmonotonic_fixt.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/__pycache__/portuguese_en_fixt.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/portuguese_en_fixt.cpython-37.pyc

deleted file mode 100644 (file)

index e888c66..0000000

Binary files a/nlp_resource_data/nltk/test/__pycache__/portuguese_en_fixt.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/__pycache__/probability_fixt.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/probability_fixt.cpython-37.pyc

deleted file mode 100644 (file)

index b8f9560..0000000

Binary files a/nlp_resource_data/nltk/test/__pycache__/probability_fixt.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/__pycache__/runtests.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/runtests.cpython-37.pyc

deleted file mode 100644 (file)

index 37d94b2..0000000

Binary files a/nlp_resource_data/nltk/test/__pycache__/runtests.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/__pycache__/segmentation_fixt.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/segmentation_fixt.cpython-37.pyc

deleted file mode 100644 (file)

index d703bab..0000000

Binary files a/nlp_resource_data/nltk/test/__pycache__/segmentation_fixt.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/__pycache__/semantics_fixt.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/semantics_fixt.cpython-37.pyc

deleted file mode 100644 (file)

index 9d1d2e1..0000000

Binary files a/nlp_resource_data/nltk/test/__pycache__/semantics_fixt.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/__pycache__/translate_fixt.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/translate_fixt.cpython-37.pyc

deleted file mode 100644 (file)

index c1e47e5..0000000

Binary files a/nlp_resource_data/nltk/test/__pycache__/translate_fixt.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/__pycache__/wordnet_fixt.cpython-37.pyc b/nlp_resource_data/nltk/test/__pycache__/wordnet_fixt.cpython-37.pyc

deleted file mode 100644 (file)

index 3362534..0000000

Binary files a/nlp_resource_data/nltk/test/__pycache__/wordnet_fixt.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/all.py b/nlp_resource_data/nltk/test/all.py

deleted file mode 100644 (file)

index c48e52a..0000000
--- a/nlp_resource_data/nltk/test/all.py
+++ /dev/null
@@ -1,24 +0,0 @@
-"""Test suite that runs all NLTK tests.
-
-This module, `nltk.test.all`, is named as the NLTK ``test_suite`` in the
-project's ``setup-eggs.py`` file.  Here, we create a test suite that
-runs all of our doctests, and return it for processing by the setuptools
-test harness.
-
-"""
-import doctest, unittest
-from glob import glob
-import os.path
-
-
-def additional_tests():
-    # print "here-000000000000000"
-    # print "-----", glob(os.path.join(os.path.dirname(__file__), '*.doctest'))
-    dir = os.path.dirname(__file__)
-    paths = glob(os.path.join(dir, '*.doctest'))
-    files = [os.path.basename(path) for path in paths]
-    return unittest.TestSuite([doctest.DocFileSuite(file) for file in files])
-
-
-# if os.path.split(path)[-1] != 'index.rst'
-# skips time-dependent doctest in index.rst
diff --git a/nlp_resource_data/nltk/test/bleu.doctest b/nlp_resource_data/nltk/test/bleu.doctest

deleted file mode 100644 (file)

index e5ed074..0000000
--- a/nlp_resource_data/nltk/test/bleu.doctest
+++ /dev/null
@@ -1,14 +0,0 @@
-==========
-BLEU tests
-==========
-
->>> from nltk.translate import bleu
-
-If the candidate has no alignment to any of the references, the BLEU score is 0.
-
->>> bleu(
-...     ['The candidate has no alignment to any of the references'.split()],
-...     'John loves Mary'.split(),
-...     [1],
-... )
-0
diff --git a/nlp_resource_data/nltk/test/bnc.doctest b/nlp_resource_data/nltk/test/bnc.doctest

deleted file mode 100644 (file)

index e16f8a1..0000000
--- a/nlp_resource_data/nltk/test/bnc.doctest
+++ /dev/null
@@ -1,60 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-    >>> import os.path
-
-    >>> from nltk.corpus.reader import BNCCorpusReader
-    >>> import nltk.test
-
-    >>> root = os.path.dirname(nltk.test.__file__)
-    >>> bnc = BNCCorpusReader(root=root, fileids='FX8.xml')
-
-Checking the word access.
--------------------------
-
-    >>> len(bnc.words())
-    151
-
-    >>> bnc.words()[:6]
-    ['Ah', 'there', 'we', 'are', ',', '.']
-    >>> bnc.words(stem=True)[:6]
-    ['ah', 'there', 'we', 'be', ',', '.']
-
-    >>> bnc.tagged_words()[:6]
-    [('Ah', 'INTERJ'), ('there', 'ADV'), ('we', 'PRON'), ('are', 'VERB'), (',', 'PUN'), ('.', 'PUN')]
-
-    >>> bnc.tagged_words(c5=True)[:6]
-    [('Ah', 'ITJ'), ('there', 'AV0'), ('we', 'PNP'), ('are', 'VBB'), (',', 'PUN'), ('.', 'PUN')]
-
-Testing access to the sentences.
---------------------------------
-
-    >>> len(bnc.sents())
-    15
-
-    >>> bnc.sents()[0]
-    ['Ah', 'there', 'we', 'are', ',', '.']
-    >>> bnc.sents(stem=True)[0]
-    ['ah', 'there', 'we', 'be', ',', '.']
-
-    >>> bnc.tagged_sents()[0]
-    [('Ah', 'INTERJ'), ('there', 'ADV'), ('we', 'PRON'), ('are', 'VERB'), (',', 'PUN'), ('.', 'PUN')]
-    >>> bnc.tagged_sents(c5=True)[0]
-    [('Ah', 'ITJ'), ('there', 'AV0'), ('we', 'PNP'), ('are', 'VBB'), (',', 'PUN'), ('.', 'PUN')]
-
-A not lazy loader.
-------------------
-
-    >>> eager = BNCCorpusReader(root=root, fileids=r'FX8.xml', lazy=False)
-
-    >>> len(eager.words())
-    151
-    >>> eager.words(stem=True)[6:17]
-    ['right', 'abdominal', 'wound', ',', 'she', 'be', 'a', 'wee', 'bit', 'confuse', '.']
-
-    >>> eager.tagged_words()[6:11]
-    [('Right', 'ADV'), ('abdominal', 'ADJ'), ('wound', 'SUBST'), (',', 'PUN'), ('she', 'PRON')]
-    >>> eager.tagged_words(c5=True)[6:17]
-    [('Right', 'AV0'), ('abdominal', 'AJ0'), ('wound', 'NN1'), (',', 'PUN'), ('she', 'PNP'), ("'s", 'VBZ'), ('a', 'AT0'), ('wee', 'AJ0-NN1'), ('bit', 'NN1'), ('confused', 'VVN-AJ0'), ('.', 'PUN')]
-    >>> len(eager.sents())
-    15
diff --git a/nlp_resource_data/nltk/test/ccg.doctest b/nlp_resource_data/nltk/test/ccg.doctest

deleted file mode 100644 (file)

index cc0ad49..0000000
--- a/nlp_resource_data/nltk/test/ccg.doctest
+++ /dev/null
@@ -1,376 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-==============================
-Combinatory Categorial Grammar
-==============================
-
-Relative Clauses
-----------------
-
-    >>> from nltk.ccg import chart, lexicon
-
-Construct a lexicon:
-
-    >>> lex = lexicon.parseLexicon('''
-    ...     :- S, NP, N, VP
-    ...
-    ...     Det :: NP/N
-    ...     Pro :: NP
-    ...     Modal :: S\\NP/VP
-    ...
-    ...     TV :: VP/NP
-    ...     DTV :: TV/NP
-    ...
-    ...     the => Det
-    ...
-    ...     that => Det
-    ...     that => NP
-    ...
-    ...     I => Pro
-    ...     you => Pro
-    ...     we => Pro
-    ...
-    ...     chef => N
-    ...     cake => N
-    ...     children => N
-    ...     dough => N
-    ...
-    ...     will => Modal
-    ...     should => Modal
-    ...     might => Modal
-    ...     must => Modal
-    ...
-    ...     and => var\\.,var/.,var
-    ...
-    ...     to => VP[to]/VP
-    ...
-    ...     without => (VP\\VP)/VP[ing]
-    ...
-    ...     be => TV
-    ...     cook => TV
-    ...     eat => TV
-    ...
-    ...     cooking => VP[ing]/NP
-    ...
-    ...     give => DTV
-    ...
-    ...     is => (S\\NP)/NP
-    ...     prefer => (S\\NP)/NP
-    ...
-    ...     which => (N\\N)/(S/NP)
-    ...
-    ...     persuade => (VP/VP[to])/NP
-    ...     ''')
-
-    >>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
-    >>> for parse in parser.parse("you prefer that cake".split()):
-    ...     chart.printCCGDerivation(parse)
-    ...     break
-    ...
-     you    prefer      that   cake
-     NP   ((S\NP)/NP)  (NP/N)   N
-                      -------------->
-                            NP
-         --------------------------->
-                   (S\NP)
-    --------------------------------<
-                   S
-
-    >>> for parse in parser.parse("that is the cake which you prefer".split()):
-    ...     chart.printCCGDerivation(parse)
-    ...     break
-    ...
-     that      is        the    cake      which       you    prefer
-      NP   ((S\NP)/NP)  (NP/N)   N    ((N\N)/(S/NP))  NP   ((S\NP)/NP)
-                                                     ----->T
-                                                  (S/(S\NP))
-                                                     ------------------>B
-                                                           (S/NP)
-                                     ---------------------------------->
-                                                   (N\N)
-                               ----------------------------------------<
-                                                  N
-                       ------------------------------------------------>
-                                              NP
-          ------------------------------------------------------------->
-                                     (S\NP)
-    -------------------------------------------------------------------<
-                                     S
-
-
-Some other sentences to try:
-"that is the cake which we will persuade the chef to cook"
-"that is the cake which we will persuade the chef to give the children"
-
-    >>> sent = "that is the dough which you will eat without cooking".split()
-    >>> nosub_parser = chart.CCGChartParser(lex, chart.ApplicationRuleSet +
-    ...                       chart.CompositionRuleSet + chart.TypeRaiseRuleSet)
-
-Without Substitution (no output)
-
-    >>> for parse in nosub_parser.parse(sent):
-    ...     chart.printCCGDerivation(parse)
-
-With Substitution:
-
-    >>> for parse in parser.parse(sent):
-    ...     chart.printCCGDerivation(parse)
-    ...     break
-    ...
-     that      is        the    dough      which       you     will        eat          without           cooking
-      NP   ((S\NP)/NP)  (NP/N)    N    ((N\N)/(S/NP))  NP   ((S\NP)/VP)  (VP/NP)  ((VP\VP)/VP['ing'])  (VP['ing']/NP)
-                                                      ----->T
-                                                   (S/(S\NP))
-                                                                                 ------------------------------------->B
-                                                                                             ((VP\VP)/NP)
-                                                                        ----------------------------------------------<Sx
-                                                                                           (VP/NP)
-                                                           ----------------------------------------------------------->B
-                                                                                   ((S\NP)/NP)
-                                                      ---------------------------------------------------------------->B
-                                                                                   (S/NP)
-                                      -------------------------------------------------------------------------------->
-                                                                           (N\N)
-                               ---------------------------------------------------------------------------------------<
-                                                                          N
-                       ----------------------------------------------------------------------------------------------->
-                                                                     NP
-          ------------------------------------------------------------------------------------------------------------>
-                                                             (S\NP)
-    ------------------------------------------------------------------------------------------------------------------<
-                                                            S
-
-
-Conjunction
------------
-
-    >>> from nltk.ccg.chart import CCGChartParser, ApplicationRuleSet, CompositionRuleSet
-    >>> from nltk.ccg.chart import SubstitutionRuleSet, TypeRaiseRuleSet, printCCGDerivation
-    >>> from nltk.ccg import lexicon
-
-Lexicons for the tests:
-
-    >>> test1_lex = '''
-    ...        :- S,N,NP,VP
-    ...        I => NP
-    ...        you => NP
-    ...        will => S\\NP/VP
-    ...        cook => VP/NP
-    ...        which => (N\\N)/(S/NP)
-    ...        and => var\\.,var/.,var
-    ...        might => S\\NP/VP
-    ...        eat => VP/NP
-    ...        the => NP/N
-    ...        mushrooms => N
-    ...        parsnips => N'''
-    >>> test2_lex = '''
-    ...         :- N, S, NP, VP
-    ...         articles => N
-    ...         the => NP/N
-    ...         and => var\\.,var/.,var
-    ...         which => (N\\N)/(S/NP)
-    ...         I => NP
-    ...         anyone => NP
-    ...         will => (S/VP)\\NP
-    ...         file => VP/NP
-    ...         without => (VP\\VP)/VP[ing]
-    ...         forget => VP/NP
-    ...         reading => VP[ing]/NP
-    ...         '''
-
-Tests handling of conjunctions.
-Note that while the two derivations are different, they are semantically equivalent.
-
-    >>> lex = lexicon.parseLexicon(test1_lex)
-    >>> parser = CCGChartParser(lex, ApplicationRuleSet + CompositionRuleSet + SubstitutionRuleSet)
-    >>> for parse in parser.parse("I will cook and might eat the mushrooms and parsnips".split()):
-    ...     printCCGDerivation(parse)
-     I      will       cook               and                might       eat     the    mushrooms             and             parsnips
-     NP  ((S\NP)/VP)  (VP/NP)  ((_var0\.,_var0)/.,_var0)  ((S\NP)/VP)  (VP/NP)  (NP/N)      N      ((_var0\.,_var0)/.,_var0)     N
-        ---------------------->B
-             ((S\NP)/NP)
-                                                         ---------------------->B
-                                                              ((S\NP)/NP)
-                              ------------------------------------------------->
-                                         (((S\NP)/NP)\.,((S\NP)/NP))
-        -----------------------------------------------------------------------<
-                                      ((S\NP)/NP)
-                                                                                                  ------------------------------------->
-                                                                                                                 (N\.,N)
-                                                                                       ------------------------------------------------<
-                                                                                                              N
-                                                                               -------------------------------------------------------->
-                                                                                                          NP
-        ------------------------------------------------------------------------------------------------------------------------------->
-                                                                    (S\NP)
-    -----------------------------------------------------------------------------------------------------------------------------------<
-                                                                     S
-     I      will       cook               and                might       eat     the    mushrooms             and             parsnips
-     NP  ((S\NP)/VP)  (VP/NP)  ((_var0\.,_var0)/.,_var0)  ((S\NP)/VP)  (VP/NP)  (NP/N)      N      ((_var0\.,_var0)/.,_var0)     N
-        ---------------------->B
-             ((S\NP)/NP)
-                                                         ---------------------->B
-                                                              ((S\NP)/NP)
-                              ------------------------------------------------->
-                                         (((S\NP)/NP)\.,((S\NP)/NP))
-        -----------------------------------------------------------------------<
-                                      ((S\NP)/NP)
-        ------------------------------------------------------------------------------->B
-                                          ((S\NP)/N)
-                                                                                                  ------------------------------------->
-                                                                                                                 (N\.,N)
-                                                                                       ------------------------------------------------<
-                                                                                                              N
-        ------------------------------------------------------------------------------------------------------------------------------->
-                                                                    (S\NP)
-    -----------------------------------------------------------------------------------------------------------------------------------<
-                                                                     S
-
-
-Tests handling subject extraction.
-Interesting to point that the two parses are clearly semantically different.
-
-    >>> lex = lexicon.parseLexicon(test2_lex)
-    >>> parser = CCGChartParser(lex, ApplicationRuleSet + CompositionRuleSet + SubstitutionRuleSet)
-    >>> for parse in parser.parse("articles which I will file and forget without reading".split()):
-    ...     printCCGDerivation(parse)
-     articles      which       I      will       file               and             forget         without           reading
-        N      ((N\N)/(S/NP))  NP  ((S/VP)\NP)  (VP/NP)  ((_var0\.,_var0)/.,_var0)  (VP/NP)  ((VP\VP)/VP['ing'])  (VP['ing']/NP)
-                              -----------------<
-                                   (S/VP)
-                                                                                            ------------------------------------->B
-                                                                                                        ((VP\VP)/NP)
-                                                                                   ----------------------------------------------<Sx
-                                                                                                      (VP/NP)
-                                                        ------------------------------------------------------------------------->
-                                                                                   ((VP/NP)\.,(VP/NP))
-                                               ----------------------------------------------------------------------------------<
-                                                                                    (VP/NP)
-                              --------------------------------------------------------------------------------------------------->B
-                                                                            (S/NP)
-              ------------------------------------------------------------------------------------------------------------------->
-                                                                     (N\N)
-    -----------------------------------------------------------------------------------------------------------------------------<
-                                                                  N
-     articles      which       I      will       file               and             forget         without           reading
-        N      ((N\N)/(S/NP))  NP  ((S/VP)\NP)  (VP/NP)  ((_var0\.,_var0)/.,_var0)  (VP/NP)  ((VP\VP)/VP['ing'])  (VP['ing']/NP)
-                              -----------------<
-                                   (S/VP)
-                                                        ------------------------------------>
-                                                                ((VP/NP)\.,(VP/NP))
-                                               ---------------------------------------------<
-                                                                  (VP/NP)
-                                                                                            ------------------------------------->B
-                                                                                                        ((VP\VP)/NP)
-                                               ----------------------------------------------------------------------------------<Sx
-                                                                                    (VP/NP)
-                              --------------------------------------------------------------------------------------------------->B
-                                                                            (S/NP)
-              ------------------------------------------------------------------------------------------------------------------->
-                                                                     (N\N)
-    -----------------------------------------------------------------------------------------------------------------------------<
-                                                                  N
-
-
-Unicode support
----------------
-
-Unicode words are supported.
-
-    >>> from nltk.ccg import chart, lexicon
-
-Lexicons for the tests:
-
-    >>> lex = lexicon.parseLexicon(u'''
-    ...        :- S, N, NP, PP
-    ...
-    ...        AdjI :: N\\N
-    ...        AdjD :: N/N
-    ...        AdvD :: S/S
-    ...        AdvI :: S\\S
-    ...        Det :: NP/N
-    ...        PrepNPCompl :: PP/NP
-    ...        PrepNAdjN :: S\\S/N
-    ...        PrepNAdjNP :: S\\S/NP
-    ...        VPNP :: S\\NP/NP
-    ...        VPPP :: S\\NP/PP
-    ...        VPser :: S\\NP/AdjI
-    ...
-    ...        auto => N
-    ...        bebidas => N
-    ...        cine => N
-    ...        ley => N
-    ...        libro => N
-    ...        ministro => N
-    ...        panadería => N
-    ...        presidente => N
-    ...        super => N
-    ...
-    ...        el => Det
-    ...        la => Det
-    ...        las => Det
-    ...        un => Det
-    ...
-    ...        Ana => NP
-    ...        Pablo => NP
-    ...
-    ...        y => var\\.,var/.,var
-    ...
-    ...        pero => (S/NP)\\(S/NP)/(S/NP)
-    ...
-    ...        anunció => VPNP
-    ...        compró => VPNP
-    ...        cree => S\\NP/S[dep]
-    ...        desmintió => VPNP
-    ...        lee => VPNP
-    ...        fueron => VPPP
-    ...
-    ...        es => VPser
-    ...
-    ...        interesante => AdjD
-    ...        interesante => AdjI
-    ...        nueva => AdjD
-    ...        nueva => AdjI
-    ...
-    ...        a => PrepNPCompl
-    ...        en => PrepNAdjN
-    ...        en => PrepNAdjNP
-    ...
-    ...        ayer => AdvI
-    ...
-    ...        que => (NP\\NP)/(S/NP)
-    ...        que => S[dep]/S
-    ...     ''')
-
-    >>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
-    >>> for parse in parser.parse(u"el ministro anunció pero el presidente desmintió la nueva ley".split()):
-    ...     printCCGDerivation(parse) # doctest: +SKIP 
-    ...     # it fails on python2.7 because of the unicode problem explained in https://github.com/nltk/nltk/pull/1354
-    ...     break
-       el    ministro    anunció              pero              el    presidente   desmintió     la    nueva  ley
-     (NP/N)     N      ((S\NP)/NP)  (((S/NP)\(S/NP))/(S/NP))  (NP/N)      N       ((S\NP)/NP)  (NP/N)  (N/N)   N
-    ------------------>
-            NP
-    ------------------>T
-        (S/(S\NP))
-                                                             -------------------->
-                                                                      NP
-                                                             -------------------->T
-                                                                  (S/(S\NP))
-                                                             --------------------------------->B
-                                                                          (S/NP)
-                                   ----------------------------------------------------------->
-                                                         ((S/NP)\(S/NP))
-                                                                                                      ------------>
-                                                                                                           N
-                                                                                              -------------------->
-                                                                                                       NP
-                                                                                              --------------------<T
-                                                                                                   (S\(S/NP))
-                                   -------------------------------------------------------------------------------<B
-                                                                     (S\(S/NP))
-                      --------------------------------------------------------------------------------------------<B
-                                                                 (S/NP)
-    -------------------------------------------------------------------------------------------------------------->
-                                                          S
diff --git a/nlp_resource_data/nltk/test/ccg_semantics.doctest b/nlp_resource_data/nltk/test/ccg_semantics.doctest

deleted file mode 100644 (file)

index ce62733..0000000
--- a/nlp_resource_data/nltk/test/ccg_semantics.doctest
+++ /dev/null
@@ -1,553 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-==============================================
-Combinatory Categorial Grammar with semantics
-==============================================
-
------
-Chart
------
-
-
-    >>> from nltk.ccg import chart, lexicon
-    >>> from nltk.ccg.chart import printCCGDerivation
-
-No semantics
--------------------
-
-    >>> lex = lexicon.fromstring('''
-    ...     :- S, NP, N
-    ...     She => NP
-    ...     has => (S\\NP)/NP
-    ...     books => NP
-    ...     ''',
-    ...     False)
-
-    >>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
-    >>> parses = list(parser.parse("She has books".split()))
-    >>> print(str(len(parses)) + " parses")
-    3 parses
-
-    >>> printCCGDerivation(parses[0])
-     She      has      books
-     NP   ((S\NP)/NP)   NP
-         -------------------->
-                (S\NP)
-    -------------------------<
-                S
-
-    >>> printCCGDerivation(parses[1])
-     She      has      books
-     NP   ((S\NP)/NP)   NP
-    ----->T
-    (S/(S\NP))
-         -------------------->
-                (S\NP)
-    ------------------------->
-                S
-
-
-    >>> printCCGDerivation(parses[2])
-     She      has      books
-     NP   ((S\NP)/NP)   NP
-    ----->T
-    (S/(S\NP))
-    ------------------>B
-          (S/NP)
-    ------------------------->
-                S
-
-Simple semantics
--------------------
-
-    >>> lex = lexicon.fromstring('''
-    ...     :- S, NP, N
-    ...     She => NP {she}
-    ...     has => (S\\NP)/NP {\\x y.have(y, x)}
-    ...     a => NP/N {\\P.exists z.P(z)}
-    ...     book => N {book}
-    ...     ''',
-    ...     True)
-
-    >>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
-    >>> parses = list(parser.parse("She has a book".split()))
-    >>> print(str(len(parses)) + " parses")
-    7 parses
-
-    >>> printCCGDerivation(parses[0])
-       She                 has                           a                book
-     NP {she}  ((S\NP)/NP) {\x y.have(y,x)}  (NP/N) {\P.exists z.P(z)}  N {book}
-                                            ------------------------------------->
-                                                    NP {exists z.book(z)}
-              ------------------------------------------------------------------->
-                             (S\NP) {\y.have(y,exists z.book(z))}
-    -----------------------------------------------------------------------------<
-                           S {have(she,exists z.book(z))}
-
-    >>> printCCGDerivation(parses[1])
-       She                 has                           a                book
-     NP {she}  ((S\NP)/NP) {\x y.have(y,x)}  (NP/N) {\P.exists z.P(z)}  N {book}
-              --------------------------------------------------------->B
-                       ((S\NP)/N) {\P y.have(y,exists z.P(z))}
-              ------------------------------------------------------------------->
-                             (S\NP) {\y.have(y,exists z.book(z))}
-    -----------------------------------------------------------------------------<
-                           S {have(she,exists z.book(z))}
-    
-    >>> printCCGDerivation(parses[2])
-       She                 has                           a                book
-     NP {she}  ((S\NP)/NP) {\x y.have(y,x)}  (NP/N) {\P.exists z.P(z)}  N {book}
-    ---------->T
-    (S/(S\NP)) {\F.F(she)}
-                                            ------------------------------------->
-                                                    NP {exists z.book(z)}
-              ------------------------------------------------------------------->
-                             (S\NP) {\y.have(y,exists z.book(z))}
-    ----------------------------------------------------------------------------->
-                           S {have(she,exists z.book(z))}
-
-    >>> printCCGDerivation(parses[3])
-       She                 has                           a                book
-     NP {she}  ((S\NP)/NP) {\x y.have(y,x)}  (NP/N) {\P.exists z.P(z)}  N {book}
-    ---------->T
-    (S/(S\NP)) {\F.F(she)}
-              --------------------------------------------------------->B
-                       ((S\NP)/N) {\P y.have(y,exists z.P(z))}
-              ------------------------------------------------------------------->
-                             (S\NP) {\y.have(y,exists z.book(z))}
-    ----------------------------------------------------------------------------->
-                           S {have(she,exists z.book(z))}
-
-    >>> printCCGDerivation(parses[4])
-       She                 has                           a                book
-     NP {she}  ((S\NP)/NP) {\x y.have(y,x)}  (NP/N) {\P.exists z.P(z)}  N {book}
-    ---------->T
-    (S/(S\NP)) {\F.F(she)}
-    ---------------------------------------->B
-            (S/NP) {\x.have(she,x)}
-                                            ------------------------------------->
-                                                    NP {exists z.book(z)}
-    ----------------------------------------------------------------------------->
-                           S {have(she,exists z.book(z))}
-
-    >>> printCCGDerivation(parses[5])
-       She                 has                           a                book
-     NP {she}  ((S\NP)/NP) {\x y.have(y,x)}  (NP/N) {\P.exists z.P(z)}  N {book}
-    ---------->T
-    (S/(S\NP)) {\F.F(she)}
-              --------------------------------------------------------->B
-                       ((S\NP)/N) {\P y.have(y,exists z.P(z))}
-    ------------------------------------------------------------------->B
-                    (S/N) {\P.have(she,exists z.P(z))}
-    ----------------------------------------------------------------------------->
-                           S {have(she,exists z.book(z))}
-
-    >>> printCCGDerivation(parses[6])
-       She                 has                           a                book
-     NP {she}  ((S\NP)/NP) {\x y.have(y,x)}  (NP/N) {\P.exists z.P(z)}  N {book}
-    ---------->T
-    (S/(S\NP)) {\F.F(she)}
-    ---------------------------------------->B
-            (S/NP) {\x.have(she,x)}
-    ------------------------------------------------------------------->B
-                    (S/N) {\P.have(she,exists z.P(z))}
-    ----------------------------------------------------------------------------->
-                           S {have(she,exists z.book(z))}
-
-Complex semantics
--------------------
-
-    >>> lex = lexicon.fromstring('''
-    ...     :- S, NP, N
-    ...     She => NP {she}
-    ...     has => (S\\NP)/NP {\\x y.have(y, x)}
-    ...     a => ((S\\NP)\\((S\\NP)/NP))/N {\\P R x.(exists z.P(z) & R(z,x))}
-    ...     book => N {book}
-    ...     ''',
-    ...     True)
-
-    >>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
-    >>> parses = list(parser.parse("She has a book".split()))
-    >>> print(str(len(parses)) + " parses")
-    2 parses
-
-    >>> printCCGDerivation(parses[0])
-       She                 has                                           a                                 book
-     NP {she}  ((S\NP)/NP) {\x y.have(y,x)}  (((S\NP)\((S\NP)/NP))/N) {\P R x.(exists z.P(z) & R(z,x))}  N {book}
-                                            ---------------------------------------------------------------------->
-                                                   ((S\NP)\((S\NP)/NP)) {\R x.(exists z.book(z) & R(z,x))}
-              ----------------------------------------------------------------------------------------------------<
-                                           (S\NP) {\x.(exists z.book(z) & have(x,z))}
-    --------------------------------------------------------------------------------------------------------------<
-                                         S {(exists z.book(z) & have(she,z))}
-
-    >>> printCCGDerivation(parses[1])
-       She                 has                                           a                                 book
-     NP {she}  ((S\NP)/NP) {\x y.have(y,x)}  (((S\NP)\((S\NP)/NP))/N) {\P R x.(exists z.P(z) & R(z,x))}  N {book}
-    ---------->T
-    (S/(S\NP)) {\F.F(she)}
-                                            ---------------------------------------------------------------------->
-                                                   ((S\NP)\((S\NP)/NP)) {\R x.(exists z.book(z) & R(z,x))}
-              ----------------------------------------------------------------------------------------------------<
-                                           (S\NP) {\x.(exists z.book(z) & have(x,z))}
-    -------------------------------------------------------------------------------------------------------------->
-                                         S {(exists z.book(z) & have(she,z))}
-
-Using conjunctions
----------------------
-
-    # TODO: The semantics of "and" should have been more flexible
-    >>> lex = lexicon.fromstring('''
-    ...     :- S, NP, N
-    ...     I => NP {I}
-    ...     cook => (S\\NP)/NP {\\x y.cook(x,y)}
-    ...     and => var\\.,var/.,var {\\P Q x y.(P(x,y) & Q(x,y))}
-    ...     eat => (S\\NP)/NP {\\x y.eat(x,y)}
-    ...     the => NP/N {\\x.the(x)}
-    ...     bacon => N {bacon}
-    ...     ''',
-    ...     True)
-
-    >>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
-    >>> parses = list(parser.parse("I cook and eat the bacon".split()))
-    >>> print(str(len(parses)) + " parses")
-    7 parses
-
-    >>> printCCGDerivation(parses[0])
-       I                 cook                                       and                                        eat                     the            bacon
-     NP {I}  ((S\NP)/NP) {\x y.cook(x,y)}  ((_var0\.,_var0)/.,_var0) {\P Q x y.(P(x,y) & Q(x,y))}  ((S\NP)/NP) {\x y.eat(x,y)}  (NP/N) {\x.the(x)}  N {bacon}
-                                          ------------------------------------------------------------------------------------->
-                                                        (((S\NP)/NP)\.,((S\NP)/NP)) {\Q x y.(eat(x,y) & Q(x,y))}
-            -------------------------------------------------------------------------------------------------------------------<
-                                                 ((S\NP)/NP) {\x y.(eat(x,y) & cook(x,y))}
-                                                                                                                               ------------------------------->
-                                                                                                                                       NP {the(bacon)}
-            -------------------------------------------------------------------------------------------------------------------------------------------------->
-                                                           (S\NP) {\y.(eat(the(bacon),y) & cook(the(bacon),y))}
-    ----------------------------------------------------------------------------------------------------------------------------------------------------------<
-                                                           S {(eat(the(bacon),I) & cook(the(bacon),I))}
-
-    >>> printCCGDerivation(parses[1])
-       I                 cook                                       and                                        eat                     the            bacon
-     NP {I}  ((S\NP)/NP) {\x y.cook(x,y)}  ((_var0\.,_var0)/.,_var0) {\P Q x y.(P(x,y) & Q(x,y))}  ((S\NP)/NP) {\x y.eat(x,y)}  (NP/N) {\x.the(x)}  N {bacon}
-                                          ------------------------------------------------------------------------------------->
-                                                        (((S\NP)/NP)\.,((S\NP)/NP)) {\Q x y.(eat(x,y) & Q(x,y))}
-            -------------------------------------------------------------------------------------------------------------------<
-                                                 ((S\NP)/NP) {\x y.(eat(x,y) & cook(x,y))}
-            --------------------------------------------------------------------------------------------------------------------------------------->B
-                                                      ((S\NP)/N) {\x y.(eat(the(x),y) & cook(the(x),y))}
-            -------------------------------------------------------------------------------------------------------------------------------------------------->
-                                                           (S\NP) {\y.(eat(the(bacon),y) & cook(the(bacon),y))}
-    ----------------------------------------------------------------------------------------------------------------------------------------------------------<
-                                                           S {(eat(the(bacon),I) & cook(the(bacon),I))}
-
-    >>> printCCGDerivation(parses[2])
-       I                 cook                                       and                                        eat                     the            bacon
-     NP {I}  ((S\NP)/NP) {\x y.cook(x,y)}  ((_var0\.,_var0)/.,_var0) {\P Q x y.(P(x,y) & Q(x,y))}  ((S\NP)/NP) {\x y.eat(x,y)}  (NP/N) {\x.the(x)}  N {bacon}
-    -------->T
-    (S/(S\NP)) {\F.F(I)}
-                                          ------------------------------------------------------------------------------------->
-                                                        (((S\NP)/NP)\.,((S\NP)/NP)) {\Q x y.(eat(x,y) & Q(x,y))}
-            -------------------------------------------------------------------------------------------------------------------<
-                                                 ((S\NP)/NP) {\x y.(eat(x,y) & cook(x,y))}
-                                                                                                                               ------------------------------->
-                                                                                                                                       NP {the(bacon)}
-            -------------------------------------------------------------------------------------------------------------------------------------------------->
-                                                           (S\NP) {\y.(eat(the(bacon),y) & cook(the(bacon),y))}
-    ---------------------------------------------------------------------------------------------------------------------------------------------------------->
-                                                           S {(eat(the(bacon),I) & cook(the(bacon),I))}
-
-    >>> printCCGDerivation(parses[3])
-       I                 cook                                       and                                        eat                     the            bacon
-     NP {I}  ((S\NP)/NP) {\x y.cook(x,y)}  ((_var0\.,_var0)/.,_var0) {\P Q x y.(P(x,y) & Q(x,y))}  ((S\NP)/NP) {\x y.eat(x,y)}  (NP/N) {\x.the(x)}  N {bacon}
-    -------->T
-    (S/(S\NP)) {\F.F(I)}
-                                          ------------------------------------------------------------------------------------->
-                                                        (((S\NP)/NP)\.,((S\NP)/NP)) {\Q x y.(eat(x,y) & Q(x,y))}
-            -------------------------------------------------------------------------------------------------------------------<
-                                                 ((S\NP)/NP) {\x y.(eat(x,y) & cook(x,y))}
-            --------------------------------------------------------------------------------------------------------------------------------------->B
-                                                      ((S\NP)/N) {\x y.(eat(the(x),y) & cook(the(x),y))}
-            -------------------------------------------------------------------------------------------------------------------------------------------------->
-                                                           (S\NP) {\y.(eat(the(bacon),y) & cook(the(bacon),y))}
-    ---------------------------------------------------------------------------------------------------------------------------------------------------------->
-                                                           S {(eat(the(bacon),I) & cook(the(bacon),I))}
-
-    >>> printCCGDerivation(parses[4])
-       I                 cook                                       and                                        eat                     the            bacon
-     NP {I}  ((S\NP)/NP) {\x y.cook(x,y)}  ((_var0\.,_var0)/.,_var0) {\P Q x y.(P(x,y) & Q(x,y))}  ((S\NP)/NP) {\x y.eat(x,y)}  (NP/N) {\x.the(x)}  N {bacon}
-    -------->T
-    (S/(S\NP)) {\F.F(I)}
-                                          ------------------------------------------------------------------------------------->
-                                                        (((S\NP)/NP)\.,((S\NP)/NP)) {\Q x y.(eat(x,y) & Q(x,y))}
-            -------------------------------------------------------------------------------------------------------------------<
-                                                 ((S\NP)/NP) {\x y.(eat(x,y) & cook(x,y))}
-    --------------------------------------------------------------------------------------------------------------------------->B
-                                                (S/NP) {\x.(eat(x,I) & cook(x,I))}
-                                                                                                                               ------------------------------->
-                                                                                                                                       NP {the(bacon)}
-    ---------------------------------------------------------------------------------------------------------------------------------------------------------->
-                                                           S {(eat(the(bacon),I) & cook(the(bacon),I))}
-
-    >>> printCCGDerivation(parses[5])
-       I                 cook                                       and                                        eat                     the            bacon
-     NP {I}  ((S\NP)/NP) {\x y.cook(x,y)}  ((_var0\.,_var0)/.,_var0) {\P Q x y.(P(x,y) & Q(x,y))}  ((S\NP)/NP) {\x y.eat(x,y)}  (NP/N) {\x.the(x)}  N {bacon}
-    -------->T
-    (S/(S\NP)) {\F.F(I)}
-                                          ------------------------------------------------------------------------------------->
-                                                        (((S\NP)/NP)\.,((S\NP)/NP)) {\Q x y.(eat(x,y) & Q(x,y))}
-            -------------------------------------------------------------------------------------------------------------------<
-                                                 ((S\NP)/NP) {\x y.(eat(x,y) & cook(x,y))}
-            --------------------------------------------------------------------------------------------------------------------------------------->B
-                                                      ((S\NP)/N) {\x y.(eat(the(x),y) & cook(the(x),y))}
-    ----------------------------------------------------------------------------------------------------------------------------------------------->B
-                                                      (S/N) {\x.(eat(the(x),I) & cook(the(x),I))}
-    ---------------------------------------------------------------------------------------------------------------------------------------------------------->
-                                                           S {(eat(the(bacon),I) & cook(the(bacon),I))}
-
-    >>> printCCGDerivation(parses[6])
-       I                 cook                                       and                                        eat                     the            bacon
-     NP {I}  ((S\NP)/NP) {\x y.cook(x,y)}  ((_var0\.,_var0)/.,_var0) {\P Q x y.(P(x,y) & Q(x,y))}  ((S\NP)/NP) {\x y.eat(x,y)}  (NP/N) {\x.the(x)}  N {bacon}
-    -------->T
-    (S/(S\NP)) {\F.F(I)}
-                                          ------------------------------------------------------------------------------------->
-                                                        (((S\NP)/NP)\.,((S\NP)/NP)) {\Q x y.(eat(x,y) & Q(x,y))}
-            -------------------------------------------------------------------------------------------------------------------<
-                                                 ((S\NP)/NP) {\x y.(eat(x,y) & cook(x,y))}
-    --------------------------------------------------------------------------------------------------------------------------->B
-                                                (S/NP) {\x.(eat(x,I) & cook(x,I))}
-    ----------------------------------------------------------------------------------------------------------------------------------------------->B
-                                                      (S/N) {\x.(eat(the(x),I) & cook(the(x),I))}
-    ---------------------------------------------------------------------------------------------------------------------------------------------------------->
-                                                           S {(eat(the(bacon),I) & cook(the(bacon),I))}
-
-Tests from published papers
-------------------------------
-
-An example from "CCGbank: A Corpus of CCG Derivations and Dependency Structures Extracted from the Penn Treebank", Hockenmaier and Steedman, 2007, Page 359, https://www.aclweb.org/anthology/J/J07/J07-3004.pdf
-
-    >>> lex = lexicon.fromstring('''
-    ...     :- S, NP
-    ...     I => NP {I}
-    ...     give => ((S\\NP)/NP)/NP {\\x y z.give(y,x,z)}
-    ...     them => NP {them}
-    ...     money => NP {money}
-    ...     ''',
-    ...     True)
-
-    >>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
-    >>> parses = list(parser.parse("I give them money".split()))
-    >>> print(str(len(parses)) + " parses")
-    3 parses
-
-    >>> printCCGDerivation(parses[0])
-       I                     give                     them       money
-     NP {I}  (((S\NP)/NP)/NP) {\x y z.give(y,x,z)}  NP {them}  NP {money}
-            -------------------------------------------------->
-                    ((S\NP)/NP) {\y z.give(y,them,z)}
-            -------------------------------------------------------------->
-                            (S\NP) {\z.give(money,them,z)}
-    ----------------------------------------------------------------------<
-                            S {give(money,them,I)}
-
-    >>> printCCGDerivation(parses[1])
-       I                     give                     them       money
-     NP {I}  (((S\NP)/NP)/NP) {\x y z.give(y,x,z)}  NP {them}  NP {money}
-    -------->T
-    (S/(S\NP)) {\F.F(I)}
-            -------------------------------------------------->
-                    ((S\NP)/NP) {\y z.give(y,them,z)}
-            -------------------------------------------------------------->
-                            (S\NP) {\z.give(money,them,z)}
-    ---------------------------------------------------------------------->
-                            S {give(money,them,I)}
-
-    
-    >>> printCCGDerivation(parses[2])
-       I                     give                     them       money
-     NP {I}  (((S\NP)/NP)/NP) {\x y z.give(y,x,z)}  NP {them}  NP {money}
-    -------->T
-    (S/(S\NP)) {\F.F(I)}
-            -------------------------------------------------->
-                    ((S\NP)/NP) {\y z.give(y,them,z)}
-    ---------------------------------------------------------->B
-                    (S/NP) {\y.give(y,them,I)}
-    ---------------------------------------------------------------------->
-                            S {give(money,them,I)}
-
-
-An example from "CCGbank: A Corpus of CCG Derivations and Dependency Structures Extracted from the Penn Treebank", Hockenmaier and Steedman, 2007, Page 359, https://www.aclweb.org/anthology/J/J07/J07-3004.pdf
-
-    >>> lex = lexicon.fromstring('''
-    ...     :- N, NP, S
-    ...     money => N {money}
-    ...     that => (N\\N)/(S/NP) {\\P Q x.(P(x) & Q(x))}
-    ...     I => NP {I}
-    ...     give => ((S\\NP)/NP)/NP {\\x y z.give(y,x,z)}
-    ...     them => NP {them}
-    ...     ''',
-    ...     True)
-
-    >>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet)
-    >>> parses = list(parser.parse("money that I give them".split()))
-    >>> print(str(len(parses)) + " parses")
-    3 parses
-
-    >>> printCCGDerivation(parses[0])
-       money                    that                     I                     give                     them
-     N {money}  ((N\N)/(S/NP)) {\P Q x.(P(x) & Q(x))}  NP {I}  (((S\NP)/NP)/NP) {\x y z.give(y,x,z)}  NP {them}
-                                                      -------->T
-                                                (S/(S\NP)) {\F.F(I)}
-                                                              -------------------------------------------------->
-                                                                      ((S\NP)/NP) {\y z.give(y,them,z)}
-                                                      ---------------------------------------------------------->B
-                                                                      (S/NP) {\y.give(y,them,I)}
-               ------------------------------------------------------------------------------------------------->
-                                             (N\N) {\Q x.(give(x,them,I) & Q(x))}
-    ------------------------------------------------------------------------------------------------------------<
-                                         N {\x.(give(x,them,I) & money(x))}
-
-    >>> printCCGDerivation(parses[1])
-       money                    that                     I                     give                     them
-     N {money}  ((N\N)/(S/NP)) {\P Q x.(P(x) & Q(x))}  NP {I}  (((S\NP)/NP)/NP) {\x y z.give(y,x,z)}  NP {them}
-    ----------->T
-    (N/(N\N)) {\F.F(money)}
-                                                      -------->T
-                                                (S/(S\NP)) {\F.F(I)}
-                                                              -------------------------------------------------->
-                                                                      ((S\NP)/NP) {\y z.give(y,them,z)}
-                                                      ---------------------------------------------------------->B
-                                                                      (S/NP) {\y.give(y,them,I)}
-               ------------------------------------------------------------------------------------------------->
-                                             (N\N) {\Q x.(give(x,them,I) & Q(x))}
-    ------------------------------------------------------------------------------------------------------------>
-                                         N {\x.(give(x,them,I) & money(x))}
-
-    >>> printCCGDerivation(parses[2])
-       money                    that                     I                     give                     them
-     N {money}  ((N\N)/(S/NP)) {\P Q x.(P(x) & Q(x))}  NP {I}  (((S\NP)/NP)/NP) {\x y z.give(y,x,z)}  NP {them}
-    ----------->T
-    (N/(N\N)) {\F.F(money)}
-    -------------------------------------------------->B
-           (N/(S/NP)) {\P x.(P(x) & money(x))}
-                                                      -------->T
-                                                (S/(S\NP)) {\F.F(I)}
-                                                              -------------------------------------------------->
-                                                                      ((S\NP)/NP) {\y z.give(y,them,z)}
-                                                      ---------------------------------------------------------->B
-                                                                      (S/NP) {\y.give(y,them,I)}
-    ------------------------------------------------------------------------------------------------------------>
-                                         N {\x.(give(x,them,I) & money(x))}
-
-
--------
-Lexicon
--------
-
-    >>> from nltk.ccg import lexicon
-
-Parse lexicon with semantics
-
-    >>> print(str(lexicon.fromstring(
-    ...     '''
-    ...     :- S,NP
-    ...
-    ...     IntransVsg :: S\\NP[sg]
-    ...     
-    ...     sleeps => IntransVsg {\\x.sleep(x)}
-    ...     eats => S\\NP[sg]/NP {\\x y.eat(x,y)}
-    ...        
-    ...     and => var\\var/var {\\x y.x & y}
-    ...     ''',
-    ...     True
-    ... )))
-    and => ((_var0\_var0)/_var0) {(\x y.x & y)}
-    eats => ((S\NP['sg'])/NP) {\x y.eat(x,y)}
-    sleeps => (S\NP['sg']) {\x.sleep(x)}
-
-Parse lexicon without semantics
-
-    >>> print(str(lexicon.fromstring(
-    ...     '''
-    ...     :- S,NP
-    ...
-    ...     IntransVsg :: S\\NP[sg]
-    ...     
-    ...     sleeps => IntransVsg
-    ...     eats => S\\NP[sg]/NP {sem=\\x y.eat(x,y)}
-    ...        
-    ...     and => var\\var/var
-    ...     ''',
-    ...     False
-    ... )))
-    and => ((_var0\_var0)/_var0)
-    eats => ((S\NP['sg'])/NP)
-    sleeps => (S\NP['sg'])
-
-Semantics are missing
-
-    >>> print(str(lexicon.fromstring(
-    ...     '''
-    ...     :- S,NP
-    ...     
-    ...     eats => S\\NP[sg]/NP
-    ...     ''',
-    ...     True
-    ... )))
-    Traceback (most recent call last):
-      ...
-    AssertionError: eats => S\NP[sg]/NP must contain semantics because include_semantics is set to True
-
-
-------------------------------------
-CCG combinator semantics computation
-------------------------------------
-
-    >>> from nltk.sem.logic import *
-    >>> from nltk.ccg.logic import *
-
-    >>> read_expr = Expression.fromstring
-
-Compute semantics from function application
-
-    >>> print(str(compute_function_semantics(read_expr(r'\x.P(x)'), read_expr(r'book'))))
-    P(book)
-
-    >>> print(str(compute_function_semantics(read_expr(r'\P.P(book)'), read_expr(r'read'))))
-    read(book)
-
-    >>> print(str(compute_function_semantics(read_expr(r'\P.P(book)'), read_expr(r'\x.read(x)'))))
-    read(book)
-
-Compute semantics from composition
-
-    >>> print(str(compute_composition_semantics(read_expr(r'\x.P(x)'), read_expr(r'\x.Q(x)'))))
-    \x.P(Q(x))
-
-    >>> print(str(compute_composition_semantics(read_expr(r'\x.P(x)'), read_expr(r'read'))))
-    Traceback (most recent call last):
-      ...
-    AssertionError: `read` must be a lambda expression
-
-Compute semantics from substitution
-
-    >>> print(str(compute_substitution_semantics(read_expr(r'\x y.P(x,y)'), read_expr(r'\x.Q(x)'))))
-    \x.P(x,Q(x))
-    
-    >>> print(str(compute_substitution_semantics(read_expr(r'\x.P(x)'), read_expr(r'read'))))
-    Traceback (most recent call last):
-      ...
-    AssertionError: `\x.P(x)` must be a lambda expression with 2 arguments
-
-Compute type-raise semantics
-
-    >>> print(str(compute_type_raised_semantics(read_expr(r'\x.P(x)'))))
-    \F x.F(P(x))
-
-    >>> print(str(compute_type_raised_semantics(read_expr(r'\x.F(x)'))))
-    \F1 x.F1(F(x))
-
-    >>> print(str(compute_type_raised_semantics(read_expr(r'\x y z.P(x,y,z)'))))
-    \F x y z.F(P(x,y,z))
-
diff --git a/nlp_resource_data/nltk/test/chat80.doctest b/nlp_resource_data/nltk/test/chat80.doctest

deleted file mode 100644 (file)

index 9efe693..0000000
--- a/nlp_resource_data/nltk/test/chat80.doctest
+++ /dev/null
@@ -1,234 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-=======
-Chat-80
-=======
-
-Chat-80 was a natural language system which allowed the user to
-interrogate a Prolog knowledge base in the domain of world
-geography. It was developed in the early '80s by Warren and Pereira; see
-`<http://acl.ldc.upenn.edu/J/J82/J82-3002.pdf>`_ for a description and
-`<http://www.cis.upenn.edu/~pereira/oldies.html>`_ for the source
-files.
-
-The ``chat80`` module contains functions to extract data from the Chat-80
-relation files ('the world database'), and convert them into a format
-that can be incorporated in the FOL models of
-``nltk.sem.evaluate``. The code assumes that the Prolog
-input files are available in the NLTK corpora directory.
-
-The Chat-80 World Database consists of the following files::
-
-    world0.pl
-    rivers.pl
-    cities.pl
-    countries.pl
-    contain.pl
-    borders.pl
-
-This module uses a slightly modified version of ``world0.pl``, in which
-a set of Prolog rules have been omitted. The modified file is named
-``world1.pl``. Currently, the file ``rivers.pl`` is not read in, since
-it uses a list rather than a string in the second field.
-
-Reading Chat-80 Files
-=====================
-
-Chat-80 relations are like tables in a relational database. The
-relation acts as the name of the table; the first argument acts as the
-'primary key'; and subsequent arguments are further fields in the
-table. In general, the name of the table provides a label for a unary
-predicate whose extension is all the primary keys. For example,
-relations in ``cities.pl`` are of the following form::
-
-   'city(athens,greece,1368).'
-
-Here, ``'athens'`` is the key, and will be mapped to a member of the
-unary predicate *city*.
-
-By analogy with NLTK corpora, ``chat80`` defines a number of 'items'
-which correspond to the relations.
-
-    >>> from nltk.sem import chat80
-    >>> print(chat80.items) # doctest: +ELLIPSIS
-    ('borders', 'circle_of_lat', 'circle_of_long', 'city', ...)
-
-The fields in the table are mapped to binary predicates. The first
-argument of the predicate is the primary key, while the second
-argument is the data in the relevant field. Thus, in the above
-example, the third field is mapped to the binary predicate
-*population_of*, whose extension is a set of pairs such as
-``'(athens, 1368)'``.
-
-An exception to this general framework is required by the relations in
-the files ``borders.pl`` and ``contain.pl``. These contain facts of the
-following form::
-
-    'borders(albania,greece).'
-
-    'contains0(africa,central_africa).'
-
-We do not want to form a unary concept out of the element in
-the first field of these records, and we want the label of the binary
-relation just to be ``'border'``/``'contain'`` respectively.
-
-In order to drive the extraction process, we use 'relation metadata bundles'
-which are Python dictionaries such as the following::
-
-  city = {'label': 'city',
-          'closures': [],
-          'schema': ['city', 'country', 'population'],
-          'filename': 'cities.pl'}
-
-According to this, the file ``city['filename']`` contains a list of
-relational tuples (or more accurately, the corresponding strings in
-Prolog form) whose predicate symbol is ``city['label']`` and whose
-relational schema is ``city['schema']``. The notion of a ``closure`` is
-discussed in the next section.
-
-Concepts
-========
-In order to encapsulate the results of the extraction, a class of
-``Concept``\ s is introduced.  A ``Concept`` object has a number of
-attributes, in particular a ``prefLabel``, an arity and ``extension``.
-
-    >>> c1 = chat80.Concept('dog', arity=1, extension=set(['d1', 'd2']))
-    >>> print(c1)
-    Label = 'dog'
-    Arity = 1
-    Extension = ['d1', 'd2']
-
-
-
-The ``extension`` attribute makes it easier to inspect the output of
-the extraction.
-
-    >>> schema = ['city', 'country', 'population']
-    >>> concepts = chat80.clause2concepts('cities.pl', 'city', schema)
-    >>> concepts
-    [Concept('city'), Concept('country_of'), Concept('population_of')]
-    >>> for c in concepts: # doctest: +NORMALIZE_WHITESPACE
-    ...     print("%s:\n\t%s" % (c.prefLabel, c.extension[:4]))
-    city:
-        ['athens', 'bangkok', 'barcelona', 'berlin']
-    country_of:
-        [('athens', 'greece'), ('bangkok', 'thailand'), ('barcelona', 'spain'), ('berlin', 'east_germany')]
-    population_of:
-        [('athens', '1368'), ('bangkok', '1178'), ('barcelona', '1280'), ('berlin', '3481')]
-
-In addition, the ``extension`` can be further
-processed: in the case of the ``'border'`` relation, we check that the
-relation is **symmetric**, and in the case of the ``'contain'``
-relation, we carry out the **transitive closure**. The closure
-properties associated with a concept are indicated in the relation
-metadata, as indicated earlier.
-
-    >>> borders = set([('a1', 'a2'), ('a2', 'a3')])
-    >>> c2 = chat80.Concept('borders', arity=2, extension=borders)
-    >>> print(c2)
-    Label = 'borders'
-    Arity = 2
-    Extension = [('a1', 'a2'), ('a2', 'a3')]
-    >>> c3 = chat80.Concept('borders', arity=2, closures=['symmetric'], extension=borders)
-    >>> c3.close()
-    >>> print(c3)
-    Label = 'borders'
-    Arity = 2
-    Extension = [('a1', 'a2'), ('a2', 'a1'), ('a2', 'a3'), ('a3', 'a2')]
-
-The ``extension`` of a ``Concept`` object is then incorporated into a
-``Valuation`` object.
-
-Persistence
-===========
-The functions ``val_dump`` and ``val_load`` are provided to allow a
-valuation to be stored in a persistent database and re-loaded, rather
-than having to be re-computed each time.
-
-Individuals and Lexical Items
-=============================
-As well as deriving relations from the Chat-80 data, we also create a
-set of individual constants, one for each entity in the domain. The
-individual constants are string-identical to the entities. For
-example, given a data item such as ``'zloty'``, we add to the valuation
-a pair ``('zloty', 'zloty')``. In order to parse English sentences that
-refer to these entities, we also create a lexical item such as the
-following for each individual constant::
-
-   PropN[num=sg, sem=<\P.(P zloty)>] -> 'Zloty'
-
-The set of rules is written to the file ``chat_pnames.fcfg`` in the
-current directory.
-
-SQL Query
-=========
-
-The ``city`` relation is also available in RDB form and can be queried
-using SQL statements.
-
-    >>> import nltk
-    >>> q = "SELECT City, Population FROM city_table WHERE Country = 'china' and Population > 1000"
-    >>> for answer in chat80.sql_query('corpora/city_database/city.db', q):
-    ...     print("%-10s %4s" % answer)
-    canton     1496
-    chungking  1100
-    mukden     1551
-    peking     2031
-    shanghai   5407
-    tientsin   1795
-
-The (deliberately naive) grammar ``sql0.fcfg`` translates from English
-to SQL:
-
-    >>> nltk.data.show_cfg('grammars/book_grammars/sql0.fcfg')
-    % start S
-    S[SEM=(?np + WHERE + ?vp)] -> NP[SEM=?np] VP[SEM=?vp]
-    VP[SEM=(?v + ?pp)] -> IV[SEM=?v] PP[SEM=?pp]
-    VP[SEM=(?v + ?ap)] -> IV[SEM=?v] AP[SEM=?ap]
-    NP[SEM=(?det + ?n)] -> Det[SEM=?det] N[SEM=?n]
-    PP[SEM=(?p + ?np)] -> P[SEM=?p] NP[SEM=?np]
-    AP[SEM=?pp] -> A[SEM=?a] PP[SEM=?pp]
-    NP[SEM='Country="greece"'] -> 'Greece'
-    NP[SEM='Country="china"'] -> 'China'
-    Det[SEM='SELECT'] -> 'Which' | 'What'
-    N[SEM='City FROM city_table'] -> 'cities'
-    IV[SEM=''] -> 'are'
-    A[SEM=''] -> 'located'
-    P[SEM=''] -> 'in'
-
-Given this grammar, we can express, and then execute, queries in English.
-
-    >>> cp = nltk.parse.load_parser('grammars/book_grammars/sql0.fcfg')
-    >>> query = 'What cities are in China'
-    >>> for tree in cp.parse(query.split()):
-    ...     answer = tree.label()['SEM']
-    ...     q = " ".join(answer)
-    ...     print(q)
-    ...
-    SELECT City FROM city_table WHERE   Country="china"
-
-    >>> rows = chat80.sql_query('corpora/city_database/city.db', q)
-    >>> for r in rows: print("%s" % r, end=' ')
-    canton chungking dairen harbin kowloon mukden peking shanghai sian tientsin
-
-
-Using Valuations
------------------
-
-In order to convert such an extension into a valuation, we use the
-``make_valuation()`` method; setting ``read=True`` creates and returns
-a new ``Valuation`` object which contains the results.
-
-   >>> val = chat80.make_valuation(concepts, read=True)
-   >>> 'calcutta' in val['city']
-   True
-   >>> [town for (town, country) in val['country_of'] if country == 'india']
-   ['bombay', 'calcutta', 'delhi', 'hyderabad', 'madras']
-   >>> dom = val.domain
-   >>> g = nltk.sem.Assignment(dom)
-   >>> m = nltk.sem.Model(dom, val)
-   >>> m.evaluate(r'population_of(jakarta, 533)', g)
-   True
-
-
diff --git a/nlp_resource_data/nltk/test/childes.doctest b/nlp_resource_data/nltk/test/childes.doctest

deleted file mode 100644 (file)

index 7900c54..0000000
--- a/nlp_resource_data/nltk/test/childes.doctest
+++ /dev/null
@@ -1,184 +0,0 @@
-=======================
- CHILDES Corpus Readers
-=======================
-
-Read the XML version of the CHILDES corpus.
-
-How to use CHILDESCorpusReader
-==============================
-
-Import the CHILDESCorpusReader class and read the CHILDES corpus saved in
-the nltk_data directory.
-
-    >>> import nltk
-    >>> from nltk.corpus.reader import CHILDESCorpusReader
-    >>> corpus_root = nltk.data.find('corpora/childes/data-xml/Eng-USA-MOR/')
-
-Reading files in the Valian corpus (Valian, 1991).
-
-    >>> valian = CHILDESCorpusReader(corpus_root, 'Valian/.*.xml')
-    >>> valian.fileids()
-    ['Valian/01a.xml', 'Valian/01b.xml', 'Valian/02a.xml', 'Valian/02b.xml',...
-
-Count the number of files
-
-    >>> len(valian.fileids())
-    43
-
-Printing properties of the corpus files.
-
-    >>> corpus_data = valian.corpus(valian.fileids())
-    >>> print(corpus_data[0]['Lang'])
-    eng
-    >>> for key in sorted(corpus_data[0].keys()):
-    ...    print(key, ": ", corpus_data[0][key])
-    Corpus :  valian
-    Date :  1986-03-04
-    Id :  01a
-    Lang :  eng
-    Version :  2.0.1
-    {http://www.w3.org/2001/XMLSchema-instance}schemaLocation :  http://www.talkbank.org/ns/talkbank http://talkbank.org/software/talkbank.xsd
-
-Printing information of participants of the corpus. The most common codes for
-the participants are 'CHI' (target child), 'MOT' (mother), and 'INV' (investigator).
-
-    >>> corpus_participants = valian.participants(valian.fileids())
-    >>> for this_corpus_participants in corpus_participants[:2]:
-    ...     for key in sorted(this_corpus_participants.keys()):
-    ...         dct = this_corpus_participants[key]
-    ...         print(key, ": ", [(k, dct[k]) for k in sorted(dct.keys())])
-    CHI :  [('age', 'P2Y1M3D'), ('group', 'normal'), ('id', 'CHI'), ('language', 'eng'), ('role', 'Target_Child'), ('sex', 'female')]
-    INV :  [('id', 'INV'), ('language', 'eng'), ('role', 'Investigator')]
-    MOT :  [('id', 'MOT'), ('language', 'eng'), ('role', 'Mother')]
-    CHI :  [('age', 'P2Y1M12D'), ('group', 'normal'), ('id', 'CHI'), ('language', 'eng'), ('role', 'Target_Child'), ('sex', 'female')]
-    INV :  [('id', 'INV'), ('language', 'eng'), ('role', 'Investigator')]
-    MOT :  [('id', 'MOT'), ('language', 'eng'), ('role', 'Mother')]
-
-printing words.
-
-    >>> valian.words('Valian/01a.xml')
-    ['at', 'Parent', "Lastname's", 'house', 'with', 'Child', 'Lastname', ...
-
-printing sentences.
-
-    >>> valian.sents('Valian/01a.xml')
-    [['at', 'Parent', "Lastname's", 'house', 'with', 'Child', 'Lastname',
-      'and', 'it', 'is', 'March', 'fourth', 'I', 'believe', 'and', 'when',
-      'was', "Parent's", 'birthday'], ["Child's"], ['oh', "I'm", 'sorry'],
-      ["that's", 'okay'], ...
-
-You can specify the participants with the argument *speaker*.
-
-    >>> valian.words('Valian/01a.xml',speaker=['INV'])
-    ['at', 'Parent', "Lastname's", 'house', 'with', 'Child', 'Lastname', ...
-    >>> valian.words('Valian/01a.xml',speaker=['MOT'])
-    ["Child's", "that's", 'okay', 'February', 'first', 'nineteen', ...
-    >>> valian.words('Valian/01a.xml',speaker=['CHI'])
-    ['tape', 'it', 'up', 'and', 'two', 'tape', 'players', 'have',...
-
-
-tagged_words() and tagged_sents() return the usual (word,pos) tuple lists.
-POS tags in the CHILDES are automatically assigned by MOR and POST programs
-(MacWhinney, 2000).
-
-    >>> valian.tagged_words('Valian/01a.xml')[:30]
-    [('at', 'prep'), ('Parent', 'n:prop'), ("Lastname's", 'n:prop'), ('house', 'n'),
-    ('with', 'prep'), ('Child', 'n:prop'), ('Lastname', 'n:prop'), ('and', 'coord'),
-    ('it', 'pro'), ('is', 'v:cop'), ('March', 'n:prop'), ('fourth', 'adj'),
-    ('I', 'pro:sub'), ('believe', 'v'), ('and', 'coord'), ('when', 'adv:wh'),
-    ('was', 'v:cop'), ("Parent's", 'n:prop'), ('birthday', 'n'), ("Child's", 'n:prop'),
-    ('oh', 'co'), ("I'm", 'pro:sub'), ('sorry', 'adj'), ("that's", 'pro:dem'),
-    ('okay', 'adj'), ('February', 'n:prop'), ('first', 'adj'),
-    ('nineteen', 'det:num'), ('eighty', 'det:num'), ('four', 'det:num')]
-
-    >>> valian.tagged_sents('Valian/01a.xml')[:10]
-    [[('at', 'prep'), ('Parent', 'n:prop'), ("Lastname's", 'n:prop'), ('house', 'n'),
-    ('with', 'prep'), ('Child', 'n:prop'), ('Lastname', 'n:prop'), ('and', 'coord'),
-    ('it', 'pro'), ('is', 'v:cop'), ('March', 'n:prop'), ('fourth', 'adj'),
-    ('I', 'pro:sub'), ('believe', 'v'), ('and', 'coord'), ('when', 'adv:wh'),
-    ('was', 'v:cop'), ("Parent's", 'n:prop'), ('birthday', 'n')],
-    [("Child's", 'n:prop')], [('oh', 'co'), ("I'm", 'pro:sub'), ('sorry', 'adj')],
-    [("that's", 'pro:dem'), ('okay', 'adj')],
-    [('February', 'n:prop'), ('first', 'adj'), ('nineteen', 'det:num'),
-    ('eighty', 'det:num'), ('four', 'det:num')],
-    [('great', 'adj')],
-    [('and', 'coord'), ("she's", 'pro:sub'), ('two', 'det:num'), ('years', 'n'), ('old', 'adj')],
-    [('correct', 'adj')],
-    [('okay', 'co')], [('she', 'pro:sub'), ('just', 'adv:int'), ('turned', 'part'), ('two', 'det:num'),
-    ('a', 'det'), ('month', 'n'), ('ago', 'adv')]]
-
-When the argument *stem* is true, the word stems (e.g., 'is' -> 'be-3S') are
-used instead of the original words.
-
-    >>> valian.words('Valian/01a.xml')[:30]
-    ['at', 'Parent', "Lastname's", 'house', 'with', 'Child', 'Lastname', 'and', 'it', 'is', ...
-    >>> valian.words('Valian/01a.xml',stem=True)[:30]
-    ['at', 'Parent', 'Lastname', 's', 'house', 'with', 'Child', 'Lastname', 'and', 'it', 'be-3S', ...
-
-When the argument *replace* is true, the replaced words are used instead of
-the original words.
-
-    >>> valian.words('Valian/01a.xml',speaker='CHI')[247]
-    'tikteat'
-    >>> valian.words('Valian/01a.xml',speaker='CHI',replace=True)[247]
-    'trick'
-
-When the argument *relation* is true, the relational relationships in the
-sentence are returned. See Sagae et al. (2010) for details of the relational
-structure adopted in the CHILDES.
-
-    >>> valian.words('Valian/01a.xml',relation=True)[:10]
-    [[('at', 'prep', '1|0|ROOT'), ('Parent', 'n', '2|5|VOC'), ('Lastname', 'n', '3|5|MOD'), ('s', 'poss', '4|5|MOD'), ('house', 'n', '5|1|POBJ'), ('with', 'prep', '6|1|JCT'), ('Child', 'n', '7|8|NAME'), ('Lastname', 'n', '8|6|POBJ'), ('and', 'coord', '9|8|COORD'), ('it', 'pro', '10|11|SUBJ'), ('be-3S', 'v', '11|9|COMP'), ('March', 'n', '12|11|PRED'), ('fourth', 'adj', '13|12|MOD'), ('I', 'pro', '15|16|SUBJ'), ('believe', 'v', '16|14|ROOT'), ('and', 'coord', '18|17|ROOT'), ('when', 'adv', '19|20|PRED'), ('be-PAST', 'v', '20|18|COMP'), ('Parent', 'n', '21|23|MOD'), ('s', 'poss', '22|23|MOD'), ('birth', 'n', '23|20|SUBJ')], [('Child', 'n', '1|2|MOD'), ('s', 'poss', '2|0|ROOT')], [('oh', 'co', '1|4|COM'), ('I', 'pro', '3|4|SUBJ'), ('be', 'v', '4|0|ROOT'), ('sorry', 'adj', '5|4|PRED')], [('that', 'pro', '1|2|SUBJ'), ('be', 'v', '2|0|ROOT'), ('okay', 'adj', '3|2|PRED')], [('February', 'n', '1|6|VOC'), ('first', 'adj', '2|6|ENUM'), ('nineteen', 'det', '4|6|ENUM'), ('eighty', 'det', '5|6|ENUM'), ('four', 'det', '6|0|ROOT')], [('great', 'adj', '1|0|ROOT')], [('and', 'coord', '1|0|ROOT'), ('she', 'pro', '2|1|ROOT'), ('be', 'aux', '3|5|AUX'), ('two', 'det', '4|5|QUANT'), ('year-PL', 'n', '5|2|ROOT'), ('old', 'adj', '6|5|MOD')], [('correct', 'adj', '1|0|ROOT')], [('okay', 'co', '1|0|ROOT')], [('she', 'pro', '1|0|ROOT'), ('just', 'adv', '2|3|JCT'), ('turn-PERF', 'part', '3|1|XCOMP'), ('two', 'det', '4|6|QUANT'), ('a', 'det', '5|6|DET'), ('month', 'n', '6|3|OBJ'), ('ago', 'adv', '7|3|JCT')]]
-
-Printing age. When the argument *month* is true, the age information in
-the CHILDES format is converted into the number of months.
-
-    >>> valian.age()
-    ['P2Y1M3D', 'P2Y1M12D', 'P1Y9M21D', 'P1Y9M28D', 'P2Y1M23D', ...
-    >>> valian.age('Valian/01a.xml')
-    ['P2Y1M3D']
-    >>> valian.age('Valian/01a.xml',month=True)
-    [25]
-
-Printing MLU. The criteria for the MLU computation are broadly based on
-Brown (1973).
-
-    >>> valian.MLU()
-    [2.3574660633484..., 2.292682926829..., 3.492857142857..., 2.961783439490...,
-     2.0842696629213..., 3.169811320754..., 3.137404580152..., 3.0578034682080...,
-     4.090163934426..., 3.488372093023..., 2.8773584905660..., 3.4792899408284...,
-     4.0111940298507..., 3.456790123456..., 4.487603305785..., 4.007936507936...,
-     5.25, 5.154696132596..., ...]
-
-    >>> valian.MLU('Valian/01a.xml')
-    [2.35746606334...]
-
-
-Basic stuff
-==============================
-
-Count the number of words and sentences of each file.
-
-    >>> valian = CHILDESCorpusReader(corpus_root, 'Valian/.*.xml')
-    >>> for this_file in valian.fileids()[:6]:
-    ...     print(valian.corpus(this_file)[0]['Corpus'], valian.corpus(this_file)[0]['Id'])
-    ...     print("num of words: %i" % len(valian.words(this_file)))
-    ...     print("num of sents: %i" % len(valian.sents(this_file)))
-    valian 01a
-    num of words: 3606
-    num of sents: 1027
-    valian 01b
-    num of words: 4376
-    num of sents: 1274
-    valian 02a
-    num of words: 2673
-    num of sents: 801
-    valian 02b
-    num of words: 5020
-    num of sents: 1583
-    valian 03a
-    num of words: 2743
-    num of sents: 988
-    valian 03b
-    num of words: 4409
-    num of sents: 1397
diff --git a/nlp_resource_data/nltk/test/childes_fixt.py b/nlp_resource_data/nltk/test/childes_fixt.py

deleted file mode 100644 (file)

index 04701fb..0000000
--- a/nlp_resource_data/nltk/test/childes_fixt.py
+++ /dev/null
@@ -1,17 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import
-
-
-def setup_module(module):
-    from nose import SkipTest
-    import nltk.data
-
-    try:
-        nltk.data.find('corpora/childes/data-xml/Eng-USA-MOR/')
-    except LookupError as e:
-        print(e)
-        raise SkipTest(
-            "The CHILDES corpus is not found. "
-            "It should be manually downloaded and saved/unpacked "
-            "to [NLTK_Data_Dir]/corpora/childes/"
-        )
diff --git a/nlp_resource_data/nltk/test/chunk.doctest b/nlp_resource_data/nltk/test/chunk.doctest

deleted file mode 100644 (file)

index 6fd2ad7..0000000
--- a/nlp_resource_data/nltk/test/chunk.doctest
+++ /dev/null
@@ -1,373 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-==========
- Chunking
-==========
-
-    >>> from nltk.chunk import *
-    >>> from nltk.chunk.util import *
-    >>> from nltk.chunk.regexp import *
-    >>> from nltk import Tree
-
-    >>> tagged_text = "[ The/DT cat/NN ] sat/VBD on/IN [ the/DT mat/NN ] [ the/DT dog/NN ] chewed/VBD ./."
-    >>> gold_chunked_text = tagstr2tree(tagged_text)
-    >>> unchunked_text = gold_chunked_text.flatten()
-
-Chunking uses a special regexp syntax for rules that delimit the chunks. These
-rules must be converted to 'regular' regular expressions before a sentence can
-be chunked.
-
-    >>> tag_pattern = "<DT>?<JJ>*<NN.*>"
-    >>> regexp_pattern = tag_pattern2re_pattern(tag_pattern)
-    >>> regexp_pattern
-    '(<(DT)>)?(<(JJ)>)*(<(NN[^\\{\\}<>]*)>)'
-
-Construct some new chunking rules.
-
-    >>> chunk_rule = ChunkRule("<.*>+", "Chunk everything")
-    >>> chink_rule = ChinkRule("<VBD|IN|\.>", "Chink on verbs/prepositions")
-    >>> split_rule = SplitRule("<DT><NN>", "<DT><NN>",
-    ...                        "Split successive determiner/noun pairs")
-
-
-Create and score a series of chunk parsers, successively more complex.
-
-    >>> chunk_parser = RegexpChunkParser([chunk_rule], chunk_label='NP')
-    >>> chunked_text = chunk_parser.parse(unchunked_text)
-    >>> print(chunked_text)
-    (S
-      (NP
-        The/DT
-        cat/NN
-        sat/VBD
-        on/IN
-        the/DT
-        mat/NN
-        the/DT
-        dog/NN
-        chewed/VBD
-        ./.))
-
-    >>> chunkscore = ChunkScore()
-    >>> chunkscore.score(gold_chunked_text, chunked_text)
-    >>> print(chunkscore.precision())
-    0.0
-
-    >>> print(chunkscore.recall())
-    0.0
-
-    >>> print(chunkscore.f_measure())
-    0
-
-    >>> for chunk in sorted(chunkscore.missed()): print(chunk)
-    (NP The/DT cat/NN)
-    (NP the/DT dog/NN)
-    (NP the/DT mat/NN)
-
-    >>> for chunk in chunkscore.incorrect(): print(chunk)
-    (NP
-      The/DT
-      cat/NN
-      sat/VBD
-      on/IN
-      the/DT
-      mat/NN
-      the/DT
-      dog/NN
-      chewed/VBD
-      ./.)
-
-    >>> chunk_parser = RegexpChunkParser([chunk_rule, chink_rule],
-    ...                                  chunk_label='NP')
-    >>> chunked_text = chunk_parser.parse(unchunked_text)
-    >>> print(chunked_text)
-    (S
-      (NP The/DT cat/NN)
-      sat/VBD
-      on/IN
-      (NP the/DT mat/NN the/DT dog/NN)
-      chewed/VBD
-      ./.)
-    >>> assert chunked_text == chunk_parser.parse(list(unchunked_text))
-
-    >>> chunkscore = ChunkScore()
-    >>> chunkscore.score(gold_chunked_text, chunked_text)
-    >>> chunkscore.precision()
-    0.5
-
-    >>> print(chunkscore.recall())
-    0.33333333...
-
-    >>> print(chunkscore.f_measure())
-    0.4
-
-    >>> for chunk in sorted(chunkscore.missed()): print(chunk)
-    (NP the/DT dog/NN)
-    (NP the/DT mat/NN)
-
-    >>> for chunk in chunkscore.incorrect(): print(chunk)
-    (NP the/DT mat/NN the/DT dog/NN)
-
-    >>> chunk_parser = RegexpChunkParser([chunk_rule, chink_rule, split_rule],
-    ...                                  chunk_label='NP')
-    >>> chunked_text = chunk_parser.parse(unchunked_text, trace=True)
-    # Input:
-     <DT>  <NN>  <VBD>  <IN>  <DT>  <NN>  <DT>  <NN>  <VBD>  <.>
-    # Chunk everything:
-    {<DT>  <NN>  <VBD>  <IN>  <DT>  <NN>  <DT>  <NN>  <VBD>  <.>}
-    # Chink on verbs/prepositions:
-    {<DT>  <NN>} <VBD>  <IN> {<DT>  <NN>  <DT>  <NN>} <VBD>  <.>
-    # Split successive determiner/noun pairs:
-    {<DT>  <NN>} <VBD>  <IN> {<DT>  <NN>}{<DT>  <NN>} <VBD>  <.>
-    >>> print(chunked_text)
-    (S
-      (NP The/DT cat/NN)
-      sat/VBD
-      on/IN
-      (NP the/DT mat/NN)
-      (NP the/DT dog/NN)
-      chewed/VBD
-      ./.)
-
-    >>> chunkscore = ChunkScore()
-    >>> chunkscore.score(gold_chunked_text, chunked_text)
-    >>> chunkscore.precision()
-    1.0
-
-    >>> chunkscore.recall()
-    1.0
-
-    >>> chunkscore.f_measure()
-    1.0
-
-    >>> chunkscore.missed()
-    []
-
-    >>> chunkscore.incorrect()
-    []
-
-    >>> chunk_parser.rules() # doctest: +NORMALIZE_WHITESPACE
-    [<ChunkRule: '<.*>+'>, <ChinkRule: '<VBD|IN|\\.>'>,
-     <SplitRule: '<DT><NN>', '<DT><NN>'>]
-
-Printing parsers:
-
-    >>> print(repr(chunk_parser))
-    <RegexpChunkParser with 3 rules>
-    >>> print(chunk_parser)
-    RegexpChunkParser with 3 rules:
-        Chunk everything
-          <ChunkRule: '<.*>+'>
-        Chink on verbs/prepositions
-          <ChinkRule: '<VBD|IN|\\.>'>
-        Split successive determiner/noun pairs
-          <SplitRule: '<DT><NN>', '<DT><NN>'>
-
-Regression Tests
-~~~~~~~~~~~~~~~~
-ChunkParserI
-------------
-`ChunkParserI` is an abstract interface -- it is not meant to be
-instantiated directly.
-
-    >>> ChunkParserI().parse([])
-    Traceback (most recent call last):
-      . . .
-    NotImplementedError
-
-
-ChunkString
------------
-ChunkString can be built from a tree of tagged tuples, a tree of
-trees, or a mixed list of both:
-
-    >>> t1 = Tree('S', [('w%d' % i, 't%d' % i) for i in range(10)])
-    >>> t2 = Tree('S', [Tree('t0', []), Tree('t1', ['c1'])])
-    >>> t3 = Tree('S', [('w0', 't0'), Tree('t1', ['c1'])])
-    >>> ChunkString(t1)
-    <ChunkString: '<t0><t1><t2><t3><t4><t5><t6><t7><t8><t9>'>
-    >>> ChunkString(t2)
-    <ChunkString: '<t0><t1>'>
-    >>> ChunkString(t3)
-    <ChunkString: '<t0><t1>'>
-
-Other values generate an error:
-
-    >>> ChunkString(Tree('S', ['x']))
-    Traceback (most recent call last):
-      . . .
-    ValueError: chunk structures must contain tagged tokens or trees
-
-The `str()` for a chunk string adds spaces to it, which makes it line
-up with `str()` output for other chunk strings over the same
-underlying input.
-
-    >>> cs = ChunkString(t1)
-    >>> print(cs)
-     <t0>  <t1>  <t2>  <t3>  <t4>  <t5>  <t6>  <t7>  <t8>  <t9>
-    >>> cs.xform('<t3>', '{<t3>}')
-    >>> print(cs)
-     <t0>  <t1>  <t2> {<t3>} <t4>  <t5>  <t6>  <t7>  <t8>  <t9>
-
-The `_verify()` method makes sure that our transforms don't corrupt
-the chunk string.  By setting debug_level=2, `_verify()` will be
-called at the end of every call to `xform`.
-
-    >>> cs = ChunkString(t1, debug_level=3)
-
-    >>> # tag not marked with <...>:
-    >>> cs.xform('<t3>', 't3')
-    Traceback (most recent call last):
-      . . .
-    ValueError: Transformation generated invalid chunkstring:
-      <t0><t1><t2>t3<t4><t5><t6><t7><t8><t9>
-
-    >>> # brackets not balanced:
-    >>> cs.xform('<t3>', '{<t3>')
-    Traceback (most recent call last):
-      . . .
-    ValueError: Transformation generated invalid chunkstring:
-      <t0><t1><t2>{<t3><t4><t5><t6><t7><t8><t9>
-
-    >>> # nested brackets:
-    >>> cs.xform('<t3><t4><t5>', '{<t3>{<t4>}<t5>}')
-    Traceback (most recent call last):
-      . . .
-    ValueError: Transformation generated invalid chunkstring:
-      <t0><t1><t2>{<t3>{<t4>}<t5>}<t6><t7><t8><t9>
-
-    >>> # modified tags:
-    >>> cs.xform('<t3>', '<t9>')
-    Traceback (most recent call last):
-      . . .
-    ValueError: Transformation generated invalid chunkstring: tag changed
-
-    >>> # added tags:
-    >>> cs.xform('<t9>', '<t9><t10>')
-    Traceback (most recent call last):
-      . . .
-    ValueError: Transformation generated invalid chunkstring: tag changed
-
-Chunking Rules
---------------
-
-Test the different rule constructors & __repr__ methods:
-
-    >>> r1 = RegexpChunkRule('<a|b>'+ChunkString.IN_CHINK_PATTERN,
-    ...                      '{<a|b>}', 'chunk <a> and <b>')
-    >>> r2 = RegexpChunkRule(re.compile('<a|b>'+ChunkString.IN_CHINK_PATTERN),
-    ...                      '{<a|b>}', 'chunk <a> and <b>')
-    >>> r3 = ChunkRule('<a|b>', 'chunk <a> and <b>')
-    >>> r4 = ChinkRule('<a|b>', 'chink <a> and <b>')
-    >>> r5 = UnChunkRule('<a|b>', 'unchunk <a> and <b>')
-    >>> r6 = MergeRule('<a>', '<b>', 'merge <a> w/ <b>')
-    >>> r7 = SplitRule('<a>', '<b>', 'split <a> from <b>')
-    >>> r8 = ExpandLeftRule('<a>', '<b>', 'expand left <a> <b>')
-    >>> r9 = ExpandRightRule('<a>', '<b>', 'expand right <a> <b>')
-    >>> for rule in r1, r2, r3, r4, r5, r6, r7, r8, r9:
-    ...     print(rule)
-    <RegexpChunkRule: '<a|b>(?=[^\\}]*(\\{|$))'->'{<a|b>}'>
-    <RegexpChunkRule: '<a|b>(?=[^\\}]*(\\{|$))'->'{<a|b>}'>
-    <ChunkRule: '<a|b>'>
-    <ChinkRule: '<a|b>'>
-    <UnChunkRule: '<a|b>'>
-    <MergeRule: '<a>', '<b>'>
-    <SplitRule: '<a>', '<b>'>
-    <ExpandLeftRule: '<a>', '<b>'>
-    <ExpandRightRule: '<a>', '<b>'>
-
-`tag_pattern2re_pattern()` complains if the tag pattern looks problematic:
-
-    >>> tag_pattern2re_pattern('{}')
-    Traceback (most recent call last):
-      . . .
-    ValueError: Bad tag pattern: '{}'
-
-RegexpChunkParser
------------------
-
-A warning is printed when parsing an empty sentence:
-
-    >>> parser = RegexpChunkParser([ChunkRule('<a>', '')])
-    >>> parser.parse(Tree('S', []))
-    Warning: parsing empty text
-    Tree('S', [])
-
-RegexpParser
-------------
-
-    >>> parser = RegexpParser('''
-    ... NP: {<DT>? <JJ>* <NN>*} # NP
-    ... P: {<IN>}           # Preposition
-    ... V: {<V.*>}          # Verb
-    ... PP: {<P> <NP>}      # PP -> P NP
-    ... VP: {<V> <NP|PP>*}  # VP -> V (NP|PP)*
-    ... ''')
-    >>> print(repr(parser))
-    <chunk.RegexpParser with 5 stages>
-    >>> print(parser)
-    chunk.RegexpParser with 5 stages:
-    RegexpChunkParser with 1 rules:
-        NP   <ChunkRule: '<DT>? <JJ>* <NN>*'>
-    RegexpChunkParser with 1 rules:
-        Preposition   <ChunkRule: '<IN>'>
-    RegexpChunkParser with 1 rules:
-        Verb   <ChunkRule: '<V.*>'>
-    RegexpChunkParser with 1 rules:
-        PP -> P NP   <ChunkRule: '<P> <NP>'>
-    RegexpChunkParser with 1 rules:
-        VP -> V (NP|PP)*   <ChunkRule: '<V> <NP|PP>*'>
-    >>> print(parser.parse(unchunked_text, trace=True))
-    # Input:
-     <DT>  <NN>  <VBD>  <IN>  <DT>  <NN>  <DT>  <NN>  <VBD>  <.>
-    # NP:
-    {<DT>  <NN>} <VBD>  <IN> {<DT>  <NN>}{<DT>  <NN>} <VBD>  <.>
-    # Input:
-     <NP>  <VBD>  <IN>  <NP>  <NP>  <VBD>  <.>
-    # Preposition:
-     <NP>  <VBD> {<IN>} <NP>  <NP>  <VBD>  <.>
-    # Input:
-     <NP>  <VBD>  <P>  <NP>  <NP>  <VBD>  <.>
-    # Verb:
-     <NP> {<VBD>} <P>  <NP>  <NP> {<VBD>} <.>
-    # Input:
-     <NP>  <V>  <P>  <NP>  <NP>  <V>  <.>
-    # PP -> P NP:
-     <NP>  <V> {<P>  <NP>} <NP>  <V>  <.>
-    # Input:
-     <NP>  <V>  <PP>  <NP>  <V>  <.>
-    # VP -> V (NP|PP)*:
-     <NP> {<V>  <PP>  <NP>}{<V>} <.>
-    (S
-      (NP The/DT cat/NN)
-      (VP
-        (V sat/VBD)
-        (PP (P on/IN) (NP the/DT mat/NN))
-        (NP the/DT dog/NN))
-      (VP (V chewed/VBD))
-      ./.)
-
-Test parsing of other rule types:
-
-    >>> print(RegexpParser('''
-    ... X:
-    ...   }<a><b>{     # chink rule
-    ...   <a>}{<b>     # split rule
-    ...   <a>{}<b>     # merge rule
-    ...   <a>{<b>}<c>  # chunk rule w/ context
-    ... '''))
-    chunk.RegexpParser with 1 stages:
-    RegexpChunkParser with 4 rules:
-        chink rule              <ChinkRule: '<a><b>'>
-        split rule              <SplitRule: '<a>', '<b>'>
-        merge rule              <MergeRule: '<a>', '<b>'>
-        chunk rule w/ context   <ChunkRuleWithContext: '<a>', '<b>', '<c>'>
-
-Illegal patterns give an error message:
-
-    >>> print(RegexpParser('X: {<foo>} {<bar>}'))
-    Traceback (most recent call last):
-      . . .
-    ValueError: Illegal chunk pattern: {<foo>} {<bar>}
-
diff --git a/nlp_resource_data/nltk/test/classify.doctest b/nlp_resource_data/nltk/test/classify.doctest

deleted file mode 100644 (file)

index d208084..0000000
--- a/nlp_resource_data/nltk/test/classify.doctest
+++ /dev/null
@@ -1,183 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-=============
- Classifiers
-=============
-
-Classifiers label tokens with category labels (or *class labels*).
-Typically, labels are represented with strings (such as ``"health"``
-or ``"sports"``).  In NLTK, classifiers are defined using classes that
-implement the `ClassifierI` interface:
-
-    >>> import nltk
-    >>> nltk.usage(nltk.classify.ClassifierI)
-    ClassifierI supports the following operations:
-      - self.classify(featureset)
-      - self.classify_many(featuresets)
-      - self.labels()
-      - self.prob_classify(featureset)
-      - self.prob_classify_many(featuresets)
-
-NLTK defines several classifier classes:
-
-- `ConditionalExponentialClassifier`
-- `DecisionTreeClassifier`
-- `MaxentClassifier`
-- `NaiveBayesClassifier`
-- `WekaClassifier`
-
-Classifiers are typically created by training them on a training
-corpus.
-
-
-Regression Tests
-~~~~~~~~~~~~~~~~
-
-We define a very simple training corpus with 3 binary features: ['a',
-'b', 'c'], and two labels: ['x', 'y'].  We use a simple feature set so
-that the correct answers can be calculated analytically (although we
-haven't done this yet for all tests).
-
-    >>> train = [
-    ...     (dict(a=1,b=1,c=1), 'y'),
-    ...     (dict(a=1,b=1,c=1), 'x'),
-    ...     (dict(a=1,b=1,c=0), 'y'),
-    ...     (dict(a=0,b=1,c=1), 'x'),
-    ...     (dict(a=0,b=1,c=1), 'y'),
-    ...     (dict(a=0,b=0,c=1), 'y'),
-    ...     (dict(a=0,b=1,c=0), 'x'),
-    ...     (dict(a=0,b=0,c=0), 'x'),
-    ...     (dict(a=0,b=1,c=1), 'y'),
-    ...     ]
-    >>> test = [
-    ...     (dict(a=1,b=0,c=1)), # unseen
-    ...     (dict(a=1,b=0,c=0)), # unseen
-    ...     (dict(a=0,b=1,c=1)), # seen 3 times, labels=y,y,x
-    ...     (dict(a=0,b=1,c=0)), # seen 1 time, label=x
-    ...     ]
-
-Test the Naive Bayes classifier:
-
-    >>> classifier = nltk.classify.NaiveBayesClassifier.train(train)
-    >>> sorted(classifier.labels())
-    ['x', 'y']
-    >>> classifier.classify_many(test)
-    ['y', 'x', 'y', 'x']
-    >>> for pdist in classifier.prob_classify_many(test):
-    ...     print('%.4f %.4f' % (pdist.prob('x'), pdist.prob('y')))
-    0.3203 0.6797
-    0.5857 0.4143
-    0.3792 0.6208
-    0.6470 0.3530
-    >>> classifier.show_most_informative_features()
-    Most Informative Features
-                           c = 0                   x : y      =      2.0 : 1.0
-                           c = 1                   y : x      =      1.5 : 1.0
-                           a = 1                   y : x      =      1.4 : 1.0
-                           b = 0                   x : y      =      1.2 : 1.0
-                           a = 0                   x : y      =      1.2 : 1.0
-                           b = 1                   y : x      =      1.1 : 1.0
-
-Test the Decision Tree classifier:
-
-    >>> classifier = nltk.classify.DecisionTreeClassifier.train(
-    ...     train, entropy_cutoff=0,
-    ...                                                support_cutoff=0)
-    >>> sorted(classifier.labels())
-    ['x', 'y']
-    >>> print(classifier)
-    c=0? .................................................. x
-      a=0? ................................................ x
-      a=1? ................................................ y
-    c=1? .................................................. y
-    <BLANKLINE>
-    >>> classifier.classify_many(test)
-    ['y', 'y', 'y', 'x']
-    >>> for pdist in classifier.prob_classify_many(test):
-    ...     print('%.4f %.4f' % (pdist.prob('x'), pdist.prob('y')))
-    Traceback (most recent call last):
-      . . .
-    NotImplementedError
-
-Test SklearnClassifier, which requires the scikit-learn package.
-
-    >>> from nltk.classify import SklearnClassifier
-    >>> from sklearn.naive_bayes import BernoulliNB
-    >>> from sklearn.svm import SVC
-    >>> train_data = [({"a": 4, "b": 1, "c": 0}, "ham"),
-    ...               ({"a": 5, "b": 2, "c": 1}, "ham"),
-    ...               ({"a": 0, "b": 3, "c": 4}, "spam"),
-    ...               ({"a": 5, "b": 1, "c": 1}, "ham"),
-    ...               ({"a": 1, "b": 4, "c": 3}, "spam")]
-    >>> classif = SklearnClassifier(BernoulliNB()).train(train_data)
-    >>> test_data = [{"a": 3, "b": 2, "c": 1},
-    ...              {"a": 0, "b": 3, "c": 7}]
-    >>> classif.classify_many(test_data)
-    ['ham', 'spam']
-    >>> classif = SklearnClassifier(SVC(), sparse=False).train(train_data)
-    >>> classif.classify_many(test_data)
-    ['ham', 'spam']
-
-Test the Maximum Entropy classifier training algorithms; they should all
-generate the same results.
-
-    >>> def print_maxent_test_header():
-    ...     print(' '*11+''.join(['      test[%s]  ' % i
-    ...                           for i in range(len(test))]))
-    ...     print(' '*11+'     p(x)  p(y)'*len(test))
-    ...     print('-'*(11+15*len(test)))
-
-    >>> def test_maxent(algorithm):
-    ...     print('%11s' % algorithm, end=' ')
-    ...     try:
-    ...         classifier = nltk.classify.MaxentClassifier.train(
-    ...                         train, algorithm, trace=0, max_iter=1000)
-    ...     except Exception as e:
-    ...         print('Error: %r' % e)
-    ...         return
-    ...
-    ...     for featureset in test:
-    ...         pdist = classifier.prob_classify(featureset)
-    ...         print('%8.2f%6.2f' % (pdist.prob('x'), pdist.prob('y')), end=' ')
-    ...     print()
-
-    >>> print_maxent_test_header(); test_maxent('GIS'); test_maxent('IIS')
-                     test[0]        test[1]        test[2]        test[3]
-                    p(x)  p(y)     p(x)  p(y)     p(x)  p(y)     p(x)  p(y)
-    -----------------------------------------------------------------------
-            GIS     0.16  0.84     0.46  0.54     0.41  0.59     0.76  0.24
-            IIS     0.16  0.84     0.46  0.54     0.41  0.59     0.76  0.24
-
-    >>> test_maxent('MEGAM'); test_maxent('TADM') # doctest: +SKIP
-            MEGAM   0.16  0.84     0.46  0.54     0.41  0.59     0.76  0.24
-            TADM    0.16  0.84     0.46  0.54     0.41  0.59     0.76  0.24
-
-
-
-Regression tests for TypedMaxentFeatureEncoding
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-    >>> from nltk.classify import maxent
-    >>> train = [
-    ...     ({'a': 1, 'b': 1, 'c': 1}, 'y'),
-    ...     ({'a': 5, 'b': 5, 'c': 5}, 'x'),
-    ...     ({'a': 0.9, 'b': 0.9, 'c': 0.9}, 'y'),
-    ...     ({'a': 5.5, 'b': 5.4, 'c': 5.3}, 'x'),
-    ...     ({'a': 0.8, 'b': 1.2, 'c': 1}, 'y'),
-    ...     ({'a': 5.1, 'b': 4.9, 'c': 5.2}, 'x')
-    ... ]
-
-    >>> test = [
-    ...     {'a': 1, 'b': 0.8, 'c': 1.2},
-    ...     {'a': 5.2, 'b': 5.1, 'c': 5}
-    ... ]
-
-    >>> encoding = maxent.TypedMaxentFeatureEncoding.train(
-    ...     train, count_cutoff=3, alwayson_features=True)
-
-    >>> classifier = maxent.MaxentClassifier.train(
-    ...     train, bernoulli=False, encoding=encoding, trace=0)
-
-    >>> classifier.classify_many(test)
-    ['y', 'x']
diff --git a/nlp_resource_data/nltk/test/classify_fixt.py b/nlp_resource_data/nltk/test/classify_fixt.py

deleted file mode 100644 (file)

index dce0704..0000000
--- a/nlp_resource_data/nltk/test/classify_fixt.py
+++ /dev/null
@@ -1,12 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import
-
-
-# most of classify.doctest requires numpy
-def setup_module(module):
-    from nose import SkipTest
-
-    try:
-        import numpy
-    except ImportError:
-        raise SkipTest("classify.doctest requires numpy")
diff --git a/nlp_resource_data/nltk/test/collections.doctest b/nlp_resource_data/nltk/test/collections.doctest

deleted file mode 100644 (file)

index 6a67511..0000000
--- a/nlp_resource_data/nltk/test/collections.doctest
+++ /dev/null
@@ -1,20 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-===========
-Collections
-===========
-
-    >>> import nltk
-    >>> from nltk.collections import *
-
-Trie
-----
-
-Trie can be pickled:
-
-    >>> import pickle
-    >>> trie = nltk.collections.Trie(['a'])
-    >>> s = pickle.dumps(trie)
-    >>> pickle.loads(s)
-    {'a': {True: None}}
-\ No newline at end of file
diff --git a/nlp_resource_data/nltk/test/collocations.doctest b/nlp_resource_data/nltk/test/collocations.doctest

deleted file mode 100644 (file)

index b4af859..0000000
--- a/nlp_resource_data/nltk/test/collocations.doctest
+++ /dev/null
@@ -1,276 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-==============
- Collocations
-==============
-
-Overview
-~~~~~~~~
-
-Collocations are expressions of multiple words which commonly co-occur. For
-example, the top ten bigram collocations in Genesis are listed below, as
-measured using Pointwise Mutual Information.
-
-    >>> import nltk
-    >>> from nltk.collocations import *
-    >>> bigram_measures = nltk.collocations.BigramAssocMeasures()
-    >>> trigram_measures = nltk.collocations.TrigramAssocMeasures()
-    >>> finder = BigramCollocationFinder.from_words(
-    ...     nltk.corpus.genesis.words('english-web.txt'))
-    >>> finder.nbest(bigram_measures.pmi, 10)  # doctest: +NORMALIZE_WHITESPACE
-    [(u'Allon', u'Bacuth'), (u'Ashteroth', u'Karnaim'), (u'Ben', u'Ammi'),
-     (u'En', u'Mishpat'), (u'Jegar', u'Sahadutha'), (u'Salt', u'Sea'),
-     (u'Whoever', u'sheds'), (u'appoint', u'overseers'), (u'aromatic', u'resin'),
-     (u'cutting', u'instrument')]
-
-While these words are highly collocated, the expressions are also very
-infrequent.  Therefore it is useful to apply filters, such as ignoring all
-bigrams which occur less than three times in the corpus:
-
-    >>> finder.apply_freq_filter(3)
-    >>> finder.nbest(bigram_measures.pmi, 10)  # doctest: +NORMALIZE_WHITESPACE
-    [(u'Beer', u'Lahai'), (u'Lahai', u'Roi'), (u'gray', u'hairs'),
-     (u'Most', u'High'), (u'ewe', u'lambs'), (u'many', u'colors'),
-     (u'burnt', u'offering'), (u'Paddan', u'Aram'), (u'east', u'wind'),
-     (u'living', u'creature')]
-
-We may similarly find collocations among tagged words:
-
-    >>> finder = BigramCollocationFinder.from_words(
-    ...     nltk.corpus.brown.tagged_words('ca01', tagset='universal'))
-    >>> finder.nbest(bigram_measures.pmi, 5)  # doctest: +NORMALIZE_WHITESPACE
-    [(('1,119', 'NUM'), ('votes', 'NOUN')),
-     (('1962', 'NUM'), ("governor's", 'NOUN')),
-     (('637', 'NUM'), ('E.', 'NOUN')),
-     (('Alpharetta', 'NOUN'), ('prison', 'NOUN')),
-     (('Bar', 'NOUN'), ('Association', 'NOUN'))]
-
-Or tags alone:
-
-    >>> finder = BigramCollocationFinder.from_words(t for w, t in
-    ...     nltk.corpus.brown.tagged_words('ca01', tagset='universal'))
-    >>> finder.nbest(bigram_measures.pmi, 10)  # doctest: +NORMALIZE_WHITESPACE
-    [('PRT', 'VERB'), ('PRON', 'VERB'), ('ADP', 'DET'), ('.', 'PRON'), ('DET', 'ADJ'),
-     ('CONJ', 'PRON'), ('ADP', 'NUM'), ('NUM', '.'), ('ADV', 'ADV'), ('VERB', 'ADV')]
-
-Or spanning intervening words:
-
-    >>> finder = BigramCollocationFinder.from_words(
-    ...     nltk.corpus.genesis.words('english-web.txt'),
-    ...     window_size = 20)
-    >>> finder.apply_freq_filter(2)
-    >>> ignored_words = nltk.corpus.stopwords.words('english')
-    >>> finder.apply_word_filter(lambda w: len(w) < 3 or w.lower() in ignored_words)
-    >>> finder.nbest(bigram_measures.likelihood_ratio, 10) # doctest: +NORMALIZE_WHITESPACE
-    [(u'chief', u'chief'), (u'became', u'father'), (u'years', u'became'),
-     (u'hundred', u'years'), (u'lived', u'became'), (u'king', u'king'),
-     (u'lived', u'years'), (u'became', u'became'), (u'chief', u'chiefs'),
-     (u'hundred', u'became')]
-
-Finders
-~~~~~~~
-
-The collocations package provides collocation finders which by default
-consider all ngrams in a text as candidate collocations:
-
-    >>> text = "I do not like green eggs and ham, I do not like them Sam I am!"
-    >>> tokens = nltk.wordpunct_tokenize(text)
-    >>> finder = BigramCollocationFinder.from_words(tokens)
-    >>> scored = finder.score_ngrams(bigram_measures.raw_freq)
-    >>> sorted(bigram for bigram, score in scored)  # doctest: +NORMALIZE_WHITESPACE
-    [(',', 'I'), ('I', 'am'), ('I', 'do'), ('Sam', 'I'), ('am', '!'),
-     ('and', 'ham'), ('do', 'not'), ('eggs', 'and'), ('green', 'eggs'),
-     ('ham', ','), ('like', 'green'), ('like', 'them'), ('not', 'like'),
-     ('them', 'Sam')]
-
-We could otherwise construct the collocation finder from manually-derived
-FreqDists:
-
-    >>> word_fd = nltk.FreqDist(tokens)
-    >>> bigram_fd = nltk.FreqDist(nltk.bigrams(tokens))
-    >>> finder = BigramCollocationFinder(word_fd, bigram_fd)
-    >>> scored == finder.score_ngrams(bigram_measures.raw_freq)
-    True
-
-A similar interface is provided for trigrams:
-
-    >>> finder = TrigramCollocationFinder.from_words(tokens)
-    >>> scored = finder.score_ngrams(trigram_measures.raw_freq)
-    >>> set(trigram for trigram, score in scored) == set(nltk.trigrams(tokens))
-    True
-
-We may want to select only the top n results:
-
-    >>> sorted(finder.nbest(trigram_measures.raw_freq, 2))
-    [('I', 'do', 'not'), ('do', 'not', 'like')]
-
-Alternatively, we can select those above a minimum score value:
-
-    >>> sorted(finder.above_score(trigram_measures.raw_freq,
-    ...                           1.0 / len(tuple(nltk.trigrams(tokens)))))
-    [('I', 'do', 'not'), ('do', 'not', 'like')]
-
-Now spanning intervening words:
-
-    >>> finder = TrigramCollocationFinder.from_words(tokens)
-    >>> finder = TrigramCollocationFinder.from_words(tokens, window_size=4)
-    >>> sorted(finder.nbest(trigram_measures.raw_freq, 4))
-    [('I', 'do', 'like'), ('I', 'do', 'not'), ('I', 'not', 'like'), ('do', 'not', 'like')]
-    
-A closer look at the finder's ngram frequencies:
-
-    >>> sorted(finder.ngram_fd.items(), key=lambda t: (-t[1], t[0]))[:10]  # doctest: +NORMALIZE_WHITESPACE
-    [(('I', 'do', 'like'), 2), (('I', 'do', 'not'), 2), (('I', 'not', 'like'), 2),
-     (('do', 'not', 'like'), 2), ((',', 'I', 'do'), 1), ((',', 'I', 'not'), 1),
-     ((',', 'do', 'not'), 1), (('I', 'am', '!'), 1), (('Sam', 'I', '!'), 1),
-     (('Sam', 'I', 'am'), 1)]
-
-
-Filtering candidates
-~~~~~~~~~~~~~~~~~~~~
-
-All the ngrams in a text are often too many to be useful when finding
-collocations.  It is generally useful to remove some words or punctuation,
-and to require a minimum frequency for candidate collocations.
-
-Given our sample text above, if we remove all trigrams containing personal
-pronouns from candidature, score_ngrams should return 6 fewer results, and
-'do not like' will be the only candidate which occurs more than once:
-
-    >>> finder = TrigramCollocationFinder.from_words(tokens)
-    >>> len(finder.score_ngrams(trigram_measures.raw_freq))
-    14
-    >>> finder.apply_word_filter(lambda w: w in ('I', 'me'))
-    >>> len(finder.score_ngrams(trigram_measures.raw_freq))
-    8
-    >>> sorted(finder.above_score(trigram_measures.raw_freq,
-    ...                           1.0 / len(tuple(nltk.trigrams(tokens)))))
-    [('do', 'not', 'like')]
-
-Sometimes a filter is a function on the whole ngram, rather than each word.
-For example, we may permit 'and' to appear in the middle of a trigram, but
-not on either edge:
-
-    >>> finder.apply_ngram_filter(lambda w1, w2, w3: 'and' in (w1, w3))
-    >>> len(finder.score_ngrams(trigram_measures.raw_freq))
-    6
-
-Finally, it is often important to remove low frequency candidates, as we
-lack sufficient evidence about their significance as collocations:
-
-    >>> finder.apply_freq_filter(2)
-    >>> len(finder.score_ngrams(trigram_measures.raw_freq))
-    1
-
-Association measures
-~~~~~~~~~~~~~~~~~~~~
-
-A number of measures are available to score collocations or other associations.
-The arguments to measure functions are marginals of a contingency table, in the
-bigram case (n_ii, (n_ix, n_xi), n_xx)::
-
-            w1    ~w1
-         ------ ------
-     w2 | n_ii | n_oi | = n_xi
-         ------ ------
-    ~w2 | n_io | n_oo |
-         ------ ------
-         = n_ix        TOTAL = n_xx
-
-We test their calculation using some known values presented in Manning and
-Schutze's text and other papers.
-
-Student's t: examples from Manning and Schutze 5.3.2
-
-   >>> print('%0.4f' % bigram_measures.student_t(8, (15828, 4675), 14307668))
-   0.9999
-   >>> print('%0.4f' % bigram_measures.student_t(20, (42, 20), 14307668))
-   4.4721
-
-Chi-square: examples from Manning and Schutze 5.3.3
-
-   >>> print('%0.2f' % bigram_measures.chi_sq(8, (15828, 4675), 14307668))
-   1.55
-   >>> print('%0.0f' % bigram_measures.chi_sq(59, (67, 65), 571007))
-   456400
-
-Likelihood ratios: examples from Dunning, CL, 1993
-
-   >>> print('%0.2f' % bigram_measures.likelihood_ratio(110, (2552, 221), 31777))
-   270.72
-   >>> print('%0.2f' % bigram_measures.likelihood_ratio(8, (13, 32), 31777))
-   95.29
-
-Pointwise Mutual Information: examples from Manning and Schutze 5.4
-
-   >>> print('%0.2f' % bigram_measures.pmi(20, (42, 20), 14307668))
-   18.38
-   >>> print('%0.2f' % bigram_measures.pmi(20, (15019, 15629), 14307668))
-   0.29
-
-TODO: Find authoritative results for trigrams.
-
-Using contingency table values
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-While frequency counts make marginals readily available for collocation
-finding, it is common to find published contingency table values. The
-collocations package therefore provides a wrapper, ContingencyMeasures, which
-wraps an association measures class, providing association measures which
-take contingency values as arguments, (n_ii, n_io, n_oi, n_oo) in the
-bigram case.
-
-   >>> from nltk.metrics import ContingencyMeasures
-   >>> cont_bigram_measures = ContingencyMeasures(bigram_measures)
-   >>> print('%0.2f' % cont_bigram_measures.likelihood_ratio(8, 5, 24, 31740))
-   95.29
-   >>> print('%0.2f' % cont_bigram_measures.chi_sq(8, 15820, 4667, 14287173))
-   1.55
-
-Ranking and correlation
-~~~~~~~~~~~~~~~~~~~~~~~
-
-It is useful to consider the results of finding collocations as a ranking, and
-the rankings output using different association measures can be compared using
-the Spearman correlation coefficient.
-
-Ranks can be assigned to a sorted list of results trivially by assigning
-strictly increasing ranks to each result:
-
-    >>> from nltk.metrics.spearman import *
-    >>> results_list = ['item1', 'item2', 'item3', 'item4', 'item5']
-    >>> print(list(ranks_from_sequence(results_list)))
-    [('item1', 0), ('item2', 1), ('item3', 2), ('item4', 3), ('item5', 4)]
-
-If scores are available for each result, we may allow sufficiently similar
-results (differing by no more than rank_gap) to be assigned the same rank:
-
-    >>> results_scored = [('item1', 50.0), ('item2', 40.0), ('item3', 38.0),
-    ...                   ('item4', 35.0), ('item5', 14.0)]
-    >>> print(list(ranks_from_scores(results_scored, rank_gap=5)))
-    [('item1', 0), ('item2', 1), ('item3', 1), ('item4', 1), ('item5', 4)]
-
-The Spearman correlation coefficient gives a number from -1.0 to 1.0 comparing
-two rankings.  A coefficient of 1.0 indicates identical rankings; -1.0 indicates
-exact opposite rankings.
-
-    >>> print('%0.1f' % spearman_correlation(
-    ...         ranks_from_sequence(results_list),
-    ...         ranks_from_sequence(results_list)))
-    1.0
-    >>> print('%0.1f' % spearman_correlation(
-    ...         ranks_from_sequence(reversed(results_list)),
-    ...         ranks_from_sequence(results_list)))
-    -1.0
-    >>> results_list2 = ['item2', 'item3', 'item1', 'item5', 'item4']
-    >>> print('%0.1f' % spearman_correlation(
-    ...        ranks_from_sequence(results_list),
-    ...        ranks_from_sequence(results_list2)))
-    0.6
-    >>> print('%0.1f' % spearman_correlation(
-    ...        ranks_from_sequence(reversed(results_list)),
-    ...        ranks_from_sequence(results_list2)))
-    -0.6
-
-
diff --git a/nlp_resource_data/nltk/test/compat.doctest b/nlp_resource_data/nltk/test/compat.doctest

deleted file mode 100644 (file)

index 1d668f3..0000000
--- a/nlp_resource_data/nltk/test/compat.doctest
+++ /dev/null
@@ -1,134 +0,0 @@
-
-=========================================
-NLTK Python 2.x - 3.x Compatibility Layer
-=========================================
-
-NLTK comes with a Python 2.x/3.x compatibility layer, nltk.compat
-(which is loosely based on `six <http://packages.python.org/six/>`_)::
-
-    >>> from nltk import compat
-    >>> compat.PY3
-    False
-    >>> # and so on
-
-@python_2_unicode_compatible
-----------------------------
-
-Under Python 2.x ``__str__`` and ``__repr__`` methods must
-return bytestrings.
-
-``@python_2_unicode_compatible`` decorator allows writing these methods
-in a way compatible with Python 3.x:
-
-1) wrap a class with this decorator,
-2) define ``__str__`` and ``__repr__`` methods returning unicode text
-   (that's what they must return under Python 3.x),
-
-and they would be fixed under Python 2.x to return byte strings::
-
-    >>> from nltk.compat import python_2_unicode_compatible
-
-    >>> @python_2_unicode_compatible
-    ... class Foo(object):
-    ...     def __str__(self):
-    ...         return u'__str__ is called'
-    ...     def __repr__(self):
-    ...         return u'__repr__ is called'
-
-    >>> foo = Foo()
-    >>> foo.__str__().__class__
-    <type 'str'>
-    >>> foo.__repr__().__class__
-    <type 'str'>
-    >>> print(foo)
-    __str__ is called
-    >>> foo
-    __repr__ is called
-
-Original versions of ``__str__`` and ``__repr__`` are available as
-``__unicode__`` and ``unicode_repr``::
-
-    >>> foo.__unicode__().__class__
-    <type 'unicode'>
-    >>> foo.unicode_repr().__class__
-    <type 'unicode'>
-    >>> unicode(foo)
-    u'__str__ is called'
-    >>> foo.unicode_repr()
-    u'__repr__ is called'
-
-There is no need to wrap a subclass with ``@python_2_unicode_compatible``
-if it doesn't override ``__str__`` and ``__repr__``::
-
-    >>> class Bar(Foo):
-    ...     pass
-    >>> bar = Bar()
-    >>> bar.__str__().__class__
-    <type 'str'>
-
-However, if a subclass overrides ``__str__`` or ``__repr__``,
-wrap it again::
-
-    >>> class BadBaz(Foo):
-    ...     def __str__(self):
-    ...         return u'Baz.__str__'
-    >>> baz = BadBaz()
-    >>> baz.__str__().__class__  # this is incorrect!
-    <type 'unicode'>
-
-    >>> @python_2_unicode_compatible
-    ... class GoodBaz(Foo):
-    ...     def __str__(self):
-    ...         return u'Baz.__str__'
-    >>> baz = GoodBaz()
-    >>> baz.__str__().__class__
-    <type 'str'>
-    >>> baz.__unicode__().__class__
-    <type 'unicode'>
-
-Applying ``@python_2_unicode_compatible`` to a subclass
-shouldn't break methods that were not overridden::
-
-    >>> baz.__repr__().__class__
-    <type 'str'>
-    >>> baz.unicode_repr().__class__
-    <type 'unicode'>
-
-unicode_repr
-------------
-
-Under Python 3.x ``repr(unicode_string)`` doesn't have a leading "u" letter.
-
-``nltk.compat.unicode_repr`` function may be used instead of ``repr`` and
-``"%r" % obj`` to make the output more consistent under Python 2.x and 3.x::
-
-    >>> from nltk.compat import unicode_repr
-    >>> print(repr(u"test"))
-    u'test'
-    >>> print(unicode_repr(u"test"))
-    'test'
-
-It may also be used to get an original unescaped repr (as unicode)
-of objects whose class was fixed by ``@python_2_unicode_compatible``
-decorator::
-
-    >>> @python_2_unicode_compatible
-    ... class Foo(object):
-    ...     def __repr__(self):
-    ...         return u'<Foo: foo>'
-
-    >>> foo = Foo()
-    >>> repr(foo)
-    '<Foo: foo>'
-    >>> unicode_repr(foo)
-    u'<Foo: foo>'
-
-For other objects it returns the same value as ``repr``::
-
-    >>> unicode_repr(5)
-    '5'
-
-It may be a good idea to use ``unicode_repr`` instead of ``%r``
-string formatting specifier inside ``__repr__`` or ``__str__``
-methods of classes fixed by ``@python_2_unicode_compatible``
-to make the output consistent between Python 2.x and 3.x.
diff --git a/nlp_resource_data/nltk/test/compat_fixt.py b/nlp_resource_data/nltk/test/compat_fixt.py

deleted file mode 100644 (file)

index 5878d9b..0000000
--- a/nlp_resource_data/nltk/test/compat_fixt.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import
-from nltk.compat import PY3
-
-
-def setup_module(module):
-    from nose import SkipTest
-
-    if PY3:
-        raise SkipTest("compat.doctest is for Python 2.x")
diff --git a/nlp_resource_data/nltk/test/concordance.doctest b/nlp_resource_data/nltk/test/concordance.doctest

deleted file mode 100644 (file)

index 8f11fc8..0000000
--- a/nlp_resource_data/nltk/test/concordance.doctest
+++ /dev/null
@@ -1,68 +0,0 @@
-.. Copyright (C) 2001-2016 NLTK Project
-.. For license information, see LICENSE.TXT
-
-==================================
-Concordance Example
-==================================
-
-A concordance view shows us every occurrence of a given
-word, together with some context. Here we look up the word monstrous
-in Moby Dick by entering text1 followed by a period, then the term
-concordance, and then placing "monstrous" in parentheses:
-
->>> from nltk.corpus import gutenberg
->>> from nltk.text import Text
->>> corpus = gutenberg.words('melville-moby_dick.txt')
->>> text = Text(corpus)
-
->>> text.concordance("monstrous") # doctest:+NORMALIZE_WHITESPACE
-Displaying 11 of 11 matches:
-ong the former , one was of a most monstrous size . ... This came towards us ,
-ON OF THE PSALMS . " Touching that monstrous bulk of the whale or ork we have r
-ll over with a heathenish array of monstrous clubs and spears . Some were thick
-d as you gazed , and wondered what monstrous cannibal and savage could ever hav
-that has survived the flood ; most monstrous and most mountainous ! That Himmal
-they might scout at Moby Dick as a monstrous fable , or still worse and more de
-th of Radney .'" CHAPTER 55 Of the Monstrous Pictures of Whales . I shall ere l
-ing Scenes . In connexion with the monstrous pictures of whales , I am strongly
-ere to enter upon those still more monstrous stories of them which are to be fo
-ght have been rummaged out of this monstrous cabinet there is no telling . But
-of Whale - Bones ; for Whales of a monstrous size are oftentimes cast up dead u
-
->>> text.concordance("monstrous") # doctest:+ELLIPSIS, +NORMALIZE_WHITESPACE
-Displaying 11 of 11 matches:
-ong the former , one was of a most monstrous size . ... This came towards us ,
-ON OF THE PSALMS . " Touching that monstrous bulk of the whale or ork we have r
-ll over with a heathenish array of monstrous clubs and spears . Some were thick
-...
-
-=================================
-Concordance List
-=================================
-
-Often we need to store the results of concordance for further usage.
-To do so, call the ``concordance_list`` function, which returns the
-matches instead of printing them:
-
->>> from nltk.corpus import gutenberg
->>> from nltk.text import Text
->>> corpus = gutenberg.words('melville-moby_dick.txt')
->>> text = Text(corpus)
->>> con_list = text.concordance_list("monstrous")
->>> con_list[2].line
-'ll over with a heathenish array of monstrous clubs and spears . Some were thick'
->>> len(con_list)
-11
-
-=================================
-Patching Issue #2088
-=================================
-
-Patching https://github.com/nltk/nltk/issues/2088
-The left slice of the left context should be clipped to 0 if the `i-context` < 0.
-
->>> from nltk import Text, word_tokenize
->>> jane_eyre = 'Chapter 1\nTHERE was no possibility of taking a walk that day. We had been wandering, indeed, in the leafless shrubbery an hour in the morning; but since dinner (Mrs. Reed, when there was no company, dined early) the cold winter wind had brought with it clouds so sombre, and a rain so penetrating, that further outdoor exercise was now out of the question.'
->>> text = Text(word_tokenize(jane_eyre))
->>> text.concordance_list('taking')[0].left
-['Chapter', '1', 'THERE', 'was', 'no', 'possibility', 'of']
diff --git a/nlp_resource_data/nltk/test/corpus.doctest b/nlp_resource_data/nltk/test/corpus.doctest

deleted file mode 100644 (file)

index 3fa0ae6..0000000
--- a/nlp_resource_data/nltk/test/corpus.doctest
+++ /dev/null
@@ -1,2200 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-================
- Corpus Readers
-================
-
-The `nltk.corpus` package defines a collection of *corpus reader*
-classes, which can be used to access the contents of a diverse set of
-corpora.  The list of available corpora is given at:
-
-http://www.nltk.org/nltk_data/
-
-Each corpus reader class is specialized to handle a specific
-corpus format.  In addition, the `nltk.corpus` package automatically
-creates a set of corpus reader instances that can be used to access
-the corpora in the NLTK data package.
-Section `Corpus Reader Objects`_ ("Corpus Reader Objects") describes
-the corpus reader instances that can be used to read the corpora in
-the NLTK data package.  Section `Corpus Reader Classes`_ ("Corpus
-Reader Classes") describes the corpus reader classes themselves, and
-discusses the issues involved in creating new corpus reader objects
-and new corpus reader classes.  Section `Regression Tests`_
-("Regression Tests") contains regression tests for the corpus readers
-and associated functions and classes.
-
-.. contents:: **Table of Contents**
-  :depth: 2
-  :backlinks: none
-
----------------------
-Corpus Reader Objects
----------------------
-
-Overview
-========
-
-NLTK includes a diverse set of corpora which can be
-read using the ``nltk.corpus`` package.  Each corpus is accessed by
-means of a "corpus reader" object from ``nltk.corpus``:
-
-    >>> import nltk.corpus
-    >>> # The Brown corpus:
-    >>> print(str(nltk.corpus.brown).replace('\\\\','/'))
-    <CategorizedTaggedCorpusReader in '.../corpora/brown'...>
-    >>> # The Penn Treebank Corpus:
-    >>> print(str(nltk.corpus.treebank).replace('\\\\','/'))
-    <BracketParseCorpusReader in '.../corpora/treebank/combined'...>
-    >>> # The Name Genders Corpus:
-    >>> print(str(nltk.corpus.names).replace('\\\\','/'))
-    <WordListCorpusReader in '.../corpora/names'...>
-    >>> # The Inaugural Address Corpus:
-    >>> print(str(nltk.corpus.inaugural).replace('\\\\','/'))
-    <PlaintextCorpusReader in '.../corpora/inaugural'...>
-
-Most corpora consist of a set of files, each containing a document (or
-other pieces of text).  A list of identifiers for these files is
-accessed via the ``fileids()`` method of the corpus reader:
-
-    >>> nltk.corpus.treebank.fileids() # doctest: +ELLIPSIS
-    ['wsj_0001.mrg', 'wsj_0002.mrg', 'wsj_0003.mrg', 'wsj_0004.mrg', ...]
-    >>> nltk.corpus.inaugural.fileids() # doctest: +ELLIPSIS
-    ['1789-Washington.txt', '1793-Washington.txt', '1797-Adams.txt', ...]
-
-Each corpus reader provides a variety of methods to read data from the
-corpus, depending on the format of the corpus.  For example, plaintext
-corpora support methods to read the corpus as raw text, a list of
-words, a list of sentences, or a list of paragraphs.
-
-    >>> from nltk.corpus import inaugural
-    >>> inaugural.raw('1789-Washington.txt') # doctest: +ELLIPSIS
-    'Fellow-Citizens of the Senate ...'
-    >>> inaugural.words('1789-Washington.txt')
-    ['Fellow', '-', 'Citizens', 'of', 'the', ...]
-    >>> inaugural.sents('1789-Washington.txt') # doctest: +ELLIPSIS
-    [['Fellow', '-', 'Citizens'...], ['Among', 'the', 'vicissitudes'...]...]
-    >>> inaugural.paras('1789-Washington.txt') # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-    [[['Fellow', '-', 'Citizens'...]],
-     [['Among', 'the', 'vicissitudes'...],
-      ['On', 'the', 'one', 'hand', ',', 'I'...]...]...]
-
-Each of these reader methods may be given a single document's item
-name or a list of document item names.  When given a list of document
-item names, the reader methods will concatenate together the contents
-of the individual documents.
-
-    >>> l1 = len(inaugural.words('1789-Washington.txt'))
-    >>> l2 = len(inaugural.words('1793-Washington.txt'))
-    >>> l3 = len(inaugural.words(['1789-Washington.txt', '1793-Washington.txt']))
-    >>> print('%s+%s == %s' % (l1, l2, l3))
-    1538+147 == 1685
-
-If the reader methods are called without any arguments, they will
-typically load all documents in the corpus.
-
-    >>> len(inaugural.words())
-    145735
-
-If a corpus contains a README file, it can be accessed with a ``readme()`` method:
-
-    >>> inaugural.readme()[:32]
-    'C-Span Inaugural Address Corpus\n'
-
-Plaintext Corpora
-=================
-
-Here are the first few words from each of NLTK's plaintext corpora:
-
-    >>> nltk.corpus.abc.words()
-    ['PM', 'denies', 'knowledge', 'of', 'AWB', ...]
-    >>> nltk.corpus.genesis.words()
-    [u'In', u'the', u'beginning', u'God', u'created', ...]
-    >>> nltk.corpus.gutenberg.words(fileids='austen-emma.txt')
-    ['[', 'Emma', 'by', 'Jane', 'Austen', '1816', ...]
-    >>> nltk.corpus.inaugural.words()
-    ['Fellow', '-', 'Citizens', 'of', 'the', ...]
-    >>> nltk.corpus.state_union.words()
-    ['PRESIDENT', 'HARRY', 'S', '.', 'TRUMAN', "'", ...]
-    >>> nltk.corpus.webtext.words()
-    ['Cookie', 'Manager', ':', '"', 'Don', "'", 't', ...]
-
-Tagged Corpora
-==============
-
-In addition to the plaintext corpora, NLTK's data package also
-contains a wide variety of annotated corpora.  For example, the Brown
-Corpus is annotated with part-of-speech tags, and defines additional
-methods ``tagged_*()`` which return words as `(word,tag)` tuples, rather
-than just bare word strings.
-
-    >>> from nltk.corpus import brown
-    >>> print(brown.words())
-    ['The', 'Fulton', 'County', 'Grand', 'Jury', ...]
-    >>> print(brown.tagged_words())
-    [('The', 'AT'), ('Fulton', 'NP-TL'), ...]
-    >>> print(brown.sents()) # doctest: +ELLIPSIS
-    [['The', 'Fulton', 'County'...], ['The', 'jury', 'further'...], ...]
-    >>> print(brown.tagged_sents()) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-    [[('The', 'AT'), ('Fulton', 'NP-TL')...],
-     [('The', 'AT'), ('jury', 'NN'), ('further', 'RBR')...]...]
-    >>> print(brown.paras(categories='reviews')) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-    [[['It', 'is', 'not', 'news', 'that', 'Nathan', 'Milstein'...],
-      ['Certainly', 'not', 'in', 'Orchestra', 'Hall', 'where'...]],
-     [['There', 'was', 'about', 'that', 'song', 'something', ...],
-      ['Not', 'the', 'noblest', 'performance', 'we', 'have', ...], ...], ...]
-    >>> print(brown.tagged_paras(categories='reviews')) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-    [[[('It', 'PPS'), ('is', 'BEZ'), ('not', '*'), ...],
-      [('Certainly', 'RB'), ('not', '*'), ('in', 'IN'), ...]],
-     [[('There', 'EX'), ('was', 'BEDZ'), ('about', 'IN'), ...],
-      [('Not', '*'), ('the', 'AT'), ('noblest', 'JJT'), ...], ...], ...]
-
-Similarly, the Indian Language POS-Tagged Corpus includes samples of
-Indian text annotated with part-of-speech tags:
-
-    >>> from nltk.corpus import indian
-    >>> print(indian.words()) # doctest: +SKIP
-    ['\xe0\xa6\xae\xe0\xa6\xb9\xe0\xa6\xbf\...',
-     '\xe0\xa6\xb8\xe0\xa6\xa8\xe0\xa7\x8d\xe0...', ...]
-    >>> print(indian.tagged_words()) # doctest: +SKIP
-    [('\xe0\xa6\xae\xe0\xa6\xb9\xe0\xa6\xbf...', 'NN'),
-     ('\xe0\xa6\xb8\xe0\xa6\xa8\xe0\xa7\x8d\xe0...', 'NN'), ...]
-
-Several tagged corpora support access to a simplified, universal tagset, e.g. where all noun
-tags are collapsed to a single category ``NOUN``:
-
-    >>> print(brown.tagged_sents(tagset='universal')) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-    [[('The', 'DET'), ('Fulton', 'NOUN'), ('County', 'NOUN'), ('Grand', 'ADJ'), ('Jury', 'NOUN'), ...],
-     [('The', 'DET'), ('jury', 'NOUN'), ('further', 'ADV'), ('said', 'VERB'), ('in', 'ADP'), ...]...]
-    >>> from nltk.corpus import conll2000, switchboard
-    >>> print(conll2000.tagged_words(tagset='universal')) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-    [('Confidence', 'NOUN'), ('in', 'ADP'), ...]
-
-Use ``nltk.app.pos_concordance()`` to access a GUI for searching tagged corpora.
-
-Chunked Corpora
-===============
-
-The CoNLL corpora also provide chunk structures, which are encoded as
-flat trees.  The CoNLL 2000 Corpus includes phrasal chunks; and the
-CoNLL 2002 Corpus includes named entity chunks.
-
-    >>> from nltk.corpus import conll2000, conll2002
-    >>> print(conll2000.sents()) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-    [['Confidence', 'in', 'the', 'pound', 'is', 'widely', ...],
-     ['Chancellor', 'of', 'the', 'Exchequer', ...], ...]
-    >>> for tree in conll2000.chunked_sents()[:2]:
-    ...     print(tree) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-    (S
-      (NP Confidence/NN)
-      (PP in/IN)
-      (NP the/DT pound/NN)
-      (VP is/VBZ widely/RB expected/VBN to/TO take/VB)
-      (NP another/DT sharp/JJ dive/NN)
-      if/IN
-      ...)
-    (S
-      Chancellor/NNP
-      (PP of/IN)
-      (NP the/DT Exchequer/NNP)
-      ...)
-    >>> print(conll2002.sents()) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-    [[u'Sao', u'Paulo', u'(', u'Brasil', u')', u',', ...], [u'-'], ...]
-    >>> for tree in conll2002.chunked_sents()[:2]:
-    ...     print(tree) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-    (S
-      (LOC Sao/NC Paulo/VMI)
-      (/Fpa
-      (LOC Brasil/NC)
-      )/Fpt
-      ...)
-    (S -/Fg)
-
-.. note:: Since the CoNLL corpora do not contain paragraph break
-   information, these readers do not support the ``paras()`` method.
-
-.. warning:: If you call the CoNLL corpus reader methods without any
-   arguments, they will return the contents of the entire corpus,
-   *including* the 'test' portions of the corpus.
-
-SemCor is a subset of the Brown corpus tagged with WordNet senses and
-named entities. Both kinds of lexical items include multiword units,
-which are encoded as chunks (senses and part-of-speech tags pertain
-to the entire chunk).
-
-    >>> from nltk.corpus import semcor
-    >>> semcor.words()
-    ['The', 'Fulton', 'County', 'Grand', 'Jury', ...]
-    >>> semcor.chunks()
-    [['The'], ['Fulton', 'County', 'Grand', 'Jury'], ...]
-    >>> semcor.sents() # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-    [['The', 'Fulton', 'County', 'Grand', 'Jury', 'said', ...],
-    ['The', 'jury', 'further', 'said', ...], ...]
-    >>> semcor.chunk_sents() # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-    [[['The'], ['Fulton', 'County', 'Grand', 'Jury'], ['said'], ...
-    ['.']], [['The'], ['jury'], ['further'], ['said'], ... ['.']], ...]
-    >>> list(map(str, semcor.tagged_chunks(tag='both')[:3]))
-    ['(DT The)', "(Lemma('group.n.01.group') (NE (NNP Fulton County Grand Jury)))", "(Lemma('state.v.01.say') (VB said))"]
-    >>> [[str(c) for c in s] for s in semcor.tagged_sents(tag='both')[:2]]
-    [['(DT The)', "(Lemma('group.n.01.group') (NE (NNP Fulton County Grand Jury)))", ...
-     '(None .)'], ['(DT The)', ... '(None .)']]
-
-
-The IEER corpus is another chunked corpus.  This corpus is unusual in
-that each corpus item contains multiple documents.  (This reflects the
-fact that each corpus file contains multiple documents.)  The IEER
-corpus defines the `parsed_docs` method, which returns the documents
-in a given item as `IEERDocument` objects:
-
-    >>> from nltk.corpus import ieer
-    >>> ieer.fileids() # doctest: +NORMALIZE_WHITESPACE
-    ['APW_19980314', 'APW_19980424', 'APW_19980429',
-     'NYT_19980315', 'NYT_19980403', 'NYT_19980407']
-    >>> docs = ieer.parsed_docs('APW_19980314')
-    >>> print(docs[0])
-    <IEERDocument APW19980314.0391: 'Kenyans protest tax hikes'>
-    >>> print(docs[0].docno)
-    APW19980314.0391
-    >>> print(docs[0].doctype)
-    NEWS STORY
-    >>> print(docs[0].date_time)
-    03/14/1998 10:36:00
-    >>> print(docs[0].headline)
-    (DOCUMENT Kenyans protest tax hikes)
-    >>> print(docs[0].text) # doctest: +ELLIPSIS
-    (DOCUMENT
-      (LOCATION NAIROBI)
-      ,
-      (LOCATION Kenya)
-      (
-      (ORGANIZATION AP)
-      )
-      _
-      (CARDINAL Thousands)
-      of
-      laborers,
-      ...
-      on
-      (DATE Saturday)
-      ...)
-
-Parsed Corpora
-==============
-
-The Treebank corpora provide a syntactic parse for each sentence.  The
-NLTK data package includes a 10% sample of the Penn Treebank (in
-``treebank``), as well as the Sinica Treebank (in ``sinica_treebank``).
-
-Reading the Penn Treebank (Wall Street Journal sample):
-
-    >>> from nltk.corpus import treebank
-    >>> print(treebank.fileids()) # doctest: +ELLIPSIS
-    ['wsj_0001.mrg', 'wsj_0002.mrg', 'wsj_0003.mrg', 'wsj_0004.mrg', ...]
-    >>> print(treebank.words('wsj_0003.mrg'))
-    ['A', 'form', 'of', 'asbestos', 'once', 'used', ...]
-    >>> print(treebank.tagged_words('wsj_0003.mrg'))
-    [('A', 'DT'), ('form', 'NN'), ('of', 'IN'), ...]
-    >>> print(treebank.parsed_sents('wsj_0003.mrg')[0]) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-    (S
-      (S-TPC-1
-        (NP-SBJ
-          (NP (NP (DT A) (NN form)) (PP (IN of) (NP (NN asbestos))))
-          (RRC ...)...)...)
-      ...
-      (VP (VBD reported) (SBAR (-NONE- 0) (S (-NONE- *T*-1))))
-      (. .))
-
-If you have access to a full installation of the Penn Treebank, NLTK
-can be configured to load it as well. Download the ``ptb`` package,
-and in the directory ``nltk_data/corpora/ptb`` place the ``BROWN``
-and ``WSJ`` directories of the Treebank installation (symlinks work
-as well). Then use the ``ptb`` module instead of ``treebank``:
-
-   >>> from nltk.corpus import ptb
-   >>> print(ptb.fileids()) # doctest: +SKIP
-   ['BROWN/CF/CF01.MRG', 'BROWN/CF/CF02.MRG', 'BROWN/CF/CF03.MRG', 'BROWN/CF/CF04.MRG', ...]
-   >>> print(ptb.words('WSJ/00/WSJ_0003.MRG')) # doctest: +SKIP
-   ['A', 'form', 'of', 'asbestos', 'once', 'used', '*', ...]
-   >>> print(ptb.tagged_words('WSJ/00/WSJ_0003.MRG')) # doctest: +SKIP
-   [('A', 'DT'), ('form', 'NN'), ('of', 'IN'), ...]
-
-...and so forth, like ``treebank`` but with extended fileids. Categories
-specified in ``allcats.txt`` can be used to filter by genre; they consist
-of ``news`` (for WSJ articles) and names of the Brown subcategories
-(``fiction``, ``humor``, ``romance``, etc.):
-
-   >>> ptb.categories() # doctest: +SKIP
-   ['adventure', 'belles_lettres', 'fiction', 'humor', 'lore', 'mystery', 'news', 'romance', 'science_fiction']
-   >>> print(ptb.fileids('news')) # doctest: +SKIP
-   ['WSJ/00/WSJ_0001.MRG', 'WSJ/00/WSJ_0002.MRG', 'WSJ/00/WSJ_0003.MRG', ...]
-   >>> print(ptb.words(categories=['humor','fiction'])) # doctest: +SKIP
-   ['Thirty-three', 'Scotty', 'did', 'not', 'go', 'back', ...]
-
-As PropBank and NomBank depend on the (WSJ portion of the) Penn Treebank,
-the modules ``propbank_ptb`` and ``nombank_ptb`` are provided for access
-to a full PTB installation.
-
-Reading the Sinica Treebank:
-
-    >>> from nltk.corpus import sinica_treebank
-    >>> print(sinica_treebank.sents()) # doctest: +SKIP
-    [['\xe4\xb8\x80'], ['\xe5\x8f\x8b\xe6\x83\x85'], ...]
-    >>> sinica_treebank.parsed_sents()[25] # doctest: +SKIP
-    Tree('S',
-        [Tree('NP',
-            [Tree('Nba', ['\xe5\x98\x89\xe7\x8f\x8d'])]),
-         Tree('V\xe2\x80\xa7\xe5\x9c\xb0',
-            [Tree('VA11', ['\xe4\xb8\x8d\xe5\x81\x9c']),
-             Tree('DE', ['\xe7\x9a\x84'])]),
-         Tree('VA4', ['\xe5\x93\xad\xe6\xb3\xa3'])])
-
-Reading the CoNLL 2007 Dependency Treebanks:
-
-    >>> from nltk.corpus import conll2007
-    >>> conll2007.sents('esp.train')[0] # doctest: +SKIP
-    ['El', 'aumento', 'del', 'índice', 'de', 'desempleo', ...]
-    >>> conll2007.parsed_sents('esp.train')[0] # doctest: +SKIP
-    <DependencyGraph with 38 nodes>
-    >>> print(conll2007.parsed_sents('esp.train')[0].tree()) # doctest: +SKIP
-    (fortaleció
-      (aumento El (del (índice (de (desempleo estadounidense)))))
-      hoy
-      considerablemente
-      (al
-        (euro
-          (cotizaba
-            ,
-            que
-            (a (15.35 las GMT))
-            se
-            (en (mercado el (de divisas) (de Fráncfort)))
-            (a 0,9452_dólares)
-            (frente_a , (0,9349_dólares los (de (mañana esta)))))))
-      .)
-
-Word Lists and Lexicons
-=======================
-
-The NLTK data package also includes a number of lexicons and word
-lists.  These are accessed just like text corpora.  The following
-examples illustrate the use of the wordlist corpora:
-
-    >>> from nltk.corpus import names, stopwords, words
-    >>> words.fileids()
-    ['en', 'en-basic']
-    >>> words.words('en') # doctest: +ELLIPSIS
-    ['A', 'a', 'aa', 'aal', 'aalii', 'aam', 'Aani', 'aardvark', 'aardwolf', ...]
-
-    >>> stopwords.fileids() # doctest: +ELLIPSIS
-    ['arabic', 'azerbaijani', 'danish', 'dutch', 'english', 'finnish', 'french', ...]
-    >>> stopwords.words('portuguese') # doctest: +ELLIPSIS
-    ['de', 'a', 'o', 'que', 'e', 'do', 'da', 'em', 'um', 'para', ...]
-    >>> names.fileids()
-    ['female.txt', 'male.txt']
-    >>> names.words('male.txt') # doctest: +ELLIPSIS
-    ['Aamir', 'Aaron', 'Abbey', 'Abbie', 'Abbot', 'Abbott', ...]
-    >>> names.words('female.txt') # doctest: +ELLIPSIS
-    ['Abagael', 'Abagail', 'Abbe', 'Abbey', 'Abbi', 'Abbie', ...]
-
-The CMU Pronunciation Dictionary corpus contains pronunciation
-transcriptions for over 100,000 words.  It can be accessed as a list
-of entries (where each entry consists of a word, an identifier, and a
-transcription) or as a dictionary from words to lists of
-transcriptions.  Transcriptions are encoded as tuples of phoneme
-strings.
-
-    >>> from nltk.corpus import cmudict
-    >>> print(cmudict.entries()[653:659]) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-    [('acetate', ['AE1', 'S', 'AH0', 'T', 'EY2', 'T']),
-    ('acetic', ['AH0', 'S', 'EH1', 'T', 'IH0', 'K']),
-    ('acetic', ['AH0', 'S', 'IY1', 'T', 'IH0', 'K']),
-    ('aceto', ['AA0', 'S', 'EH1', 'T', 'OW0']),
-    ('acetochlor', ['AA0', 'S', 'EH1', 'T', 'OW0', 'K', 'L', 'AO2', 'R']),
-    ('acetone', ['AE1', 'S', 'AH0', 'T', 'OW2', 'N'])]
-    >>> # Load the entire cmudict corpus into a Python dictionary:
-    >>> transcr = cmudict.dict()
-    >>> print([transcr[w][0] for w in 'Natural Language Tool Kit'.lower().split()]) # doctest: +NORMALIZE_WHITESPACE
-    [['N', 'AE1', 'CH', 'ER0', 'AH0', 'L'],
-     ['L', 'AE1', 'NG', 'G', 'W', 'AH0', 'JH'],
-     ['T', 'UW1', 'L'],
-     ['K', 'IH1', 'T']]
-
-
-WordNet
-=======
-
-Please see the separate WordNet howto.
-
-FrameNet
-========
-
-Please see the separate FrameNet howto.
-
-PropBank
-========
-
-Please see the separate PropBank howto.
-
-SentiWordNet
-============
-
-Please see the separate SentiWordNet howto.
-
-Categorized Corpora
-===================
-
-Several corpora included with NLTK contain documents that have been categorized for
-topic, genre, polarity, etc.  In addition to the standard corpus interface, these
-corpora provide access to the list of categories and the mapping between the documents
-and their categories (in both directions).  Access the categories using the ``categories()``
-method, e.g.:
-
-    >>> from nltk.corpus import brown, movie_reviews, reuters
-    >>> brown.categories() # doctest: +NORMALIZE_WHITESPACE
-    ['adventure', 'belles_lettres', 'editorial', 'fiction', 'government', 'hobbies', 'humor',
-    'learned', 'lore', 'mystery', 'news', 'religion', 'reviews', 'romance', 'science_fiction']
-    >>> movie_reviews.categories()
-    ['neg', 'pos']
-    >>> reuters.categories() # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
-    ['acq', 'alum', 'barley', 'bop', 'carcass', 'castor-oil', 'cocoa',
-    'coconut', 'coconut-oil', 'coffee', 'copper', 'copra-cake', 'corn',
-    'cotton', 'cotton-oil', 'cpi', 'cpu', 'crude', 'dfl', 'dlr', ...]
-
-This method has an optional argument that specifies a document or a list
-of documents, allowing us to map from (one or more) documents to (one or more) categories:
-
-    >>> brown.categories('ca01')
-    ['news']
-    >>> brown.categories(['ca01','cb01'])
-    ['editorial', 'news']
-    >>> reuters.categories('training/9865')
-    ['barley', 'corn', 'grain', 'wheat']
-    >>> reuters.categories(['training/9865', 'training/9880'])
-    ['barley', 'corn', 'grain', 'money-fx', 'wheat']
-
-We can go back the other way using the optional argument of the ``fileids()`` method:
-
-    >>> reuters.fileids('barley') # doctest: +ELLIPSIS
-    ['test/15618', 'test/15649', 'test/15676', 'test/15728', 'test/15871', ...]
-
-Both the ``categories()`` and ``fileids()`` methods return a sorted list containing
-no duplicates.
-
-In addition to mapping between categories and documents, these corpora permit
-direct access to their contents via the categories.  Instead of accessing a subset
-of a corpus by specifying one or more fileids, we can identify one or more categories, e.g.:
-
-    >>> brown.tagged_words(categories='news')
-    [('The', 'AT'), ('Fulton', 'NP-TL'), ...]
-    >>> brown.sents(categories=['editorial','reviews']) # doctest: +NORMALIZE_WHITESPACE
-    [['Assembly', 'session', 'brought', 'much', 'good'], ['The', 'General',
-    'Assembly', ',', 'which', 'adjourns', 'today', ',', 'has', 'performed',
-    'in', 'an', 'atmosphere', 'of', 'crisis', 'and', 'struggle', 'from',
-    'the', 'day', 'it', 'convened', '.'], ...]
-
-Note that it is an error to specify both documents and categories.
-
-In the context of a text categorization system, we can easily test if the
-category assigned to a document is correct as follows:
-
-    >>> def classify(doc): return 'news'   # Trivial classifier
-    >>> doc = 'ca01'
-    >>> classify(doc) in brown.categories(doc)
-    True
-
-
-Other Corpora
-=============
-
-comparative_sentences
----------------------
-A list of sentences from various sources, especially reviews and articles. Each
-line contains one sentence; sentences were separated by using a sentence tokenizer.
-Comparative sentences have been annotated with their type, entities, features and
-keywords.
-
-    >>> from nltk.corpus import comparative_sentences
-    >>> comparison = comparative_sentences.comparisons()[0]
-    >>> comparison.text
-    ['its', 'fast-forward', 'and', 'rewind', 'work', 'much', 'more', 'smoothly',
-    'and', 'consistently', 'than', 'those', 'of', 'other', 'models', 'i', "'ve",
-    'had', '.']
-    >>> comparison.entity_2
-    'models'
-    >>> (comparison.feature, comparison.keyword)
-    ('rewind', 'more')
-    >>> len(comparative_sentences.comparisons())
-    853
-
-opinion_lexicon
----------------
-A list of positive and negative opinion words or sentiment words for English.
-
-    >>> from nltk.corpus import opinion_lexicon
-    >>> opinion_lexicon.words()[:4]
-        ['2-faced', '2-faces', 'abnormal', 'abolish']
-
-The OpinionLexiconCorpusReader also provides shortcuts to retrieve positive/negative
-words:
-
-    >>> opinion_lexicon.negative()[:4]
-    ['2-faced', '2-faces', 'abnormal', 'abolish']
-
-Note that words from `words()` method in opinion_lexicon are sorted by file id,
-not alphabetically:
-
-    >>> opinion_lexicon.words()[0:10]
-    ['2-faced', '2-faces', 'abnormal', 'abolish', 'abominable', 'abominably',
-    'abominate', 'abomination', 'abort', 'aborted']
-    >>> sorted(opinion_lexicon.words())[0:10]
-    ['2-faced', '2-faces', 'a+', 'abnormal', 'abolish', 'abominable', 'abominably',
-    'abominate', 'abomination', 'abort']
-
-ppattach
---------
-The Prepositional Phrase Attachment corpus is a corpus of
-prepositional phrase attachment decisions.  Each instance in the
-corpus is encoded as a ``PPAttachment`` object:
-
-    >>> from nltk.corpus import ppattach
-    >>> ppattach.attachments('training') # doctest: +NORMALIZE_WHITESPACE
-    [PPAttachment(sent='0', verb='join', noun1='board',
-                  prep='as', noun2='director', attachment='V'),
-     PPAttachment(sent='1', verb='is', noun1='chairman',
-                  prep='of', noun2='N.V.', attachment='N'),
-     ...]
-    >>> inst = ppattach.attachments('training')[0]
-    >>> (inst.sent, inst.verb, inst.noun1, inst.prep, inst.noun2)
-    ('0', 'join', 'board', 'as', 'director')
-    >>> inst.attachment
-    'V'
-
-product_reviews_1 and product_reviews_2
----------------------------------------
-These two datasets respectively contain annotated customer reviews of 5 and 9
-products from amazon.com.
-
-    >>> from nltk.corpus import product_reviews_1
-    >>> camera_reviews = product_reviews_1.reviews('Canon_G3.txt')
-    >>> review = camera_reviews[0]
-    >>> review.sents()[0]
-    ['i', 'recently', 'purchased', 'the', 'canon', 'powershot', 'g3', 'and', 'am',
-    'extremely', 'satisfied', 'with', 'the', 'purchase', '.']
-    >>> review.features()
-    [('canon powershot g3', '+3'), ('use', '+2'), ('picture', '+2'),
-    ('picture quality', '+1'), ('picture quality', '+1'), ('camera', '+2'),
-    ('use', '+2'), ('feature', '+1'), ('picture quality', '+3'), ('use', '+1'),
-    ('option', '+1')]
-
-It is also possible to reach the same information directly from the stream:
-
-    >>> product_reviews_1.features('Canon_G3.txt')
-    [('canon powershot g3', '+3'), ('use', '+2'), ...]
-
-We can compute stats for specific product features:
-
-    >>> n_reviews = len([(feat,score) for (feat,score) in product_reviews_1.features('Canon_G3.txt') if feat=='picture'])
-    >>> tot = sum([int(score) for (feat,score) in product_reviews_1.features('Canon_G3.txt') if feat=='picture'])
-    >>> # We use float for backward compatibility with division in Python2.7
-    >>> mean = tot/float(n_reviews)
-    >>> print(n_reviews, tot, mean)
-    15 24 1.6
-
-pros_cons
----------
-A list of pros/cons sentences for determining context (aspect) dependent
-sentiment words, which are then applied to sentiment analysis of comparative
-sentences.
-
-    >>> from nltk.corpus import pros_cons
-    >>> pros_cons.sents(categories='Cons')
-    [['East', 'batteries', '!', 'On', '-', 'off', 'switch', 'too', 'easy',
-    'to', 'maneuver', '.'], ['Eats', '...', 'no', ',', 'GULPS', 'batteries'],
-    ...]
-    >>> pros_cons.words('IntegratedPros.txt')
-    ['Easy', 'to', 'use', ',', 'economical', '!', ...]
-
-semcor
-------
-The Brown Corpus, annotated with WordNet senses.
-
-    >>> from nltk.corpus import semcor
-    >>> semcor.words('brown2/tagfiles/br-n12.xml')  # doctest: +ELLIPSIS
-    ['When', 'several', 'minutes', 'had', 'passed', ...]
-    >>> sent = semcor.xml('brown2/tagfiles/br-n12.xml').findall('context/p/s')[0]
-    >>> for wordform in sent.getchildren():
-    ...     print(wordform.text, end=' ')
-    ...     for key in sorted(wordform.keys()):
-    ...         print(key + '=' + wordform.get(key), end=' ')
-    ...     print()
-    ...
-    When cmd=ignore pos=WRB
-    several cmd=done lemma=several lexsn=5:00:00:some(a):00 pos=JJ wnsn=1
-    minutes cmd=done lemma=minute lexsn=1:28:00:: pos=NN wnsn=1
-    had cmd=done ot=notag pos=VBD
-    passed cmd=done lemma=pass lexsn=2:38:03:: pos=VB wnsn=4
-    and cmd=ignore pos=CC
-    Curt cmd=done lemma=person lexsn=1:03:00:: pn=person pos=NNP rdf=person wnsn=1
-    had cmd=done ot=notag pos=VBD
-    n't cmd=done lemma=n't lexsn=4:02:00:: pos=RB wnsn=0
-    emerged cmd=done lemma=emerge lexsn=2:30:00:: pos=VB wnsn=1
-    from cmd=ignore pos=IN
-    the cmd=ignore pos=DT
-    livery_stable cmd=done lemma=livery_stable lexsn=1:06:00:: pos=NN wnsn=1
-    ,
-    Brenner cmd=done lemma=person lexsn=1:03:00:: pn=person pos=NNP rdf=person wnsn=1
-    re-entered cmd=done lemma=re-enter lexsn=2:38:00:: pos=VB wnsn=1
-    the cmd=ignore pos=DT
-    hotel cmd=done lemma=hotel lexsn=1:06:00:: pos=NN wnsn=1
-    and cmd=ignore pos=CC
-    faced cmd=done lemma=face lexsn=2:42:02:: pos=VB wnsn=4
-    Summers cmd=done lemma=person lexsn=1:03:00:: pn=person pos=NNP rdf=person wnsn=1
-    across cmd=ignore pos=IN
-    the cmd=ignore pos=DT
-    counter cmd=done lemma=counter lexsn=1:06:00:: pos=NN wnsn=1
-    .
-
-senseval
---------
-The Senseval 2 corpus is a word sense disambiguation corpus.  Each
-item in the corpus corresponds to a single ambiguous word.  For each
-of these words, the corpus contains a list of instances, corresponding
-to occurrences of that word.  Each instance provides the word; a list
-of word senses that apply to the word occurrence; and the word's
-context.
-
-    >>> from nltk.corpus import senseval
-    >>> senseval.fileids()
-    ['hard.pos', 'interest.pos', 'line.pos', 'serve.pos']
-    >>> senseval.instances('hard.pos')
-    ... # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-    [SensevalInstance(word='hard-a',
-        position=20,
-        context=[('``', '``'), ('he', 'PRP'), ...('hard', 'JJ'), ...],
-        senses=('HARD1',)),
-     SensevalInstance(word='hard-a',
-        position=10,
-        context=[('clever', 'NNP'), ...('hard', 'JJ'), ('time', 'NN'), ...],
-        senses=('HARD1',)), ...]
-
-The following code looks at instances of the word 'interest', and
-displays their local context (2 words on each side) and word sense(s):
-
-    >>> for inst in senseval.instances('interest.pos')[:10]:
-    ...     p = inst.position
-    ...     left = ' '.join(w for (w,t) in inst.context[p-2:p])
-    ...     word = ' '.join(w for (w,t) in inst.context[p:p+1])
-    ...     right = ' '.join(w for (w,t) in inst.context[p+1:p+3])
-    ...     senses = ' '.join(inst.senses)
-    ...     print('%20s |%10s | %-15s -> %s' % (left, word, right, senses))
-             declines in |  interest | rates .         -> interest_6
-      indicate declining |  interest | rates because   -> interest_6
-           in short-term |  interest | rates .         -> interest_6
-                     4 % |  interest | in this         -> interest_5
-            company with | interests | in the          -> interest_5
-                  , plus |  interest | .               -> interest_6
-                 set the |  interest | rate on         -> interest_6
-                  's own |  interest | , prompted      -> interest_4
-           principal and |  interest | is the          -> interest_6
-            increase its |  interest | to 70           -> interest_5
-
-sentence_polarity
------------------
-The Sentence Polarity dataset contains 5331 positive and 5331 negative processed
-sentences.
-
-    >>> from nltk.corpus import sentence_polarity
-    >>> sentence_polarity.sents()
-    [['simplistic', ',', 'silly', 'and', 'tedious', '.'], ["it's", 'so', 'laddish',
-    'and', 'juvenile', ',', 'only', 'teenage', 'boys', 'could', 'possibly', 'find',
-    'it', 'funny', '.'], ...]
-    >>> sentence_polarity.categories()
-    ['neg', 'pos']
-    >>> sentence_polarity.sents()[1]
-    ["it's", 'so', 'laddish', 'and', 'juvenile', ',', 'only', 'teenage', 'boys',
-    'could', 'possibly', 'find', 'it', 'funny', '.']
-
-shakespeare
------------
-The Shakespeare corpus contains a set of Shakespeare plays, formatted
-as XML files.  These corpora are returned as ElementTree objects:
-
-    >>> from nltk.corpus import shakespeare
-    >>> from xml.etree import ElementTree
-    >>> shakespeare.fileids() # doctest: +ELLIPSIS
-    ['a_and_c.xml', 'dream.xml', 'hamlet.xml', 'j_caesar.xml', ...]
-    >>> play = shakespeare.xml('dream.xml')
-    >>> print(play) # doctest: +ELLIPSIS
-    <Element 'PLAY' at ...>
-    >>> print('%s: %s' % (play[0].tag, play[0].text))
-    TITLE: A Midsummer Night's Dream
-    >>> personae = [persona.text for persona in
-    ...             play.findall('PERSONAE/PERSONA')]
-    >>> print(personae) # doctest: +ELLIPSIS
-    ['THESEUS, Duke of Athens.', 'EGEUS, father to Hermia.', ...]
-    >>> # Find and print speakers not listed as personae
-    >>> names = [persona.split(',')[0] for persona in personae]
-    >>> speakers = set(speaker.text for speaker in
-    ...                play.findall('*/*/*/SPEAKER'))
-    >>> print(sorted(speakers.difference(names))) # doctest: +NORMALIZE_WHITESPACE
-    ['ALL', 'COBWEB', 'DEMETRIUS', 'Fairy', 'HERNIA', 'LYSANDER',
-     'Lion', 'MOTH', 'MUSTARDSEED', 'Moonshine', 'PEASEBLOSSOM',
-     'Prologue', 'Pyramus', 'Thisbe', 'Wall']
-
-subjectivity
-------------
-The Subjectivity Dataset contains 5000 subjective and 5000 objective processed
-sentences.
-
-    >>> from nltk.corpus import subjectivity
-    >>> subjectivity.categories()
-    ['obj', 'subj']
-    >>> subjectivity.sents()[23]
-    ['television', 'made', 'him', 'famous', ',', 'but', 'his', 'biggest', 'hits',
-    'happened', 'off', 'screen', '.']
-    >>> subjectivity.words(categories='subj')
-    ['smart', 'and', 'alert', ',', 'thirteen', ...]
-
-toolbox
--------
-The Toolbox corpus distributed with NLTK contains a sample lexicon and
-several sample texts from the Rotokas language.  The Toolbox corpus
-reader returns Toolbox files as XML ElementTree objects.  The
-following example loads the Rotokas dictionary, and figures out the
-distribution of part-of-speech tags for reduplicated words.
-
-.. doctest: +SKIP
-
-    >>> from nltk.corpus import toolbox
-    >>> from nltk.probability import FreqDist
-    >>> from xml.etree import ElementTree
-    >>> import re
-    >>> rotokas = toolbox.xml('rotokas.dic')
-    >>> redup_pos_freqdist = FreqDist()
-    >>> # Note: we skip over the first record, which is actually
-    >>> # the header.
-    >>> for record in rotokas[1:]:
-    ...     lexeme = record.find('lx').text
-    ...     if re.match(r'(.*)\1$', lexeme):
-    ...         redup_pos_freqdist[record.find('ps').text] += 1
-    >>> for item, count in redup_pos_freqdist.most_common():
-    ...     print(item, count)
-    V 41
-    N 14
-    ??? 4
-
-This example displays some records from a Rotokas text:
-
-.. doctest: +SKIP
-
-    >>> river = toolbox.xml('rotokas/river.txt', key='ref')
-    >>> for record in river.findall('record')[:3]:
-    ...     for piece in record:
-    ...         if len(piece.text) > 60:
-    ...             print('%-6s %s...' % (piece.tag, piece.text[:57]))
-    ...         else:
-    ...             print('%-6s %s' % (piece.tag, piece.text))
-    ref    Paragraph 1
-    t      ``Viapau oisio              ra   ovaupasi                ...
-    m      viapau   oisio              ra   ovau   -pa       -si    ...
-    g      NEG      this way/like this and  forget -PROG     -2/3.DL...
-    p      NEG      ???                CONJ V.I    -SUFF.V.3 -SUFF.V...
-    f      ``No ken lus tingting wanema samting papa i bin tok,'' Na...
-    fe     ``Don't forget what Dad said,'' yelled Naomi.
-    ref    2
-    t      Osa     Ira  ora  Reviti viapau uvupasiva.
-    m      osa     Ira  ora  Reviti viapau uvu        -pa       -si ...
-    g      as/like name and  name   NEG    hear/smell -PROG     -2/3...
-    p      CONJ    N.PN CONJ N.PN   NEG    V.T        -SUFF.V.3 -SUF...
-    f      Tasol Ila na David no bin harim toktok.
-    fe     But Ila and David took no notice.
-    ref    3
-    t      Ikaupaoro                     rokosiva                   ...
-    m      ikau      -pa       -oro      roko    -si       -va      ...
-    g      run/hurry -PROG     -SIM      go down -2/3.DL.M -RP      ...
-    p      V.T       -SUFF.V.3 -SUFF.V.4 ADV     -SUFF.V.4 -SUFF.VT....
-    f      Tupela i bin hariap i go long wara .
-    fe     They raced to the river.
-
-timit
------
-The NLTK data package includes a fragment of the TIMIT
-Acoustic-Phonetic Continuous Speech Corpus.  This corpus is broken
-down into small speech samples, each of which is available as a wave
-file, a phonetic transcription, and a tokenized word list.
-
-    >>> from nltk.corpus import timit
-    >>> print(timit.utteranceids()) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-    ['dr1-fvmh0/sa1', 'dr1-fvmh0/sa2', 'dr1-fvmh0/si1466',
-    'dr1-fvmh0/si2096', 'dr1-fvmh0/si836', 'dr1-fvmh0/sx116',
-    'dr1-fvmh0/sx206', 'dr1-fvmh0/sx26', 'dr1-fvmh0/sx296', ...]
-
-    >>> item = timit.utteranceids()[5]
-    >>> print(timit.phones(item)) # doctest: +NORMALIZE_WHITESPACE
-    ['h#', 'k', 'l', 'ae', 's', 'pcl', 'p', 'dh', 'ax',
-     's', 'kcl', 'k', 'r', 'ux', 'ix', 'nx', 'y', 'ax',
-     'l', 'eh', 'f', 'tcl', 't', 'hh', 'ae', 'n', 'dcl',
-     'd', 'h#']
-    >>> print(timit.words(item))
-    ['clasp', 'the', 'screw', 'in', 'your', 'left', 'hand']
-    >>> timit.play(item) # doctest: +SKIP
-
-The corpus reader can combine the word segmentation information with
-the phonemes to produce a single tree structure:
-
-    >>> for tree in timit.phone_trees(item):
-    ...     print(tree)
-    (S
-      h#
-      (clasp k l ae s pcl p)
-      (the dh ax)
-      (screw s kcl k r ux)
-      (in ix nx)
-      (your y ax)
-      (left l eh f tcl t)
-      (hand hh ae n dcl d)
-      h#)
-
-The start time and stop time of each phoneme, word, and sentence are
-also available:
-
-    >>> print(timit.phone_times(item)) # doctest: +ELLIPSIS
-    [('h#', 0, 2190), ('k', 2190, 3430), ('l', 3430, 4326), ...]
-    >>> print(timit.word_times(item)) # doctest: +ELLIPSIS
-    [('clasp', 2190, 8804), ('the', 8804, 9734), ...]
-    >>> print(timit.sent_times(item))
-    [('Clasp the screw in your left hand.', 0, 32154)]
-
-We can use these times to play selected pieces of a speech sample:
-
-    >>> timit.play(item, 2190, 8804) # 'clasp'  # doctest: +SKIP
-
-The corpus reader can also be queried for information about the
-speaker and sentence identifier for a given speech sample:
-
-    >>> print(timit.spkrid(item))
-    dr1-fvmh0
-    >>> print(timit.sentid(item))
-    sx116
-    >>> print(timit.spkrinfo(timit.spkrid(item))) # doctest: +NORMALIZE_WHITESPACE
-    SpeakerInfo(id='VMH0',
-                sex='F',
-                dr='1',
-                use='TRN',
-                recdate='03/11/86',
-                birthdate='01/08/60',
-                ht='5\'05"',
-                race='WHT',
-                edu='BS',
-                comments='BEST NEW ENGLAND ACCENT SO FAR')
-
-    >>> # List the speech samples from the same speaker:
-    >>> timit.utteranceids(spkrid=timit.spkrid(item)) # doctest: +ELLIPSIS
-    ['dr1-fvmh0/sa1', 'dr1-fvmh0/sa2', 'dr1-fvmh0/si1466', ...]
-
-twitter_samples
----------------
-
-Twitter is a well-known microblog service that allows public data to be
-collected via APIs. NLTK's twitter corpus currently contains a sample of 20k Tweets
-retrieved from the Twitter Streaming API.
-
-    >>> from nltk.corpus import twitter_samples
-    >>> twitter_samples.fileids()
-    ['negative_tweets.json', 'positive_tweets.json', 'tweets.20150430-223406.json']
-
-We follow standard practice in storing full Tweets as line-separated
-JSON. These data structures can be accessed via `tweets.docs()`. However, in general it
-is more practical to focus just on the text field of the Tweets, which
-is accessed via the `strings()` method.
-
-    >>> twitter_samples.strings('tweets.20150430-223406.json')
-    ['RT @KirkKus: Indirect cost of the UK being in the EU is estimated to be costing Britain \xa3170 billion per year! #BetterOffOut #UKIP', ...]
-
-The default tokenizer for Tweets is specialised for 'casual' text, and
-the `tokenized()` method returns a list of lists of tokens.
-
-    >>> twitter_samples.tokenized('tweets.20150430-223406.json')
-    [['RT', '@KirkKus', ':', 'Indirect', 'cost', 'of', 'the', 'UK', 'being', 'in', ...],
-     ['VIDEO', ':', 'Sturgeon', 'on', 'post-election', 'deals', 'http://t.co/BTJwrpbmOY'], ...]
-
-rte
----
-The RTE (Recognizing Textual Entailment) corpus was derived from the
-RTE1, RTE2 and RTE3 datasets (dev and test data), and consists of a
-list of XML-formatted 'text'/'hypothesis' pairs.
-
-    >>> from nltk.corpus import rte
-    >>> print(rte.fileids()) # doctest: +ELLIPSIS
-    ['rte1_dev.xml', 'rte1_test.xml', 'rte2_dev.xml', ..., 'rte3_test.xml']
-    >>> rtepairs = rte.pairs(['rte2_test.xml', 'rte3_test.xml'])
-    >>> print(rtepairs)  # doctest: +ELLIPSIS
-    [<RTEPair: gid=2-8>, <RTEPair: gid=2-9>, <RTEPair: gid=2-15>, ...]
-
-In the gold standard test sets, each pair is labeled according to
-whether or not the text 'entails' the hypothesis; the
-entailment value is mapped to an integer 1 (True) or 0 (False).
-
-    >>> rtepairs[5]
-    <RTEPair: gid=2-23>
-    >>> rtepairs[5].text # doctest: +NORMALIZE_WHITESPACE
-    'His wife Strida won a seat in parliament after forging an alliance
-    with the main anti-Syrian coalition in the recent election.'
-    >>> rtepairs[5].hyp
-    'Strida elected to parliament.'
-    >>> rtepairs[5].value
-    1
-
-The RTE corpus also supports an ``xml()`` method which produces ElementTrees.
-
-    >>> xmltree = rte.xml('rte3_dev.xml')
-    >>> xmltree # doctest: +SKIP
-    <Element entailment-corpus at ...>
-    >>> xmltree[7].findtext('t') # doctest: +NORMALIZE_WHITESPACE
-    "Mrs. Bush's approval ratings have remained very high, above 80%,
-    even as her husband's have recently dropped below 50%."
-
-verbnet
--------
-The VerbNet corpus is a lexicon that divides verbs into classes, based
-on their syntax-semantics linking behavior.  The basic elements in the
-lexicon are verb lemmas, such as 'abandon' and 'accept', and verb
-classes, which have identifiers such as 'remove-10.1' and
-'admire-31.2-1'.  These class identifiers consist of a representative
-verb selected from the class, followed by a numerical identifier.  The
-list of verb lemmas, and the list of class identifiers, can be
-retrieved with the following methods:
-
-    >>> from nltk.corpus import verbnet
-    >>> verbnet.lemmas()[20:25]
-    ['accelerate', 'accept', 'acclaim', 'accompany', 'accrue']
-    >>> verbnet.classids()[:5]
-    ['accompany-51.7', 'admire-31.2', 'admire-31.2-1', 'admit-65', 'adopt-93']
-
-The `classids()` method may also be used to retrieve the classes that
-a given lemma belongs to:
-
-    >>> verbnet.classids('accept')
-    ['approve-77', 'characterize-29.2-1-1', 'obtain-13.5.2']
-
-The `classids()` method may additionally be used to retrieve all classes
-within verbnet if nothing is passed:
-
-    >>> verbnet.classids()
-    ['accompany-51.7', 'admire-31.2', 'admire-31.2-1', 'admit-65', 'adopt-93', 'advise-37.9', 'advise-37.9-1', 'allow-64', 'amalgamate-22.2', 'amalgamate-22.2-1', 'amalgamate-22.2-1-1', 'amalgamate-22.2-2', 'amalgamate-22.2-2-1', 'amalgamate-22.2-3', 'amalgamate-22.2-3-1', 'amalgamate-22.2-3-1-1', 'amalgamate-22.2-3-2', 'amuse-31.1', 'animal_sounds-38', 'appeal-31.4', 'appeal-31.4-1', 'appeal-31.4-2', 'appeal-31.4-3', 'appear-48.1.1', 'appoint-29.1', 'approve-77', 'assessment-34', 'assuming_position-50', 'avoid-52', 'banish-10.2', 'battle-36.4', 'battle-36.4-1', 'begin-55.1', 'begin-55.1-1', 'being_dressed-41.3.3', 'bend-45.2', 'berry-13.7', 'bill-54.5', 'body_internal_motion-49', 'body_internal_states-40.6', 'braid-41.2.2', 'break-45.1', 'breathe-40.1.2', 'breathe-40.1.2-1', 'bring-11.3', 'bring-11.3-1', 'build-26.1', 'build-26.1-1', 'bulge-47.5.3', 'bump-18.4', 'bump-18.4-1', 'butter-9.9', 'calibratable_cos-45.6', 'calibratable_cos-45.6-1', 'calve-28', 'captain-29.8', 'captain-29.8-1', 'captain-29.8-1-1', 'care-88', 'care-88-1', 'carry-11.4', 'carry-11.4-1', 'carry-11.4-1-1', 'carve-21.2', 'carve-21.2-1', 'carve-21.2-2', 'change_bodily_state-40.8.4', 'characterize-29.2', 'characterize-29.2-1', 'characterize-29.2-1-1', 'characterize-29.2-1-2', 'chase-51.6', 'cheat-10.6', 'cheat-10.6-1', 'cheat-10.6-1-1', 'chew-39.2', 'chew-39.2-1', 'chew-39.2-2', 'chit_chat-37.6', 'clear-10.3', 'clear-10.3-1', 'cling-22.5', 'coil-9.6', 'coil-9.6-1', 'coloring-24', 'complain-37.8', 'complete-55.2', 'concealment-16', 'concealment-16-1', 'confess-37.10', 'confine-92', 'confine-92-1', 'conjecture-29.5', 'conjecture-29.5-1', 'conjecture-29.5-2', 'consider-29.9', 'consider-29.9-1', 'consider-29.9-1-1', 'consider-29.9-1-1-1', 'consider-29.9-2', 'conspire-71', 'consume-66', 'consume-66-1', 'contiguous_location-47.8', 'contiguous_location-47.8-1', 'contiguous_location-47.8-2', 'continue-55.3', 'contribute-13.2', 'contribute-13.2-1', 'contribute-13.2-1-1', 'contribute-13.2-1-1-1', 
'contribute-13.2-2', 'contribute-13.2-2-1', 'convert-26.6.2', 'convert-26.6.2-1', 'cooking-45.3', 'cooperate-73', 'cooperate-73-1', 'cooperate-73-2', 'cooperate-73-3', 'cope-83', 'cope-83-1', 'cope-83-1-1', 'correlate-86', 'correspond-36.1', 'correspond-36.1-1', 'correspond-36.1-1-1', 'cost-54.2', 'crane-40.3.2', 'create-26.4', 'create-26.4-1', 'curtsey-40.3.3', 'cut-21.1', 'cut-21.1-1', 'debone-10.8', 'declare-29.4', 'declare-29.4-1', 'declare-29.4-1-1', 'declare-29.4-1-1-1', 'declare-29.4-1-1-2', 'declare-29.4-1-1-3', 'declare-29.4-2', 'dedicate-79', 'defend-85', 'destroy-44', 'devour-39.4', 'devour-39.4-1', 'devour-39.4-2', 'differ-23.4', 'dine-39.5', 'disappearance-48.2', 'disassemble-23.3', 'discover-84', 'discover-84-1', 'discover-84-1-1', 'dress-41.1.1', 'dressing_well-41.3.2', 'drive-11.5', 'drive-11.5-1', 'dub-29.3', 'dub-29.3-1', 'eat-39.1', 'eat-39.1-1', 'eat-39.1-2', 'enforce-63', 'engender-27', 'entity_specific_cos-45.5', 'entity_specific_modes_being-47.2', 'equip-13.4.2', 'equip-13.4.2-1', 'equip-13.4.2-1-1', 'escape-51.1', 'escape-51.1-1', 'escape-51.1-2', 'escape-51.1-2-1', 'exceed-90', 'exchange-13.6', 'exchange-13.6-1', 'exchange-13.6-1-1', 'exhale-40.1.3', 'exhale-40.1.3-1', 'exhale-40.1.3-2', 'exist-47.1', 'exist-47.1-1', 'exist-47.1-1-1', 'feeding-39.7', 'ferret-35.6', 'fill-9.8', 'fill-9.8-1', 'fit-54.3', 'flinch-40.5', 'floss-41.2.1', 'focus-87', 'forbid-67', 'force-59', 'force-59-1', 'free-80', 'free-80-1', 'fulfilling-13.4.1', 'fulfilling-13.4.1-1', 'fulfilling-13.4.1-2', 'funnel-9.3', 'funnel-9.3-1', 'funnel-9.3-2', 'funnel-9.3-2-1', 'future_having-13.3', 'get-13.5.1', 'get-13.5.1-1', 'give-13.1', 'give-13.1-1', 'gobble-39.3', 'gobble-39.3-1', 'gobble-39.3-2', 'gorge-39.6', 'groom-41.1.2', 'grow-26.2', 'help-72', 'help-72-1', 'herd-47.5.2', 'hiccup-40.1.1', 'hit-18.1', 'hit-18.1-1', 'hold-15.1', 'hold-15.1-1', 'hunt-35.1', 'hurt-40.8.3', 'hurt-40.8.3-1', 'hurt-40.8.3-1-1', 'hurt-40.8.3-2', 'illustrate-25.3', 'image_impression-25.1', 
'indicate-78', 'indicate-78-1', 'indicate-78-1-1', 'inquire-37.1.2', 'instr_communication-37.4', 'investigate-35.4', 'judgement-33', 'keep-15.2', 'knead-26.5', 'learn-14', 'learn-14-1', 'learn-14-2', 'learn-14-2-1', 'leave-51.2', 'leave-51.2-1', 'lecture-37.11', 'lecture-37.11-1', 'lecture-37.11-1-1', 'lecture-37.11-2', 'light_emission-43.1', 'limit-76', 'linger-53.1', 'linger-53.1-1', 'lodge-46', 'long-32.2', 'long-32.2-1', 'long-32.2-2', 'manner_speaking-37.3', 'marry-36.2', 'marvel-31.3', 'marvel-31.3-1', 'marvel-31.3-2', 'marvel-31.3-3', 'marvel-31.3-4', 'marvel-31.3-5', 'marvel-31.3-6', 'marvel-31.3-7', 'marvel-31.3-8', 'marvel-31.3-9', 'masquerade-29.6', 'masquerade-29.6-1', 'masquerade-29.6-2', 'matter-91', 'meander-47.7', 'meet-36.3', 'meet-36.3-1', 'meet-36.3-2', 'mine-10.9', 'mix-22.1', 'mix-22.1-1', 'mix-22.1-1-1', 'mix-22.1-2', 'mix-22.1-2-1', 'modes_of_being_with_motion-47.3', 'murder-42.1', 'murder-42.1-1', 'neglect-75', 'neglect-75-1', 'neglect-75-1-1', 'neglect-75-2', 'nonvehicle-51.4.2', 'nonverbal_expression-40.2', 'obtain-13.5.2', 'obtain-13.5.2-1', 'occurrence-48.3', 'order-60', 'order-60-1', 'orphan-29.7', 'other_cos-45.4', 'pain-40.8.1', 'pay-68', 'peer-30.3', 'pelt-17.2', 'performance-26.7', 'performance-26.7-1', 'performance-26.7-1-1', 'performance-26.7-2', 'performance-26.7-2-1', 'pit-10.7', 'pocket-9.10', 'pocket-9.10-1', 'poison-42.2', 'poke-19', 'pour-9.5', 'preparing-26.3', 'preparing-26.3-1', 'preparing-26.3-2', 'price-54.4', 'push-12', 'push-12-1', 'push-12-1-1', 'put-9.1', 'put-9.1-1', 'put-9.1-2', 'put_direction-9.4', 'put_spatial-9.2', 'put_spatial-9.2-1', 'reach-51.8', 'reflexive_appearance-48.1.2', 'refrain-69', 'register-54.1', 'rely-70', 'remove-10.1', 'risk-94', 'risk-94-1', 'roll-51.3.1', 'rummage-35.5', 'run-51.3.2', 'rush-53.2', 'say-37.7', 'say-37.7-1', 'say-37.7-1-1', 'say-37.7-2', 'scribble-25.2', 'search-35.2', 'see-30.1', 'see-30.1-1', 'see-30.1-1-1', 'send-11.1', 'send-11.1-1', 'separate-23.1', 'separate-23.1-1', 
'separate-23.1-2', 'settle-89', 'shake-22.3', 'shake-22.3-1', 'shake-22.3-1-1', 'shake-22.3-2', 'shake-22.3-2-1', 'sight-30.2', 'simple_dressing-41.3.1', 'slide-11.2', 'slide-11.2-1-1', 'smell_emission-43.3', 'snooze-40.4', 'sound_emission-43.2', 'sound_existence-47.4', 'spank-18.3', 'spatial_configuration-47.6', 'split-23.2', 'spray-9.7', 'spray-9.7-1', 'spray-9.7-1-1', 'spray-9.7-2', 'stalk-35.3', 'steal-10.5', 'stimulus_subject-30.4', 'stop-55.4', 'stop-55.4-1', 'substance_emission-43.4', 'succeed-74', 'succeed-74-1', 'succeed-74-1-1', 'succeed-74-2', 'suffocate-40.7', 'suspect-81', 'swarm-47.5.1', 'swarm-47.5.1-1', 'swarm-47.5.1-2', 'swarm-47.5.1-2-1', 'swat-18.2', 'talk-37.5', 'tape-22.4', 'tape-22.4-1', 'tell-37.2', 'throw-17.1', 'throw-17.1-1', 'throw-17.1-1-1', 'tingle-40.8.2', 'touch-20', 'touch-20-1', 'transcribe-25.4', 'transfer_mesg-37.1.1', 'transfer_mesg-37.1.1-1', 'transfer_mesg-37.1.1-1-1', 'try-61', 'turn-26.6.1', 'turn-26.6.1-1', 'urge-58', 'vehicle-51.4.1', 'vehicle-51.4.1-1', 'waltz-51.5', 'want-32.1', 'want-32.1-1', 'want-32.1-1-1', 'weather-57', 'weekend-56', 'wink-40.3.1', 'wink-40.3.1-1', 'wipe_instr-10.4.2', 'wipe_instr-10.4.2-1', 'wipe_manner-10.4.1', 'wipe_manner-10.4.1-1', 'wish-62', 'withdraw-82', 'withdraw-82-1', 'withdraw-82-2', 'withdraw-82-3']
-
-The primary object in the lexicon is a class record, which is stored
-as an ElementTree xml object.  The class record for a given class
-identifier is returned by the `vnclass()` method:
-
-    >>> verbnet.vnclass('remove-10.1') # doctest: +ELLIPSIS
-    <Element 'VNCLASS' at ...>
-
-The `vnclass()` method also accepts "short" identifiers, such as '10.1':
-
-    >>> verbnet.vnclass('10.1') # doctest: +ELLIPSIS
-    <Element 'VNCLASS' at ...>
-
-See the Verbnet documentation, or the Verbnet files, for information
-about the structure of this xml.  As an example, we can retrieve a
-list of thematic roles for a given Verbnet class:
-
-    >>> vn_31_2 = verbnet.vnclass('admire-31.2')
-    >>> for themrole in vn_31_2.findall('THEMROLES/THEMROLE'):
-    ...     print(themrole.attrib['type'], end=' ')
-    ...     for selrestr in themrole.findall('SELRESTRS/SELRESTR'):
-    ...         print('[%(Value)s%(type)s]' % selrestr.attrib, end=' ')
-    ...     print()
-    Theme
-    Experiencer [+animate]
-    Predicate
-
-The Verbnet corpus also provides a variety of pretty printing
-functions that can be used to display the xml contents in a more
-concise form.  The simplest such method is `pprint()`:
-
-    >>> print(verbnet.pprint('57'))
-    weather-57
-      Subclasses: (none)
-      Members: blow clear drizzle fog freeze gust hail howl lightning mist
-        mizzle pelt pour precipitate rain roar shower sleet snow spit spot
-        sprinkle storm swelter teem thaw thunder
-      Thematic roles:
-        * Theme[+concrete +force]
-      Frames:
-        Intransitive (Expletive Subject)
-          Example: It's raining.
-          Syntax: LEX[it] LEX[[+be]] VERB
-          Semantics:
-            * weather(during(E), Weather_type, ?Theme)
-        NP (Expletive Subject, Theme Object)
-          Example: It's raining cats and dogs.
-          Syntax: LEX[it] LEX[[+be]] VERB NP[Theme]
-          Semantics:
-            * weather(during(E), Weather_type, Theme)
-        PP (Expletive Subject, Theme-PP)
-          Example: It was pelting with rain.
-          Syntax: LEX[it[+be]] VERB PREP[with] NP[Theme]
-          Semantics:
-            * weather(during(E), Weather_type, Theme)
-
-Verbnet gives us frames that link the syntax and semantics using an example.
-These frames are part of the corpus and we can use `frames()` to get a frame
-for a given verbnet class.
-
-    >>> frame = verbnet.frames('57')
-    >>> frame == [{'semantics': [{'arguments': [{'value': 'during(E)', 'type': 'Event'}, {'value': 'Weather_type', 'type': 'VerbSpecific'}, {'value': '?Theme', 'type': 'ThemRole'}], 'predicate_value': 'weather'}], 'example': "It's raining.", 'syntax': [{'pos_tag': 'LEX', 'modifiers': {'value': 'it', 'synrestrs': [], 'selrestrs': []}}, {'pos_tag': 'LEX', 'modifiers': {'value': '[+be]', 'synrestrs': [], 'selrestrs': []}}, {'pos_tag': 'VERB', 'modifiers': {'value': '', 'synrestrs': [], 'selrestrs': []}}], 'description': {'primary': 'Intransitive', 'secondary': 'Expletive Subject'}}, {'semantics': [{'arguments': [{'value': 'during(E)', 'type': 'Event'}, {'value': 'Weather_type', 'type': 'VerbSpecific'}, {'value': 'Theme', 'type': 'ThemRole'}], 'predicate_value': 'weather'}], 'example': "It's raining cats and dogs.", 'syntax': [{'pos_tag': 'LEX', 'modifiers': {'value': 'it', 'synrestrs': [], 'selrestrs': []}}, {'pos_tag': 'LEX', 'modifiers': {'value': '[+be]', 'synrestrs': [], 'selrestrs': []}}, {'pos_tag': 'VERB', 'modifiers': {'value': '', 'synrestrs': [], 'selrestrs': []}}, {'pos_tag': 'NP', 'modifiers': {'value': 'Theme', 'synrestrs': [], 'selrestrs': []}}], 'description': {'primary': 'NP', 'secondary': 'Expletive Subject, Theme Object'}}, {'semantics': [{'arguments': [{'value': 'during(E)', 'type': 'Event'}, {'value': 'Weather_type', 'type': 'VerbSpecific'}, {'value': 'Theme', 'type': 'ThemRole'}], 'predicate_value': 'weather'}], 'example': 'It was pelting with rain.', 'syntax': [{'pos_tag': 'LEX', 'modifiers': {'value': 'it[+be]', 'synrestrs': [], 'selrestrs': []}}, {'pos_tag': 'VERB', 'modifiers': {'value': '', 'synrestrs': [], 'selrestrs': []}}, {'pos_tag': 'PREP', 'modifiers': {'value': 'with', 'synrestrs': [], 'selrestrs': []}}, {'pos_tag': 'NP', 'modifiers': {'value': 'Theme', 'synrestrs': [], 'selrestrs': []}}], 'description': {'primary': 'PP', 'secondary': 'Expletive Subject, Theme-PP'}}]
-    True
-
-The Verbnet corpus lets us access thematic roles individually using `themroles()`.
-
-    >>> themroles = verbnet.themroles('57')
-    >>> themroles == [{'modifiers': [{'type': 'concrete', 'value': '+'}, {'type': 'force', 'value': '+'}], 'type': 'Theme'}]
-    True
-
-Verbnet classes may also have subclasses sharing similar syntactic and semantic properties
-while having differences with the superclass. The Verbnet corpus allows us to access these
-subclasses using `subclasses()`.
-
-    >>> print(verbnet.subclasses('9.1')) #Testing for 9.1 since '57' does not have subclasses
-    ['put-9.1-1', 'put-9.1-2']
-
-
-nps_chat
---------
-
-The NPS Chat Corpus, Release 1.0 consists of over 10,000 posts in age-specific
-chat rooms, which have been anonymized, POS-tagged and dialogue-act tagged.
-
-    >>> print(nltk.corpus.nps_chat.words())
-    ['now', 'im', 'left', 'with', 'this', 'gay', ...]
-    >>> print(nltk.corpus.nps_chat.tagged_words())
-    [('now', 'RB'), ('im', 'PRP'), ('left', 'VBD'), ...]
-    >>> print(nltk.corpus.nps_chat.tagged_posts()) # doctest: +NORMALIZE_WHITESPACE
-    [[('now', 'RB'), ('im', 'PRP'), ('left', 'VBD'), ('with', 'IN'),
-    ('this', 'DT'), ('gay', 'JJ'), ('name', 'NN')], [(':P', 'UH')], ...]
-
-We can access the XML elements corresponding to individual posts.  These elements
-have ``class`` and ``user`` attributes that we can access using ``p.attrib['class']``
-and ``p.attrib['user']``.  They also have text content, accessed using ``p.text``.
-
-    >>> print(nltk.corpus.nps_chat.xml_posts()) # doctest: +ELLIPSIS
-    [<Element 'Post' at 0...>, <Element 'Post' at 0...>, ...]
-    >>> posts = nltk.corpus.nps_chat.xml_posts()
-    >>> sorted(nltk.FreqDist(p.attrib['class'] for p in posts).keys())
-    ['Accept', 'Bye', 'Clarify', 'Continuer', 'Emotion', 'Emphasis',
-    'Greet', 'Other', 'Reject', 'Statement', 'System', 'nAnswer',
-    'whQuestion', 'yAnswer', 'ynQuestion']
-    >>> posts[0].text
-    'now im left with this gay name'
-
-In addition to the above methods for accessing tagged text, we can navigate
-the XML structure directly, as follows:
-
-    >>> tokens = posts[0].findall('terminals/t')
-    >>> [t.attrib['pos'] + "/" + t.attrib['word'] for t in tokens]
-    ['RB/now', 'PRP/im', 'VBD/left', 'IN/with', 'DT/this', 'JJ/gay', 'NN/name']
-
-multext_east
-------------
-
-The Multext-East Corpus consists of POS-tagged versions of George Orwell's book
-1984 in 12 languages: English, Czech, Hungarian, Macedonian, Slovenian, Serbian,
-Slovak, Romanian, Estonian, Farsi, Bulgarian and Polish.
-The corpus can be accessed using the usual methods for tagged corpora. The tagset
-can be transformed from the Multext-East specific MSD tags to the Universal tagset
-using the "tagset" parameter of all functions returning tagged parts of the corpus.
-
-    >>> print(nltk.corpus.multext_east.words("oana-en.xml"))
-    ['It', 'was', 'a', 'bright', ...]
-    >>> print(nltk.corpus.multext_east.tagged_words("oana-en.xml"))
-    [('It', '#Pp3ns'), ('was', '#Vmis3s'), ('a', '#Di'), ...]
-    >>> print(nltk.corpus.multext_east.tagged_sents("oana-en.xml", "universal"))
-    [[('It', 'PRON'), ('was', 'VERB'), ('a', 'DET'), ...]
-
-
-
----------------------
-Corpus Reader Classes
----------------------
-
-NLTK's *corpus reader* classes are used to access the contents of a
-diverse set of corpora.  Each corpus reader class is specialized to
-handle a specific corpus format.  Examples include the
-`PlaintextCorpusReader`, which handles corpora that consist of a set
-of unannotated text files, and the `BracketParseCorpusReader`, which
-handles corpora that consist of files containing
-parenthesis-delineated parse trees.
-
-Automatically Created Corpus Reader Instances
-=============================================
-
-When the `nltk.corpus` module is imported, it automatically creates a
-set of corpus reader instances that can be used to access the corpora
-in the NLTK data distribution.  Here is a small sample of those
-corpus reader instances:
-
-    >>> import nltk
-    >>> nltk.corpus.brown # doctest: +ELLIPSIS
-    <CategorizedTaggedCorpusReader ...>
-    >>> nltk.corpus.treebank # doctest: +ELLIPSIS
-    <BracketParseCorpusReader ...>
-    >>> nltk.corpus.names # doctest: +ELLIPSIS
-    <WordListCorpusReader ...>
-    >>> nltk.corpus.genesis # doctest: +ELLIPSIS
-    <PlaintextCorpusReader ...>
-    >>> nltk.corpus.inaugural # doctest: +ELLIPSIS
-    <PlaintextCorpusReader ...>
-
-This sample illustrates that different corpus reader classes are used
-to read different corpora; but that the same corpus reader class may
-be used for more than one corpus (e.g., ``genesis`` and ``inaugural``).
-
-Creating New Corpus Reader Instances
-====================================
-
-Although the `nltk.corpus` module automatically creates corpus reader
-instances for the corpora in the NLTK data distribution, you may
-sometimes need to create your own corpus reader.  In particular, you
-would need to create your own corpus reader if you want...
-
-- To access a corpus that is not included in the NLTK data
-  distribution.
-
-- To access a full copy of a corpus for which the NLTK data
-  distribution only provides a sample.
-
-- To access a corpus using a customized corpus reader (e.g., with
-  a customized tokenizer).
-
-To create a new corpus reader, you will first need to look up the
-signature for that corpus reader's constructor.  Different corpus
-readers have different constructor signatures, but most of the
-constructor signatures have the basic form::
-
-    SomeCorpusReader(root, files, ...options...)
-
-Where ``root`` is an absolute path to the directory containing the
-corpus data files; ``files`` is either a list of file names (relative
-to ``root``) or a regexp specifying which files should be included;
-and ``options`` are additional reader-specific options.  For example,
-we can create a customized corpus reader for the genesis corpus that
-uses a different sentence tokenizer as follows:
-
-    >>> # Find the directory where the corpus lives.
-    >>> genesis_dir = nltk.data.find('corpora/genesis')
-    >>> # Create our custom sentence tokenizer.
-    >>> my_sent_tokenizer = nltk.RegexpTokenizer('[^.!?]+')
-    >>> # Create the new corpus reader object.
-    >>> my_genesis = nltk.corpus.PlaintextCorpusReader(
-    ...     genesis_dir, '.*\.txt', sent_tokenizer=my_sent_tokenizer)
-    >>> # Use the new corpus reader object.
-    >>> print(my_genesis.sents('english-kjv.txt')[0]) # doctest: +NORMALIZE_WHITESPACE
-    ['In', 'the', 'beginning', 'God', 'created', 'the', 'heaven',
-     'and', 'the', 'earth']
-
-If you wish to read your own plaintext corpus, which is stored in the
-directory '/usr/share/some-corpus', then you can create a corpus
-reader for it with::
-
-    >>> my_corpus = nltk.corpus.PlaintextCorpusReader(
-    ...     '/usr/share/some-corpus', '.*\.txt') # doctest: +SKIP
-
-For a complete list of corpus reader subclasses, see the API
-documentation for `nltk.corpus.reader`.
-
-Corpus Types
-============
-
-Corpora vary widely in the types of content they include.  This is
-reflected in the fact that the base class `CorpusReader` only defines
-a few general-purpose methods for listing and accessing the files that
-make up a corpus.  It is up to the subclasses to define *data access
-methods* that provide access to the information in the corpus.
-However, corpus reader subclasses should be consistent in their
-definitions of these data access methods wherever possible.
-
-At a high level, corpora can be divided into three basic types:
-
-- A *token corpus* contains information about specific occurrences of
-  language use (or linguistic tokens), such as dialogues or written
-  texts.  Examples of token corpora are collections of written text
-  and collections of speech.
-
-- A *type corpus*, or *lexicon*, contains information about a coherent
-  set of lexical items (or linguistic types).  Examples of lexicons
-  are dictionaries and word lists.
-
-- A *language description corpus* contains information about a set of
-  non-lexical linguistic constructs, such as grammar rules.
-
-However, many individual corpora blur the distinctions between these
-types.  For example, corpora that are primarily lexicons may include
-token data in the form of example sentences; and corpora that are
-primarily token corpora may be accompanied by one or more word lists
-or other lexical data sets.
-
-Because corpora vary so widely in their information content, we have
-decided that it would not be wise to use separate corpus reader base
-classes for different corpus types.  Instead, we simply try to make
-the corpus readers consistent wherever possible, but let them differ
-where the underlying data itself differs.
-
-Common Corpus Reader Methods
-============================
-
-As mentioned above, there are only a handful of methods that all
-corpus readers are guaranteed to implement.  These methods provide
-access to the files that contain the corpus data.  Every corpus is
-assumed to consist of one or more files, all located in a common root
-directory (or in subdirectories of that root directory).  The absolute
-path to the root directory is stored in the ``root`` property:
-
-    >>> import os
-    >>> str(nltk.corpus.genesis.root).replace(os.path.sep,'/') # doctest: +ELLIPSIS
-    '.../nltk_data/corpora/genesis'
-
-Each file within the corpus is identified by a platform-independent
-identifier, which is basically a path string that uses ``/`` as the
-path separator.  I.e., this identifier can be converted to a relative
-path as follows:
-
-    >>> some_corpus_file_id = nltk.corpus.reuters.fileids()[0]
-    >>> import os.path
-    >>> os.path.normpath(some_corpus_file_id).replace(os.path.sep,'/')
-    'test/14826'
-
-To get a list of all data files that make up a corpus, use the
-``fileids()`` method.  In some corpora, these files will not all contain
-the same type of data; for example, for the ``nltk.corpus.timit``
-corpus, ``fileids()`` will return a list including text files, word
-segmentation files, phonetic transcription files, sound files, and
-metadata files.  For corpora with diverse file types, the ``fileids()``
-method will often take one or more optional arguments, which can be
-used to get a list of the files with a specific file type:
-
-    >>> nltk.corpus.timit.fileids() # doctest: +ELLIPSIS
-    ['dr1-fvmh0/sa1.phn', 'dr1-fvmh0/sa1.txt', 'dr1-fvmh0/sa1.wav', ...]
-    >>> nltk.corpus.timit.fileids('phn') # doctest: +ELLIPSIS
-    ['dr1-fvmh0/sa1.phn', 'dr1-fvmh0/sa2.phn', 'dr1-fvmh0/si1466.phn', ...]
-
-In some corpora, the files are divided into distinct categories.  For
-these corpora, the ``fileids()`` method takes an optional argument,
-which can be used to get a list of the files within a specific category:
-
-    >>> nltk.corpus.brown.fileids('hobbies') # doctest: +ELLIPSIS
-    ['ce01', 'ce02', 'ce03', 'ce04', 'ce05', 'ce06', 'ce07', ...]
-
-The ``abspath()`` method can be used to find the absolute path to a
-corpus file, given its file identifier:
-
-    >>> str(nltk.corpus.brown.abspath('ce06')).replace(os.path.sep,'/') # doctest: +ELLIPSIS
-    '.../corpora/brown/ce06'
-
-The ``abspaths()`` method can be used to find the absolute paths for
-one corpus file, a list of corpus files, or (if no fileids are specified),
-all corpus files.
-
-This method is mainly useful as a helper method when defining corpus
-data access methods, since data access methods can usually be called
-with a string argument (to get a view for a specific file), with a
-list argument (to get a view for a specific list of files), or with no
-argument (to get a view for the whole corpus).
-
-Data Access Methods
-===================
-
-Individual corpus reader subclasses typically extend this basic set of
-file-access methods with one or more *data access methods*, which provide
-easy access to the data contained in the corpus.  The signatures for
-data access methods often have the basic form::
-
-    corpus_reader.some_data_access(fileids=None, ...options...)
-
-Where ``fileids`` can be a single file identifier string (to get a view
-for a specific file); a list of file identifier strings (to get a view
-for a specific list of files); or None (to get a view for the entire
-corpus).  Some of the common data access methods, and their return
-types, are:
-
-  - I{corpus}.words(): list of str
-  - I{corpus}.sents(): list of (list of str)
-  - I{corpus}.paras(): list of (list of (list of str))
-  - I{corpus}.tagged_words(): list of (str,str) tuple
-  - I{corpus}.tagged_sents(): list of (list of (str,str))
-  - I{corpus}.tagged_paras(): list of (list of (list of (str,str)))
-  - I{corpus}.chunked_sents(): list of (Tree w/ (str,str) leaves)
-  - I{corpus}.parsed_sents(): list of (Tree with str leaves)
-  - I{corpus}.parsed_paras(): list of (list of (Tree with str leaves))
-  - I{corpus}.xml(): A single xml ElementTree
-  - I{corpus}.raw(): str (unprocessed corpus contents)
-
-For example, the `words()` method is supported by many different
-corpora, and returns a flat list of word strings:
-
-    >>> nltk.corpus.brown.words()
-    ['The', 'Fulton', 'County', 'Grand', 'Jury', ...]
-    >>> nltk.corpus.treebank.words()
-    ['Pierre', 'Vinken', ',', '61', 'years', 'old', ...]
-    >>> nltk.corpus.conll2002.words()
-    [u'Sao', u'Paulo', u'(', u'Brasil', u')', u',', u'23', ...]
-    >>> nltk.corpus.genesis.words()
-    [u'In', u'the', u'beginning', u'God', u'created', ...]
-
-On the other hand, the `tagged_words()` method is only supported by
-corpora that include part-of-speech annotations:
-
-    >>> nltk.corpus.brown.tagged_words()
-    [('The', 'AT'), ('Fulton', 'NP-TL'), ...]
-    >>> nltk.corpus.treebank.tagged_words()
-    [('Pierre', 'NNP'), ('Vinken', 'NNP'), ...]
-    >>> nltk.corpus.conll2002.tagged_words()
-    [(u'Sao', u'NC'), (u'Paulo', u'VMI'), (u'(', u'Fpa'), ...]
-    >>> nltk.corpus.genesis.tagged_words()
-    Traceback (most recent call last):
-      ...
-    AttributeError: 'PlaintextCorpusReader' object has no attribute 'tagged_words'
-
-Although most corpus readers use file identifiers to index their
-content, some corpora use different identifiers instead.  For example,
-the data access methods for the ``timit`` corpus use *utterance
-identifiers* to select which corpus items should be returned:
-
-    >>> nltk.corpus.timit.utteranceids() # doctest: +ELLIPSIS
-    ['dr1-fvmh0/sa1', 'dr1-fvmh0/sa2', 'dr1-fvmh0/si1466', ...]
-    >>> nltk.corpus.timit.words('dr1-fvmh0/sa2')
-    ["don't", 'ask', 'me', 'to', 'carry', 'an', 'oily', 'rag', 'like', 'that']
-
-Attempting to call ``timit``\ 's data access methods with a file
-identifier will result in an exception:
-
-    >>> nltk.corpus.timit.fileids() # doctest: +ELLIPSIS
-    ['dr1-fvmh0/sa1.phn', 'dr1-fvmh0/sa1.txt', 'dr1-fvmh0/sa1.wav', ...]
-    >>> nltk.corpus.timit.words('dr1-fvmh0/sa1.txt') # doctest: +SKIP
-    Traceback (most recent call last):
-      ...
-    IOError: No such file or directory: '.../dr1-fvmh0/sa1.txt.wrd'
-
-As another example, the ``propbank`` corpus defines the ``roleset()``
-method, which expects a roleset identifier, not a file identifier:
-
-    >>> roleset = nltk.corpus.propbank.roleset('eat.01')
-    >>> from xml.etree import ElementTree as ET
-    >>> print(ET.tostring(roleset).decode('utf8')) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-    <roleset id="eat.01" name="consume" vncls="39.1">
-      <roles>
-        <role descr="consumer, eater" n="0">...</role>...
-      </roles>...
-    </roleset>...
-
-Stream Backed Corpus Views
-==========================
-An important feature of NLTK's corpus readers is that many of them
-access the underlying data files using "corpus views."  A *corpus
-view* is an object that acts like a simple data structure (such as a
-list), but does not store the data elements in memory; instead, data
-elements are read from the underlying data files on an as-needed
-basis.
-
-By only loading items from the file on an as-needed basis, corpus
-views maintain both memory efficiency and responsiveness.  The memory
-efficiency of corpus readers is important because some corpora contain
-very large amounts of data, and storing the entire data set in memory
-could overwhelm many machines.  The responsiveness is important when
-experimenting with corpora in interactive sessions and in in-class
-demonstrations.
-
-The most common corpus view is the `StreamBackedCorpusView`, which
-acts as a read-only list of tokens.  Two additional corpus view
-classes, `ConcatenatedCorpusView` and `LazySubsequence`, make it
-possible to create concatenations and take slices of
-`StreamBackedCorpusView` objects without actually storing the
-resulting list-like object's elements in memory.
-
-In the future, we may add additional corpus views that act like other
-basic data structures, such as dictionaries.
-
-Writing New Corpus Readers
-==========================
-
-In order to add support for new corpus formats, it is necessary to
-define new corpus reader classes.  For many corpus formats, writing
-new corpus readers is relatively straightforward.  In this section,
-we'll describe what's involved in creating a new corpus reader.  If
-you do create a new corpus reader, we encourage you to contribute it
-back to the NLTK project.
-
-Don't Reinvent the Wheel
-------------------------
-Before you start writing a new corpus reader, you should check to be
-sure that the desired format can't be read using an existing corpus
-reader with appropriate constructor arguments.  For example, although
-the `TaggedCorpusReader` assumes that words and tags are separated by
-``/`` characters by default, an alternative tag-separation character
-can be specified via the ``sep`` constructor argument.  You should
-also check whether the new corpus format can be handled by subclassing
-an existing corpus reader, and tweaking a few methods or variables.
-
-Design
-------
-If you decide to write a new corpus reader from scratch, then you
-should first decide which data access methods you want the reader to
-provide, and what their signatures should be.  You should look at
-existing corpus readers that process corpora with similar data
-contents, and try to be consistent with those corpus readers whenever
-possible.
-
-You should also consider what sets of identifiers are appropriate for
-the corpus format.  Where it's practical, file identifiers should be
-used.  However, for some corpora, it may make sense to use additional
-sets of identifiers.  Each set of identifiers should have a distinct
-name (e.g., fileids, utteranceids, rolesets); and you should be consistent
-in using that name to refer to that identifier.  Do not use parameter
-names like ``id``, which leave it unclear what type of identifier is
-required.
-
-Once you've decided what data access methods and identifiers are
-appropriate for your corpus, you should decide if there are any
-customizable parameters that you'd like the corpus reader to handle.
-These parameters make it possible to use a single corpus reader to
-handle a wider variety of corpora.  The ``sep`` argument for
-`TaggedCorpusReader`, mentioned above, is an example of a customizable
-corpus reader parameter.
-
-Implementation
---------------
-
-Constructor
-~~~~~~~~~~~
-If your corpus reader implements any customizable parameters, then
-you'll need to override the constructor.  Typically, the new
-constructor will first call its base class's constructor, and then
-store the customizable parameters.  For example, the
-`ConllChunkCorpusReader`\ 's constructor is defined as follows:
-
-    def __init__(self, root, fileids, chunk_types, encoding='utf8',
-                 tagset=None, separator=None):
-        ConllCorpusReader.__init__(
-                self, root, fileids, ('words', 'pos', 'chunk'),
-                chunk_types=chunk_types, encoding=encoding,
-                tagset=tagset, separator=separator)
-
-If your corpus reader does not implement any customization parameters,
-then you can often just inherit the base class's constructor.
-
-Data Access Methods
-~~~~~~~~~~~~~~~~~~~
-
-The most common type of data access method takes an argument
-identifying which files to access, and returns a view covering those
-files.  This argument may be a single file identifier string (to get a
-view for a specific file); a list of file identifier strings (to get a
-view for a specific list of files); or None (to get a view for the
-entire corpus).  The method's implementation converts this argument to
-a list of path names using the `abspaths()` method, which handles all
-three value types (string, list, and None):
-
-    >>> print(str(nltk.corpus.brown.abspaths()).replace('\\\\','/')) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-    [FileSystemPathPointer('.../corpora/brown/ca01'),
-     FileSystemPathPointer('.../corpora/brown/ca02'), ...]
-    >>> print(str(nltk.corpus.brown.abspaths('ce06')).replace('\\\\','/')) # doctest: +ELLIPSIS
-    [FileSystemPathPointer('.../corpora/brown/ce06')]
-    >>> print(str(nltk.corpus.brown.abspaths(['ce06', 'ce07'])).replace('\\\\','/')) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-    [FileSystemPathPointer('.../corpora/brown/ce06'),
-     FileSystemPathPointer('.../corpora/brown/ce07')]
-
-An example of this type of method is the `words()` method, defined by
-the `PlaintextCorpusReader` as follows:
-
-    >>> def words(self, fileids=None):
-    ...     return concat([self.CorpusView(fileid, self._read_word_block)
-    ...                    for fileid in self.abspaths(fileids)])
-
-This method first uses `abspaths()` to convert ``fileids`` to a list of
-absolute paths.  It then creates a corpus view for each file, using
-the `PlaintextCorpusReader._read_word_block()` method to read elements
-from the data file (see the discussion of corpus views below).
-Finally, it combines these corpus views using the
-`nltk.corpus.reader.util.concat()` function.
-
-When writing a corpus reader for a corpus that is never expected to be
-very large, it can sometimes be appropriate to read the files
-directly, rather than using a corpus view.  For example, the
-`WordListCorpusReader` class defines its `words()` method as follows:
-
-    >>> def words(self, fileids=None):
-    ...     return concat([[w for w in open(fileid).read().split('\n') if w]
-    ...                    for fileid in self.abspaths(fileids)])
-
-(This is usually more appropriate for lexicons than for token corpora.)
-
-If the type of data returned by a data access method is one for which
-NLTK has a conventional representation (e.g., words, tagged words, and
-parse trees), then you should use that representation.  Otherwise, you
-may find it necessary to define your own representation.  For data
-structures that are relatively corpus-specific, it's usually best to
-define new classes for these elements.  For example, the ``propbank``
-corpus defines the `PropbankInstance` class to store the semantic role
-labeling instances described by the corpus; and the ``ppattach``
-corpus defines the `PPAttachment` class to store the prepositional
-attachment instances described by the corpus.
-
-Corpus Views
-~~~~~~~~~~~~
-.. (Much of the content for this section is taken from the
-   StreamBackedCorpusView docstring.)
-
-The heart of a `StreamBackedCorpusView` is its *block reader*
-function, which reads zero or more tokens from a stream, and returns
-them as a list.  A very simple example of a block reader is:
-
-    >>> def simple_block_reader(stream):
-    ...     return stream.readline().split()
-
-This simple block reader reads a single line at a time, and returns a
-single token (consisting of a string) for each whitespace-separated
-substring on the line.  A `StreamBackedCorpusView` built from this
-block reader will act like a read-only list of all the
-whitespace-separated tokens in an underlying file.
-
-When deciding how to define the block reader for a given corpus,
-careful consideration should be given to the size of blocks handled by
-the block reader.  Smaller block sizes will increase the memory
-requirements of the corpus view's internal data structures (by 2
-integers per block).  On the other hand, larger block sizes may
-decrease performance for random access to the corpus.  (But note that
-larger block sizes will *not* decrease performance for iteration.)
-
-Internally, the `StreamBackedCorpusView` class maintains a partial
-mapping from token index to file position, with one entry per block.
-When a token with a given index *i* is requested, the corpus view
-constructs it as follows:
-
-1. First, it searches the toknum/filepos mapping for the token index
-   closest to (but less than or equal to) *i*.
-
-2. Then, starting at the file position corresponding to that index, it
-   reads one block at a time using the block reader until it reaches
-   the requested token.
-
-The toknum/filepos mapping is created lazily: it is initially empty,
-but every time a new block is read, the block's initial token is added
-to the mapping.  (Thus, the toknum/filepos map has one entry per
-block.)
-
-You can create your own corpus view in one of two ways:
-
-1. Call the `StreamBackedCorpusView` constructor, and provide your
-   block reader function via the ``block_reader`` argument.
-
-2. Subclass `StreamBackedCorpusView`, and override the
-   `read_block()` method.
-
-The first option is usually easier, but the second option can allow
-you to write a single `read_block` method whose behavior can be
-customized by different parameters to the subclass's constructor.  For
-an example of this design pattern, see the `TaggedCorpusView` class,
-which is used by `TaggedCorpusReader`.
-
-----------------
-Regression Tests
-----------------
-
-The following helper functions are used to create and then delete
-testing corpora that are stored in temporary directories.  These
-testing corpora are used to make sure the readers work correctly.
-
-    >>> import tempfile, os.path, textwrap
-    >>> def make_testcorpus(ext='', **fileids):
-    ...     root = tempfile.mkdtemp()
-    ...     for fileid, contents in fileids.items():
-    ...         fileid += ext
-    ...         f = open(os.path.join(root, fileid), 'w')
-    ...         f.write(textwrap.dedent(contents))
-    ...         f.close()
-    ...     return root
-    >>> def del_testcorpus(root):
-    ...     for fileid in os.listdir(root):
-    ...         os.remove(os.path.join(root, fileid))
-    ...     os.rmdir(root)
-
-Plaintext Corpus Reader
-=======================
-The plaintext corpus reader is used to access corpora that consist of
-unprocessed plaintext data.  It assumes that paragraph breaks are
-indicated by blank lines.  Sentences and words can be tokenized using
-the default tokenizers, or by custom tokenizers specified as
-parameters to the constructor.
-
-    >>> root = make_testcorpus(ext='.txt',
-    ...     a="""\
-    ...     This is the first sentence.  Here is another
-    ...     sentence!  And here's a third sentence.
-    ...
-    ...     This is the second paragraph.  Tokenization is currently
-    ...     fairly simple, so the period in Mr. gets tokenized.
-    ...     """,
-    ...     b="""This is the second file.""")
-
-    >>> from nltk.corpus.reader.plaintext import PlaintextCorpusReader
-
-The list of documents can be specified explicitly, or implicitly (using a
-regexp).  The ``ext`` argument specifies a file extension.
-
-    >>> corpus = PlaintextCorpusReader(root, ['a.txt', 'b.txt'])
-    >>> corpus.fileids()
-    ['a.txt', 'b.txt']
-    >>> corpus = PlaintextCorpusReader(root, '.*\.txt')
-    >>> corpus.fileids()
-    ['a.txt', 'b.txt']
-
-The directory containing the corpus is corpus.root:
-
-    >>> str(corpus.root) == str(root)
-    True
-
-We can get a list of words, or the raw string:
-
-    >>> corpus.words()
-    ['This', 'is', 'the', 'first', 'sentence', '.', ...]
-    >>> corpus.raw()[:40]
-    'This is the first sentence.  Here is ano'
-
-Check that reading individual documents works, and reading all documents at
-once works:
-
-    >>> len(corpus.words()), [len(corpus.words(d)) for d in corpus.fileids()]
-    (46, [40, 6])
-    >>> corpus.words('a.txt')
-    ['This', 'is', 'the', 'first', 'sentence', '.', ...]
-    >>> corpus.words('b.txt')
-    ['This', 'is', 'the', 'second', 'file', '.']
-    >>> corpus.words()[:4], corpus.words()[-4:]
-    (['This', 'is', 'the', 'first'], ['the', 'second', 'file', '.'])
-
-We're done with the test corpus:
-
-    >>> del_testcorpus(root)
-
-Test the plaintext corpora that come with nltk:
-
-    >>> from nltk.corpus import abc, genesis, inaugural
-    >>> from nltk.corpus import state_union, webtext
-    >>> for corpus in (abc, genesis, inaugural, state_union,
-    ...                webtext):
-    ...     print(str(corpus).replace('\\\\','/'))
-    ...     print('  ', repr(corpus.fileids())[:60])
-    ...     print('  ', repr(corpus.words()[:10])[:60])
-    <PlaintextCorpusReader in '.../nltk_data/corpora/ab...'>
-       ['rural.txt', 'science.txt']
-       ['PM', 'denies', 'knowledge', 'of', 'AWB', ...
-    <PlaintextCorpusReader in '.../nltk_data/corpora/genesi...'>
-       ['english-kjv.txt', 'english-web.txt', 'finnish.txt', ...
-       ['In', 'the', 'beginning', 'God', 'created', 'the', ...
-    <PlaintextCorpusReader in '.../nltk_data/corpora/inaugura...'>
-       ['1789-Washington.txt', '1793-Washington.txt', ...
-       ['Fellow', '-', 'Citizens', 'of', 'the', 'Senate', ...
-    <PlaintextCorpusReader in '.../nltk_data/corpora/state_unio...'>
-       ['1945-Truman.txt', '1946-Truman.txt', ...
-       ['PRESIDENT', 'HARRY', 'S', '.', 'TRUMAN', "'", ...
-    <PlaintextCorpusReader in '.../nltk_data/corpora/webtex...'>
-       ['firefox.txt', 'grail.txt', 'overheard.txt', ...
-       ['Cookie', 'Manager', ':', '"', 'Don', "'", 't', ...
-
-
-Tagged Corpus Reader
-====================
-The Tagged Corpus reader can give us words, sentences, and paragraphs,
-each tagged or untagged.  All of the read methods can take one item
-(in which case they return the contents of that file) or a list of
-documents (in which case they concatenate the contents of those files).
-By default, they apply to all documents in the corpus.
-
-    >>> root = make_testcorpus(
-    ...     a="""\
-    ...     This/det is/verb the/det first/adj sentence/noun ./punc
-    ...     Here/det  is/verb  another/adj    sentence/noun ./punc
-    ...     Note/verb that/comp you/pron can/verb use/verb \
-    ...           any/noun tag/noun set/noun
-    ...
-    ...     This/det is/verb the/det second/adj paragraph/noun ./punc
-    ...     word/n without/adj a/det tag/noun :/: hello ./punc
-    ...     """,
-    ...     b="""\
-    ...     This/det is/verb the/det second/adj file/noun ./punc
-    ...     """)
-
-    >>> from nltk.corpus.reader.tagged import TaggedCorpusReader
-    >>> corpus = TaggedCorpusReader(root, list('ab'))
-    >>> corpus.fileids()
-    ['a', 'b']
-    >>> str(corpus.root) == str(root)
-    True
-    >>> corpus.words()
-    ['This', 'is', 'the', 'first', 'sentence', '.', ...]
-    >>> corpus.sents() # doctest: +ELLIPSIS
-    [['This', 'is', 'the', 'first', ...], ['Here', 'is', 'another'...], ...]
-    >>> corpus.paras() # doctest: +ELLIPSIS
-    [[['This', ...], ['Here', ...], ...], [['This', ...], ...], ...]
-    >>> corpus.tagged_words() # doctest: +ELLIPSIS
-    [('This', 'DET'), ('is', 'VERB'), ('the', 'DET'), ...]
-    >>> corpus.tagged_sents() # doctest: +ELLIPSIS
-    [[('This', 'DET'), ('is', 'VERB'), ...], [('Here', 'DET'), ...], ...]
-    >>> corpus.tagged_paras() # doctest: +ELLIPSIS
-    [[[('This', 'DET'), ...], ...], [[('This', 'DET'), ...], ...], ...]
-    >>> corpus.raw()[:40]
-    'This/det is/verb the/det first/adj sente'
-    >>> len(corpus.words()), [len(corpus.words(d)) for d in corpus.fileids()]
-    (38, [32, 6])
-    >>> len(corpus.sents()), [len(corpus.sents(d)) for d in corpus.fileids()]
-    (6, [5, 1])
-    >>> len(corpus.paras()), [len(corpus.paras(d)) for d in corpus.fileids()]
-    (3, [2, 1])
-    >>> print(corpus.words('a'))
-    ['This', 'is', 'the', 'first', 'sentence', '.', ...]
-    >>> print(corpus.words('b'))
-    ['This', 'is', 'the', 'second', 'file', '.']
-    >>> del_testcorpus(root)
-
-The Brown Corpus uses the tagged corpus reader:
-
-    >>> from nltk.corpus import brown
-    >>> brown.fileids() # doctest: +ELLIPSIS
-    ['ca01', 'ca02', 'ca03', 'ca04', 'ca05', 'ca06', 'ca07', ...]
-    >>> brown.categories() # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-    ['adventure', 'belles_lettres', 'editorial', 'fiction', 'government', 'hobbies', 'humor',
-    'learned', 'lore', 'mystery', 'news', 'religion', 'reviews', 'romance', 'science_fiction']
-    >>> print(repr(brown.root).replace('\\\\','/')) # doctest: +ELLIPSIS
-    FileSystemPathPointer('.../corpora/brown')
-    >>> brown.words()
-    ['The', 'Fulton', 'County', 'Grand', 'Jury', ...]
-    >>> brown.sents() # doctest: +ELLIPSIS
-    [['The', 'Fulton', 'County', 'Grand', ...], ...]
-    >>> brown.paras() # doctest: +ELLIPSIS
-    [[['The', 'Fulton', 'County', ...]], [['The', 'jury', ...]], ...]
-    >>> brown.tagged_words() # doctest: +ELLIPSIS
-    [('The', 'AT'), ('Fulton', 'NP-TL'), ...]
-    >>> brown.tagged_sents() # doctest: +ELLIPSIS
-    [[('The', 'AT'), ('Fulton', 'NP-TL'), ('County', 'NN-TL'), ...], ...]
-    >>> brown.tagged_paras() # doctest: +ELLIPSIS
-    [[[('The', 'AT'), ...]], [[('The', 'AT'), ...]], ...]
-
-Verbnet Corpus Reader
-=====================
-
-Make sure we're picking up the right number of elements:
-
-    >>> from nltk.corpus import verbnet
-    >>> len(verbnet.lemmas())
-    3621
-    >>> len(verbnet.wordnetids())
-    4953
-    >>> len(verbnet.classids())
-    429
-
-Selecting classids based on various selectors:
-
-    >>> verbnet.classids(lemma='take') # doctest: +NORMALIZE_WHITESPACE
-    ['bring-11.3', 'characterize-29.2', 'convert-26.6.2', 'cost-54.2',
-    'fit-54.3', 'performance-26.7-2', 'steal-10.5']
-    >>> verbnet.classids(wordnetid='lead%2:38:01')
-    ['accompany-51.7']
-    >>> verbnet.classids(fileid='approve-77.xml')
-    ['approve-77']
-    >>> verbnet.classids(classid='admire-31.2') # subclasses
-    ['admire-31.2-1']
-
-vnclass() accepts filenames, long ids, and short ids:
-
-    >>> a = ElementTree.tostring(verbnet.vnclass('admire-31.2.xml'))
-    >>> b = ElementTree.tostring(verbnet.vnclass('admire-31.2'))
-    >>> c = ElementTree.tostring(verbnet.vnclass('31.2'))
-    >>> a == b == c
-    True
-
-fileids() can be used to get files based on verbnet class ids:
-
-    >>> verbnet.fileids('admire-31.2')
-    ['admire-31.2.xml']
-    >>> verbnet.fileids(['admire-31.2', 'obtain-13.5.2'])
-    ['admire-31.2.xml', 'obtain-13.5.2.xml']
-    >>> verbnet.fileids('badidentifier')
-    Traceback (most recent call last):
-      . . .
-    ValueError: vnclass identifier 'badidentifier' not found
-
-longid() and shortid() can be used to convert identifiers:
-
-    >>> verbnet.longid('31.2')
-    'admire-31.2'
-    >>> verbnet.longid('admire-31.2')
-    'admire-31.2'
-    >>> verbnet.shortid('31.2')
-    '31.2'
-    >>> verbnet.shortid('admire-31.2')
-    '31.2'
-    >>> verbnet.longid('badidentifier')
-    Traceback (most recent call last):
-      . . .
-    ValueError: vnclass identifier 'badidentifier' not found
-    >>> verbnet.shortid('badidentifier')
-    Traceback (most recent call last):
-      . . .
-    ValueError: vnclass identifier 'badidentifier' not found
-
-Corpus View Regression Tests
-============================
-
-Select some corpus files to play with:
-
-    >>> import nltk.data
-    >>> # A very short file (160 chars):
-    >>> f1 = nltk.data.find('corpora/inaugural/README')
-    >>> # A relatively short file (791 chars):
-    >>> f2 = nltk.data.find('corpora/inaugural/1793-Washington.txt')
-    >>> # A longer file (32k chars):
-    >>> f3 = nltk.data.find('corpora/inaugural/1909-Taft.txt')
-    >>> fileids = [f1, f2, f3]
-
-
-Concatenation
--------------
-Check that concatenation works as intended.
-
-    >>> from nltk.corpus.reader.util import *
-
-    >>> c1 = StreamBackedCorpusView(f1, read_whitespace_block, encoding='utf-8')
-    >>> c2 = StreamBackedCorpusView(f2, read_whitespace_block, encoding='utf-8')
-    >>> c3 = StreamBackedCorpusView(f3, read_whitespace_block, encoding='utf-8')
-    >>> c123 = c1+c2+c3
-    >>> print(c123)
-    ['C-Span', 'Inaugural', 'Address', 'Corpus', 'US', ...]
-
-    >>> l1 = f1.open(encoding='utf-8').read().split()
-    >>> l2 = f2.open(encoding='utf-8').read().split()
-    >>> l3 = f3.open(encoding='utf-8').read().split()
-    >>> l123 = l1+l2+l3
-
-    >>> list(c123) == l123
-    True
-
-    >>> (c1+c2+c3)[100] == l123[100]
-    True
-
-Slicing
--------
-First, do some tests with fairly small slices.  These will all
-generate tuple values.
-
-    >>> from nltk.util import LazySubsequence
-    >>> c1 = StreamBackedCorpusView(f1, read_whitespace_block, encoding='utf-8')
-    >>> l1 = f1.open(encoding='utf-8').read().split()
-    >>> print(len(c1))
-    21
-    >>> len(c1) < LazySubsequence.MIN_SIZE
-    True
-
-Choose a list of indices, based on the length, that covers the
-important corner cases:
-
-    >>> indices = [-60, -30, -22, -21, -20, -1,
-    ...            0, 1, 10, 20, 21, 22, 30, 60]
-
-Test slicing with explicit start & stop value:
-
-    >>> for s in indices:
-    ...     for e in indices:
-    ...         assert list(c1[s:e]) == l1[s:e]
-
-Test slicing with stop=None:
-
-    >>> for s in indices:
-    ...     assert list(c1[s:]) == l1[s:]
-
-Test slicing with start=None:
-
-    >>> for e in indices:
-    ...     assert list(c1[:e]) == l1[:e]
-
-Test slicing with start=stop=None:
-
-    >>> list(c1[:]) == list(l1[:])
-    True
-
-Next, we'll do some tests with much longer slices.  These will
-generate LazySubsequence objects.
-
-    >>> c3 = StreamBackedCorpusView(f3, read_whitespace_block, encoding='utf-8')
-    >>> l3 = f3.open(encoding='utf-8').read().split()
-    >>> print(len(c3))
-    5430
-    >>> len(c3) > LazySubsequence.MIN_SIZE*2
-    True
-
-Choose a list of indices, based on the length, that covers the
-important corner cases:
-
-    >>> indices = [-12000, -6000, -5431, -5430, -5429, -3000, -200, -1,
-    ...            0, 1, 200, 3000, 5000, 5429, 5430, 5431, 6000, 12000]
-
-Test slicing with explicit start & stop value:
-
-    >>> for s in indices:
-    ...     for e in indices:
-    ...         assert list(c3[s:e]) == l3[s:e]
-
-Test slicing with stop=None:
-
-    >>> for s in indices:
-    ...     assert list(c3[s:]) == l3[s:]
-
-Test slicing with start=None:
-
-    >>> for e in indices:
-    ...     assert list(c3[:e]) == l3[:e]
-
-Test slicing with start=stop=None:
-
-    >>> list(c3[:]) == list(l3[:])
-    True
-
-Multiple Iterators
-------------------
-If multiple iterators are created for the same corpus view, their
-iteration can be interleaved:
-
-    >>> c3 = StreamBackedCorpusView(f3, read_whitespace_block)
-    >>> iterators = [c3.iterate_from(n) for n in [0,15,30,45]]
-    >>> for i in range(15):
-    ...     for iterator in iterators:
-    ...         print('%-15s' % next(iterator), end=' ')
-    ...     print()
-    My              a               duties          in
-    fellow          heavy           of              a
-    citizens:       weight          the             proper
-    Anyone          of              office          sense
-    who             responsibility. upon            of
-    has             If              which           the
-    taken           not,            he              obligation
-    the             he              is              which
-    oath            has             about           the
-    I               no              to              oath
-    have            conception      enter,          imposes.
-    just            of              or              The
-    taken           the             he              office
-    must            powers          is              of
-    feel            and             lacking         an
-
-SeekableUnicodeStreamReader
-===========================
-
-The file-like objects provided by the ``codecs`` module unfortunately
-suffer from a bug that prevents them from working correctly with
-corpus view objects.  In particular, although they expose ``seek()``
-and ``tell()`` methods, those methods do not exhibit the expected
-behavior, because they are not synchronized with the internal buffers
-that are kept by the file-like objects.  For example, the ``tell()``
-method will return the file position at the end of the buffers (whose
-contents have not yet been returned by the stream); and therefore this
-file position can not be used to return to the 'current' location in
-the stream (since ``seek()`` has no way to reconstruct the buffers).
-
-To get around these problems, we define a new class,
-`SeekableUnicodeStreamReader`, to act as a file-like interface to
-files containing encoded unicode data.  This class is loosely based on
-the ``codecs.StreamReader`` class.  To construct a new reader, we call
-the constructor with an underlying stream and an encoding name:
-
-    >>> from io import StringIO, BytesIO
-    >>> from nltk.data import SeekableUnicodeStreamReader
-    >>> stream = BytesIO(b"""\
-    ... This is a test file.
-    ... It is encoded in ascii.
-    ... """.decode('ascii').encode('ascii'))
-    >>> reader = SeekableUnicodeStreamReader(stream, 'ascii')
-
-`SeekableUnicodeStreamReader`\ s support all of the normal operations
-supplied by a read-only stream.  Note that all of the read operations
-return ``unicode`` objects (not ``str`` objects).
-
-    >>> reader.read()         # read the entire file.
-    u'This is a test file.\nIt is encoded in ascii.\n'
-    >>> reader.seek(0)        # rewind to the start.
-    >>> reader.read(5)        # read at most 5 bytes.
-    u'This '
-    >>> reader.readline()     # read to the end of the line.
-    u'is a test file.\n'
-    >>> reader.seek(0)        # rewind to the start.
-    >>> for line in reader:
-    ...     print(repr(line))      # iterate over lines
-    u'This is a test file.\n'
-    u'It is encoded in ascii.\n'
-    >>> reader.seek(0)        # rewind to the start.
-    >>> reader.readlines()    # read a list of line strings
-    [u'This is a test file.\n', u'It is encoded in ascii.\n']
-    >>> reader.close()
-
-Size argument to ``read()``
----------------------------
-The ``size`` argument to ``read()`` specifies the maximum number of
-*bytes* to read, not the maximum number of *characters*.  Thus, for
-encodings that use multiple bytes per character, it may return fewer
-characters than the ``size`` argument:
-
-    >>> stream = BytesIO(b"""\
-    ... This is a test file.
-    ... It is encoded in utf-16.
-    ... """.decode('ascii').encode('utf-16'))
-    >>> reader = SeekableUnicodeStreamReader(stream, 'utf-16')
-    >>> reader.read(10)
-    u'This '
-
-If a read block ends in the middle of the byte string encoding a
-single character, then that byte string is stored in an internal
-buffer, and re-used on the next call to ``read()``.  However, if the
-size argument is too small to read even a single character, even
-though at least one character is available, then the ``read()`` method
-will read additional bytes until it can return a single character.
-This ensures that the ``read()`` method does not return an empty
-string, which could be mistaken for indicating the end of the file.
-
-    >>> reader.seek(0)            # rewind to the start.
-    >>> reader.read(1)            # we actually need to read 4 bytes
-    u'T'
-    >>> int(reader.tell())
-    4
-
-The ``readline()`` method may read more than a single line of text, in
-which case it stores the text that it does not return in a buffer.  If
-this buffer is not empty, then its contents will be included in the
-value returned by the next call to ``read()``, regardless of the
-``size`` argument, since they are available without reading any new
-bytes from the stream:
-
-    >>> reader.seek(0)            # rewind to the start.
-    >>> reader.readline()         # stores extra text in a buffer
-    u'This is a test file.\n'
-    >>> print(reader.linebuffer)   # examine the buffer contents
-    [u'It is encoded i']
-    >>> reader.read(0)            # returns the contents of the buffer
-    u'It is encoded i'
-    >>> print(reader.linebuffer)   # examine the buffer contents
-    None
-
-Seek and Tell
--------------
-In addition to these basic read operations,
-`SeekableUnicodeStreamReader` also supports the ``seek()`` and
-``tell()`` operations.  However, some care must still be taken when
-using these operations.  In particular, the only file offsets that
-should be passed to ``seek()`` are ``0`` and any offset that has been
-returned by ``tell``.
-
-    >>> stream = BytesIO(b"""\
-    ... This is a test file.
-    ... It is encoded in utf-16.
-    ... """.decode('ascii').encode('utf-16'))
-    >>> reader = SeekableUnicodeStreamReader(stream, 'utf-16')
-    >>> reader.read(20)
-    u'This is a '
-    >>> pos = reader.tell(); print(pos)
-    22
-    >>> reader.read(20)
-    u'test file.'
-    >>> reader.seek(pos)     # rewind to the position from tell.
-    >>> reader.read(20)
-    u'test file.'
-
-The ``seek()`` and ``tell()`` methods work properly even when
-``readline()`` is used.
-
-    >>> stream = BytesIO(b"""\
-    ... This is a test file.
-    ... It is encoded in utf-16.
-    ... """.decode('ascii').encode('utf-16'))
-    >>> reader = SeekableUnicodeStreamReader(stream, 'utf-16')
-    >>> reader.readline()
-    u'This is a test file.\n'
-    >>> pos = reader.tell(); print(pos)
-    44
-    >>> reader.readline()
-    u'It is encoded in utf-16.\n'
-    >>> reader.seek(pos)     # rewind to the position from tell.
-    >>> reader.readline()
-    u'It is encoded in utf-16.\n'
-
-
-Squashed Bugs
-=============
-
-svn 5276 fixed a bug in the comment-stripping behavior of
-parse_sexpr_block.
-
-    >>> from io import StringIO
-    >>> from nltk.corpus.reader.util import read_sexpr_block
-    >>> f = StringIO(b"""
-    ... (a b c)
-    ... # This line is a comment.
-    ... (d e f\ng h)""".decode('ascii'))
-    >>> print(read_sexpr_block(f, block_size=38, comment_char='#'))
-    ['(a b c)']
-    >>> print(read_sexpr_block(f, block_size=38, comment_char='#'))
-    ['(d e f\ng h)']
-
-svn 5277 fixed a bug in parse_sexpr_block, which would cause it to
-enter an infinite loop if a file ended mid-sexpr, or ended with a
-token that was not followed by whitespace.  A related bug caused
-an infinite loop if the corpus ended in an unmatched close paren --
-this was fixed in svn 5279
-
-    >>> f = StringIO(b"""
-    ... This file ends mid-sexpr
-    ... (hello (world""".decode('ascii'))
-    >>> for i in range(3): print(read_sexpr_block(f))
-    ['This', 'file', 'ends', 'mid-sexpr']
-    ['(hello (world']
-    []
-
-    >>> f = StringIO(b"This file has no trailing whitespace.".decode('ascii'))
-    >>> for i in range(3): print(read_sexpr_block(f))
-    ['This', 'file', 'has', 'no', 'trailing']
-    ['whitespace.']
-    []
-
-    >>> # Bug fixed in 5279:
-    >>> f = StringIO(b"a b c)".decode('ascii'))
-    >>> for i in range(3): print(read_sexpr_block(f))
-    ['a', 'b']
-    ['c)']
-    []
-
-
-svn 5624 & 5265 fixed a bug in ConcatenatedCorpusView, which caused it
-to return the wrong items when indexed starting at any index beyond
-the first file.
-
-    >>> import nltk
-    >>> sents = nltk.corpus.brown.sents()
-    >>> print(sents[6000])
-    ['Cholesterol', 'and', 'thyroid']
-    >>> print(sents[6000])
-    ['Cholesterol', 'and', 'thyroid']
-
-svn 5728 fixed a bug in Categorized*CorpusReader, which caused them
-to return words from *all* files when just one file was specified.
-
-    >>> from nltk.corpus import reuters
-    >>> reuters.words('training/13085')
-    ['SNYDER', '&', 'lt', ';', 'SOI', '>', 'MAKES', ...]
-    >>> reuters.words('training/5082')
-    ['SHEPPARD', 'RESOURCES', 'TO', 'MERGE', 'WITH', ...]
-
-svn 7227 fixed a bug in the qc corpus reader, which prevented
-access to its tuples() method
-
-    >>> from nltk.corpus import qc
-    >>> qc.tuples('test.txt')
-    [('NUM:dist', 'How far is it from Denver to Aspen ?'), ('LOC:city', 'What county is Modesto , California in ?'), ...]
-
-
-
diff --git a/nlp_resource_data/nltk/test/corpus_fixt.py b/nlp_resource_data/nltk/test/corpus_fixt.py

deleted file mode 100644 (file)

index ce0cd83..0000000
--- a/nlp_resource_data/nltk/test/corpus_fixt.py
+++ /dev/null
@@ -1,4 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import
-
-from nltk.corpus import teardown_module
diff --git a/nlp_resource_data/nltk/test/crubadan.doctest b/nlp_resource_data/nltk/test/crubadan.doctest

deleted file mode 100644 (file)

index 011af25..0000000
--- a/nlp_resource_data/nltk/test/crubadan.doctest
+++ /dev/null
@@ -1,65 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-Crubadan Corpus Reader
-======================
-
-Crubadan is an NLTK corpus reader for ngram files provided
-by the Crubadan project. It supports several languages.
-
-    >>> from nltk.corpus import crubadan
-    >>> crubadan.langs() # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
-    ['abk', 'abn',..., 'zpa', 'zul']
-
-----------------------------------------
-Language code mapping and helper methods
-----------------------------------------
-
-The web crawler that generates the 3-gram frequencies works at the
-level of "writing systems" rather than languages. Writing systems
-are assigned internal 2-3 letter codes that require mapping to the
-standard ISO 639-3 codes. For more information, please refer to 
-the README in nltk_data/crubadan folder after installing it.
-
-To translate ISO 639-3 codes to "Crubadan Code":
-
-    >>> crubadan.iso_to_crubadan('eng')
-    'en'
-    >>> crubadan.iso_to_crubadan('fra')
-    'fr'
-    >>> crubadan.iso_to_crubadan('aaa')
-
-In reverse, print ISO 639-3 code if we have the Crubadan Code:
-
-    >>> crubadan.crubadan_to_iso('en')
-    'eng'
-    >>> crubadan.crubadan_to_iso('fr')
-    'fra'
-    >>> crubadan.crubadan_to_iso('aa')
-
----------------------------
-Accessing ngram frequencies
----------------------------
-
-On initialization the reader will create a dictionary of every
-language supported by the Crubadan project, mapping the ISO 639-3
-language code to its corresponding ngram frequency.
-
-You can access individual language FreqDist and the ngrams within them as follows:
-
-    >>> english_fd = crubadan.lang_freq('eng')
-    >>> english_fd['the']
-    728135
-
-The above accesses the FreqDist of English and returns the frequency of the ngram 'the'.
-An ngram that isn't found within the language will return 0:
-
-    >>> english_fd['sometest']
-    0
-
-A language that isn't supported will raise an exception:
-
-    >>> crubadan.lang_freq('elvish')
-    Traceback (most recent call last):
-    ...
-    RuntimeError: Unsupported language.
diff --git a/nlp_resource_data/nltk/test/data.doctest b/nlp_resource_data/nltk/test/data.doctest

deleted file mode 100644 (file)

index 184c512..0000000
--- a/nlp_resource_data/nltk/test/data.doctest
+++ /dev/null
@@ -1,379 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-=========================================
- Loading Resources From the Data Package
-=========================================
-
-    >>> import nltk.data
-
-Overview
-~~~~~~~~
-The `nltk.data` module contains functions that can be used to load
-NLTK resource files, such as corpora, grammars, and saved processing
-objects.
-
-Loading Data Files
-~~~~~~~~~~~~~~~~~~
-Resources are loaded using the function `nltk.data.load()`, which
-takes as its first argument a URL specifying what file should be
-loaded.  The ``nltk:`` protocol loads files from the NLTK data
-distribution:
-
-    >>> from __future__ import print_function
-    >>> tokenizer = nltk.data.load('nltk:tokenizers/punkt/english.pickle')
-    >>> tokenizer.tokenize('Hello.  This is a test.  It works!')
-    ['Hello.', 'This is a test.', 'It works!']
-
-It is important to note that there should be no space following the
-colon (':') in the URL; 'nltk: tokenizers/punkt/english.pickle' will
-not work!
-
-The ``nltk:`` protocol is used by default if no protocol is specified:
-
-    >>> nltk.data.load('tokenizers/punkt/english.pickle') # doctest: +ELLIPSIS
-    <nltk.tokenize.punkt.PunktSentenceTokenizer object at ...>
-
-But it is also possible to load resources from ``http:``, ``ftp:``,
-and ``file:`` URLs, e.g. ``cfg = nltk.data.load('http://example.com/path/to/toy.cfg')``
-
-    >>> # Load a grammar using an absolute path.
-    >>> url = 'file:%s' % nltk.data.find('grammars/sample_grammars/toy.cfg')
-    >>> url.replace('\\', '/') # doctest: +ELLIPSIS
-    'file:...toy.cfg'
-    >>> print(nltk.data.load(url)) # doctest: +ELLIPSIS
-    Grammar with 14 productions (start state = S)
-        S -> NP VP
-        PP -> P NP
-        ...
-        P -> 'on'
-        P -> 'in'
-
-The second argument to the `nltk.data.load()` function specifies the
-file format, which determines how the file's contents are processed
-before they are returned by ``load()``.  The formats that are
-currently supported by the data module are described by the dictionary
-`nltk.data.FORMATS`:
-
-    >>> for format, descr in sorted(nltk.data.FORMATS.items()):
-    ...     print('{0:<7} {1:}'.format(format, descr)) # doctest: +NORMALIZE_WHITESPACE
-    cfg     A context free grammar.
-    fcfg    A feature CFG.
-    fol     A list of first order logic expressions, parsed with
-    nltk.sem.logic.Expression.fromstring.
-    json    A serialized python object, stored using the json module.
-    logic   A list of first order logic expressions, parsed with
-    nltk.sem.logic.LogicParser.  Requires an additional logic_parser
-    parameter
-    pcfg    A probabilistic CFG.
-    pickle  A serialized python object, stored using the pickle
-    module.
-    raw     The raw (byte string) contents of a file.
-    text    The raw (unicode string) contents of a file. 
-    val     A semantic valuation, parsed by
-    nltk.sem.Valuation.fromstring.
-    yaml    A serialized python object, stored using the yaml module.
-
-`nltk.data.load()` will raise a ValueError if a bad format name is
-specified:
-
-    >>> nltk.data.load('grammars/sample_grammars/toy.cfg', 'bar')
-    Traceback (most recent call last):
-      . . .
-    ValueError: Unknown format type!
-
-By default, the ``"auto"`` format is used, which chooses a format
-based on the filename's extension.  The mapping from file extensions
-to format names is specified by `nltk.data.AUTO_FORMATS`:
-
-    >>> for ext, format in sorted(nltk.data.AUTO_FORMATS.items()):
-    ...     print('.%-7s -> %s' % (ext, format))
-    .cfg     -> cfg
-    .fcfg    -> fcfg
-    .fol     -> fol
-    .json    -> json
-    .logic   -> logic
-    .pcfg    -> pcfg
-    .pickle  -> pickle
-    .text    -> text
-    .txt     -> text
-    .val     -> val
-    .yaml    -> yaml
-
-If `nltk.data.load()` is unable to determine the format based on the
-filename's extension, it will raise a ValueError:
-
-    >>> nltk.data.load('foo.bar')
-    Traceback (most recent call last):
-      . . .
-    ValueError: Could not determine format for foo.bar based on its file
-    extension; use the "format" argument to specify the format explicitly.
-
-Note that by explicitly specifying the ``format`` argument, you can
-override the load method's default processing behavior.  For example,
-to get the raw contents of any file as a unicode string, simply use ``format="text"``:
-
-    >>> s = nltk.data.load('grammars/sample_grammars/toy.cfg', 'text') 
-    >>> print(s) # doctest: +ELLIPSIS
-    S -> NP VP
-    PP -> P NP
-    NP -> Det N | NP PP
-    VP -> V NP | VP PP
-    ...
-
-Making Local Copies
-~~~~~~~~~~~~~~~~~~~
-..  This will not be visible in the html output: create a tempdir to
-    play in.
-    >>> import tempfile, os
-    >>> tempdir = tempfile.mkdtemp()
-    >>> old_dir = os.path.abspath('.')
-    >>> os.chdir(tempdir)
-
-The function `nltk.data.retrieve()` copies a given resource to a local
-file.  This can be useful, for example, if you want to edit one of the
-sample grammars.
-
-    >>> nltk.data.retrieve('grammars/sample_grammars/toy.cfg')
-    Retrieving 'nltk:grammars/sample_grammars/toy.cfg', saving to 'toy.cfg'
-
-    >>> # Simulate editing the grammar.
-    >>> with open('toy.cfg') as inp:
-    ...     s = inp.read().replace('NP', 'DP')
-    >>> with open('toy.cfg', 'w') as out:
-    ...     _bytes_written = out.write(s)
-
-    >>> # Load the edited grammar, & display it.
-    >>> cfg = nltk.data.load('file:///' + os.path.abspath('toy.cfg'))
-    >>> print(cfg) # doctest: +ELLIPSIS
-    Grammar with 14 productions (start state = S)
-        S -> DP VP
-        PP -> P DP
-        ...
-        P -> 'on'
-        P -> 'in'
-
-The second argument to `nltk.data.retrieve()` specifies the filename
-for the new copy of the file.  By default, the source file's filename
-is used.
-
-    >>> nltk.data.retrieve('grammars/sample_grammars/toy.cfg', 'mytoy.cfg')
-    Retrieving 'nltk:grammars/sample_grammars/toy.cfg', saving to 'mytoy.cfg'
-    >>> os.path.isfile('./mytoy.cfg')
-    True
-    >>> nltk.data.retrieve('grammars/sample_grammars/np.fcfg')
-    Retrieving 'nltk:grammars/sample_grammars/np.fcfg', saving to 'np.fcfg'
-    >>> os.path.isfile('./np.fcfg')
-    True
-
-If a file with the specified (or default) filename already exists in
-the current directory, then `nltk.data.retrieve()` will raise a
-ValueError exception.  It will *not* overwrite the file:
-
-    >>> os.path.isfile('./toy.cfg')
-    True
-    >>> nltk.data.retrieve('grammars/sample_grammars/toy.cfg') # doctest: +ELLIPSIS
-    Traceback (most recent call last):
-      . . .
-    ValueError: File '...toy.cfg' already exists!
-
-..  This will not be visible in the html output: clean up the tempdir.
-    >>> os.chdir(old_dir)
-    >>> for f in os.listdir(tempdir):
-    ...     os.remove(os.path.join(tempdir, f))
-    >>> os.rmdir(tempdir)
-
-Finding Files in the NLTK Data Package
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The `nltk.data.find()` function searches the NLTK data package for a
-given file, and returns a pointer to that file.  This pointer can
-either be a `FileSystemPathPointer` (whose `path` attribute gives the
-absolute path of the file); or a `ZipFilePathPointer`, specifying a
-zipfile and the name of an entry within that zipfile.  Both pointer
-types define the `open()` method, which can be used to read the string
-contents of the file.
-
-    >>> path = nltk.data.find('corpora/abc/rural.txt')
-    >>> str(path) # doctest: +ELLIPSIS
-    '...rural.txt'
-    >>> print(path.open().read(60).decode())
-    PM denies knowledge of AWB kickbacks
-    The Prime Minister has 
-
-Alternatively, the `nltk.data.load()` function can be used with the
-keyword argument ``format="raw"``:
-
-    >>> s = nltk.data.load('corpora/abc/rural.txt', format='raw')[:60]
-    >>> print(s.decode())
-    PM denies knowledge of AWB kickbacks
-    The Prime Minister has 
-
-Alternatively, you can use the keyword argument ``format="text"``:
-
-    >>> s = nltk.data.load('corpora/abc/rural.txt', format='text')[:60]
-    >>> print(s)
-    PM denies knowledge of AWB kickbacks
-    The Prime Minister has 
-
-Resource Caching
-~~~~~~~~~~~~~~~~
-
-NLTK uses a dictionary to maintain a cache of resources that
-have been loaded.  If you load a resource that is already stored in
-the cache, then the cached copy will be returned.  This behavior can
-be seen by the trace output generated when verbose=True:
-
-    >>> feat0 = nltk.data.load('grammars/book_grammars/feat0.fcfg', verbose=True)
-    <<Loading nltk:grammars/book_grammars/feat0.fcfg>>
-    >>> feat0 = nltk.data.load('grammars/book_grammars/feat0.fcfg', verbose=True)
-    <<Using cached copy of nltk:grammars/book_grammars/feat0.fcfg>>
-
-If you wish to load a resource from its source, bypassing the cache,
-use the ``cache=False`` argument to `nltk.data.load()`.  This can be
-useful, for example, if the resource is loaded from a local file, and
-you are actively editing that file:
-
-    >>> feat0 = nltk.data.load('grammars/book_grammars/feat0.fcfg',cache=False,verbose=True)
-    <<Loading nltk:grammars/book_grammars/feat0.fcfg>>
-
-The cache *no longer* uses weak references.  A resource will not be
-automatically expunged from the cache when no more objects are using
-it.  In the following example, when we clear the variable ``feat0``,
-the reference count for the feature grammar object drops to zero.
-However, the object remains cached:
-
-    >>> del feat0
-    >>> feat0 = nltk.data.load('grammars/book_grammars/feat0.fcfg',
-    ...                        verbose=True)
-    <<Using cached copy of nltk:grammars/book_grammars/feat0.fcfg>>
-
-You can clear the entire contents of the cache, using
-`nltk.data.clear_cache()`:
-
-    >>> nltk.data.clear_cache()
-
-Retrieving other Data Sources
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    >>> formulas = nltk.data.load('grammars/book_grammars/background.fol')
-    >>> for f in formulas: print(str(f))
-    all x.(boxerdog(x) -> dog(x))
-    all x.(boxer(x) -> person(x))
-    all x.-(dog(x) & person(x))
-    all x.(married(x) <-> exists y.marry(x,y))
-    all x.(bark(x) -> dog(x))
-    all x y.(marry(x,y) -> (person(x) & person(y)))
-    -(Vincent = Mia)
-    -(Vincent = Fido)
-    -(Mia = Fido)
-
-Regression Tests
-~~~~~~~~~~~~~~~~
-Create a temp dir for tests that write files:
-
-    >>> import tempfile, os
-    >>> tempdir = tempfile.mkdtemp()
-    >>> old_dir = os.path.abspath('.')
-    >>> os.chdir(tempdir)
-
-The `retrieve()` function accepts all url types:
-
-    >>> urls = ['https://raw.githubusercontent.com/nltk/nltk/develop/nltk/test/toy.cfg',
-    ...         'file:%s' % nltk.data.find('grammars/sample_grammars/toy.cfg'),
-    ...         'nltk:grammars/sample_grammars/toy.cfg',
-    ...         'grammars/sample_grammars/toy.cfg']
-    >>> for i, url in enumerate(urls):
-    ...     nltk.data.retrieve(url, 'toy-%d.cfg' % i) # doctest: +ELLIPSIS
-    Retrieving 'https://raw.githubusercontent.com/nltk/nltk/develop/nltk/test/toy.cfg', saving to 'toy-0.cfg'
-    Retrieving 'file:...toy.cfg', saving to 'toy-1.cfg'
-    Retrieving 'nltk:grammars/sample_grammars/toy.cfg', saving to 'toy-2.cfg'
-    Retrieving 'nltk:grammars/sample_grammars/toy.cfg', saving to 'toy-3.cfg'
-
-Clean up the temp dir:
-
-    >>> os.chdir(old_dir)
-    >>> for f in os.listdir(tempdir):
-    ...     os.remove(os.path.join(tempdir, f))
-    >>> os.rmdir(tempdir)
-
-Lazy Loader
------------
-A lazy loader is a wrapper object that defers loading a resource until
-it is accessed or used in any way.  This is mainly intended for
-internal use by NLTK's corpus readers.
-
-    >>> # Create a lazy loader for toy.cfg.
-    >>> ll = nltk.data.LazyLoader('grammars/sample_grammars/toy.cfg')
-
-    >>> # Show that it's not loaded yet:
-    >>> object.__repr__(ll) # doctest: +ELLIPSIS
-    '<nltk.data.LazyLoader object at ...>'
-
-    >>> # printing it is enough to cause it to be loaded:
-    >>> print(ll)
-    <Grammar with 14 productions>
-
-    >>> # Show that it's now been loaded:
-    >>> object.__repr__(ll) # doctest: +ELLIPSIS
-    '<nltk.grammar.CFG object at ...>'
-
-
-    >>> # Test that accessing an attribute also loads it:
-    >>> ll = nltk.data.LazyLoader('grammars/sample_grammars/toy.cfg')
-    >>> ll.start()
-    S
-    >>> object.__repr__(ll) # doctest: +ELLIPSIS
-    '<nltk.grammar.CFG object at ...>'
-
-Buffered Gzip Reading and Writing
----------------------------------
-Write performance to gzip-compressed files is extremely poor when the files become large.
-File creation can become a bottleneck in those cases.
-
-Read performance from large gzipped pickle files was improved in data.py by
-buffering the reads. A similar fix can be applied to writes by buffering
-the writes to a StringIO object first.
-
-This is mainly intended for internal use. The test simply tests that reading
-and writing work as intended and does not test how much improvement buffering
-provides.
-
-    >>> from nltk.compat import StringIO
-    >>> test = nltk.data.BufferedGzipFile('testbuf.gz', 'wb', size=2**10)
-    >>> ans = []
-    >>> for i in range(10000):
-    ...     ans.append(str(i).encode('ascii'))
-    ...     test.write(str(i).encode('ascii'))
-    >>> test.close()
-    >>> test = nltk.data.BufferedGzipFile('testbuf.gz', 'rb')
-    >>> test.read() == b''.join(ans)
-    True
-    >>> test.close()
-    >>> import os
-    >>> os.unlink('testbuf.gz')
-
-JSON Encoding and Decoding
---------------------------
-JSON serialization is used instead of pickle for some classes.
-
-    >>> from nltk import jsontags
-    >>> from nltk.jsontags import JSONTaggedEncoder, JSONTaggedDecoder, register_tag
-    >>> @jsontags.register_tag
-    ... class JSONSerializable:
-    ...     json_tag = 'JSONSerializable'
-    ...
-    ...     def __init__(self, n):
-    ...         self.n = n
-    ...
-    ...     def encode_json_obj(self):
-    ...         return self.n
-    ...
-    ...     @classmethod
-    ...     def decode_json_obj(cls, obj):
-    ...         n = obj
-    ...         return cls(n)
-    ...
-    >>> JSONTaggedEncoder().encode(JSONSerializable(1))
-    '{"!JSONSerializable": 1}'
-    >>> JSONTaggedDecoder().decode('{"!JSONSerializable": 1}').n
-    1
-
diff --git a/nlp_resource_data/nltk/test/dependency.doctest b/nlp_resource_data/nltk/test/dependency.doctest

deleted file mode 100755 (executable)

index 31590c4..0000000
--- a/nlp_resource_data/nltk/test/dependency.doctest
+++ /dev/null
@@ -1,241 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-===================
-Dependency Grammars
-===================
-
-    >>> from nltk.grammar import DependencyGrammar
-    >>> from nltk.parse import (
-    ...     DependencyGraph,
-    ...     ProjectiveDependencyParser,
-    ...     NonprojectiveDependencyParser,
-    ... )
-
-CoNLL Data
-----------
-
-    >>> treebank_data = """Pierre  NNP     2       NMOD
-    ... Vinken  NNP     8       SUB
-    ... ,       ,       2       P
-    ... 61      CD      5       NMOD
-    ... years   NNS     6       AMOD
-    ... old     JJ      2       NMOD
-    ... ,       ,       2       P
-    ... will    MD      0       ROOT
-    ... join    VB      8       VC
-    ... the     DT      11      NMOD
-    ... board   NN      9       OBJ
-    ... as      IN      9       VMOD
-    ... a       DT      15      NMOD
-    ... nonexecutive    JJ      15      NMOD
-    ... director        NN      12      PMOD
-    ... Nov.    NNP     9       VMOD
-    ... 29      CD      16      NMOD
-    ... .       .       9       VMOD
-    ... """
-
-    >>> dg = DependencyGraph(treebank_data)
-    >>> dg.tree().pprint()
-    (will
-      (Vinken Pierre , (old (years 61)) ,)
-      (join (board the) (as (director a nonexecutive)) (Nov. 29) .))
-    >>> for head, rel, dep in dg.triples():
-    ...     print(
-    ...         '({h[0]}, {h[1]}), {r}, ({d[0]}, {d[1]})'
-    ...         .format(h=head, r=rel, d=dep)
-    ...     )
-    (will, MD), SUB, (Vinken, NNP)
-    (Vinken, NNP), NMOD, (Pierre, NNP)
-    (Vinken, NNP), P, (,, ,)
-    (Vinken, NNP), NMOD, (old, JJ)
-    (old, JJ), AMOD, (years, NNS)
-    (years, NNS), NMOD, (61, CD)
-    (Vinken, NNP), P, (,, ,)
-    (will, MD), VC, (join, VB)
-    (join, VB), OBJ, (board, NN)
-    (board, NN), NMOD, (the, DT)
-    (join, VB), VMOD, (as, IN)
-    (as, IN), PMOD, (director, NN)
-    (director, NN), NMOD, (a, DT)
-    (director, NN), NMOD, (nonexecutive, JJ)
-    (join, VB), VMOD, (Nov., NNP)
-    (Nov., NNP), NMOD, (29, CD)
-    (join, VB), VMOD, (., .)
-
-Using a custom cell extractor.
-
-    >>> def custom_extractor(cells):
-    ...     _, tag, head, rel = cells
-    ...     return 'spam', 'spam', tag, tag, '', head, rel
-    >>> dg = DependencyGraph(treebank_data, cell_extractor=custom_extractor)
-    >>> dg.tree().pprint()
-    (spam
-      (spam spam spam (spam (spam spam)) spam)
-      (spam (spam spam) (spam (spam spam spam)) (spam spam) spam))
-
-Custom cell extractors can take in and return an index.
-
-    >>> def custom_extractor(cells, index):
-    ...     word, tag, head, rel = cells
-    ...     return (index, '{}-{}'.format(word, index), word,
-    ...             tag, tag, '', head, rel)
-    >>> dg = DependencyGraph(treebank_data, cell_extractor=custom_extractor)
-    >>> dg.tree().pprint()
-    (will-8
-      (Vinken-2 Pierre-1 ,-3 (old-6 (years-5 61-4)) ,-7)
-      (join-9
-        (board-11 the-10)
-        (as-12 (director-15 a-13 nonexecutive-14))
-        (Nov.-16 29-17)
-        .-18))
-
-Using the dependency-parsed version of the Penn Treebank corpus sample.
-
-    >>> from nltk.corpus import dependency_treebank
-    >>> t = dependency_treebank.parsed_sents()[0]
-    >>> print(t.to_conll(3))  # doctest: +NORMALIZE_WHITESPACE
-    Pierre      NNP     2
-    Vinken      NNP     8
-    ,   ,       2
-    61  CD      5
-    years       NNS     6
-    old JJ      2
-    ,   ,       2
-    will        MD      0
-    join        VB      8
-    the DT      11
-    board       NN      9
-    as  IN      9
-    a   DT      15
-    nonexecutive        JJ      15
-    director    NN      12
-    Nov.        NNP     9
-    29  CD      16
-    .   .       8
-
-Using the output of zpar (like Malt-TAB but with zero-based indexing)
-
-    >>> zpar_data = """
-    ... Pierre NNP     1       NMOD
-    ... Vinken NNP     7       SUB
-    ... ,      ,       1       P
-    ... 61     CD      4       NMOD
-    ... years  NNS     5       AMOD
-    ... old    JJ      1       NMOD
-    ... ,      ,       1       P
-    ... will   MD      -1      ROOT
-    ... join   VB      7       VC
-    ... the    DT      10      NMOD
-    ... board  NN      8       OBJ
-    ... as     IN      8       VMOD
-    ... a      DT      14      NMOD
-    ... nonexecutive   JJ      14      NMOD
-    ... director       NN      11      PMOD
-    ... Nov.   NNP     8       VMOD
-    ... 29     CD      15      NMOD
-    ... .      .       7       P
-    ... """
-
-    >>> zdg = DependencyGraph(zpar_data, zero_based=True)
-    >>> print(zdg.tree())
-    (will
-      (Vinken Pierre , (old (years 61)) ,)
-      (join (board the) (as (director a nonexecutive)) (Nov. 29))
-      .)
-
-
-Projective Dependency Parsing
------------------------------
-
-    >>> grammar = DependencyGrammar.fromstring("""
-    ... 'fell' -> 'price' | 'stock'
-    ... 'price' -> 'of' 'the'
-    ... 'of' -> 'stock'
-    ... 'stock' -> 'the'
-    ... """)
-    >>> print(grammar)
-    Dependency grammar with 5 productions
-      'fell' -> 'price'
-      'fell' -> 'stock'
-      'price' -> 'of' 'the'
-      'of' -> 'stock'
-      'stock' -> 'the'
-
-    >>> dp = ProjectiveDependencyParser(grammar)
-    >>> for t in sorted(dp.parse(['the', 'price', 'of', 'the', 'stock', 'fell'])):
-    ...     print(t)
-    (fell (price the (of (stock the))))
-    (fell (price the of) (stock the))
-    (fell (price the of the) stock)
-
-Non-Projective Dependency Parsing
----------------------------------
-
-    >>> grammar = DependencyGrammar.fromstring("""
-    ... 'taught' -> 'play' | 'man'
-    ... 'man' -> 'the'
-    ... 'play' -> 'golf' | 'dog' | 'to'
-    ... 'dog' -> 'his'
-    ... """)
-    >>> print(grammar)
-    Dependency grammar with 7 productions
-      'taught' -> 'play'
-      'taught' -> 'man'
-      'man' -> 'the'
-      'play' -> 'golf'
-      'play' -> 'dog'
-      'play' -> 'to'
-      'dog' -> 'his'
-
-    >>> dp = NonprojectiveDependencyParser(grammar)
-    >>> g, = dp.parse(['the', 'man', 'taught', 'his', 'dog', 'to', 'play', 'golf'])
-
-    >>> print(g.root['word'])
-    taught
-
-    >>> for _, node in sorted(g.nodes.items()):
-    ...     if node['word'] is not None:
-    ...         print('{address} {word}: {d}'.format(d=node['deps'][''], **node))
-    1 the: []
-    2 man: [1]
-    3 taught: [2, 7]
-    4 his: []
-    5 dog: [4]
-    6 to: []
-    7 play: [5, 6, 8]
-    8 golf: []
-
-    >>> print(g.tree())
-    (taught (man the) (play (dog his) to golf))
-
-Integration with MALT parser
-============================
-
-In case the top relation is different from the default, we can set it. In case
-of MALT parser, it's set to `'null'`.
-
->>> dg_str = """1       I       _       NN      NN      _       2       nn      _       _
-... 2   shot    _       NN      NN      _       0       null    _       _
-... 3   an      _       AT      AT      _       2       dep     _       _
-... 4   elephant        _       NN      NN      _       7       nn      _       _
-... 5   in      _       NN      NN      _       7       nn      _       _
-... 6   my      _       NN      NN      _       7       nn      _       _
-... 7   pajamas _       NNS     NNS     _       3       dobj    _       _
-... """
->>> dg = DependencyGraph(dg_str, top_relation_label='null')
-
->>> len(dg.nodes)
-8
-
->>> dg.root['word'], dg.root['address']
-('shot', 2)
-
->>> print(dg.to_conll(10))  # doctest: +NORMALIZE_WHITESPACE
-1   I       _       NN      NN      _       2       nn      _       _
-2   shot    _       NN      NN      _       0       null    _       _
-3   an      _       AT      AT      _       2       dep     _       _
-4   elephant        _       NN      NN      _       7       nn      _       _
-5   in      _       NN      NN      _       7       nn      _       _
-6   my      _       NN      NN      _       7       nn      _       _
-7   pajamas _       NNS     NNS     _       3       dobj    _       _
diff --git a/nlp_resource_data/nltk/test/discourse.doctest b/nlp_resource_data/nltk/test/discourse.doctest

deleted file mode 100644 (file)

index df18fde..0000000
--- a/nlp_resource_data/nltk/test/discourse.doctest
+++ /dev/null
@@ -1,546 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-==================
-Discourse Checking
-==================
-
-    >>> from nltk import *
-    >>> from nltk.sem import logic
-    >>> logic._counter._value = 0
-
-Introduction
-============
-
-The NLTK discourse module makes it possible to test consistency and
-redundancy of simple discourses, using theorem-proving and
-model-building from `nltk.inference`.
-
-The ``DiscourseTester`` constructor takes a list of sentences as a
-parameter.
-
-    >>> dt = DiscourseTester(['a boxer walks', 'every boxer chases a girl'])
-
-The ``DiscourseTester`` parses each sentence into a list of logical
-forms.  Once we have created a ``DiscourseTester`` object, we can
-inspect various properties of the discourse. First off, we might want
-to double-check what sentences are currently stored as the discourse.
-
-    >>> dt.sentences()
-    s0: a boxer walks
-    s1: every boxer chases a girl
-
-As you will see, each sentence receives an identifier `s`\ :subscript:`i`.
-We might also want to check what grammar the ``DiscourseTester`` is
-using (by default, ``book_grammars/discourse.fcfg``):
-
-    >>> dt.grammar() # doctest: +ELLIPSIS
-    % start S
-    # Grammar Rules
-    S[SEM = <app(?subj,?vp)>] -> NP[NUM=?n,SEM=?subj] VP[NUM=?n,SEM=?vp]
-    NP[NUM=?n,SEM=<app(?det,?nom)> ] -> Det[NUM=?n,SEM=?det]  Nom[NUM=?n,SEM=?nom]
-    NP[LOC=?l,NUM=?n,SEM=?np] -> PropN[LOC=?l,NUM=?n,SEM=?np]
-    ...
-
-A different grammar can be invoked by using the optional ``gramfile``
-parameter when a ``DiscourseTester`` object is created.
-
-Readings and Threads
-====================
-
-Depending on
-the grammar used, we may find some sentences have more than one
-logical form. To check this, use the ``readings()`` method. Given a
-sentence identifier of the form `s`\ :subscript:`i`, each reading of
-that sentence is given an identifier `s`\ :sub:`i`-`r`\ :sub:`j`.
-
-
-    >>> dt.readings()
-    <BLANKLINE>
-    s0 readings:
-    <BLANKLINE>
-    s0-r0: exists z1.(boxer(z1) & walk(z1))
-    s0-r1: exists z1.(boxerdog(z1) & walk(z1))
-    <BLANKLINE>
-    s1 readings:
-    <BLANKLINE>
-    s1-r0: all z2.(boxer(z2) -> exists z3.(girl(z3) & chase(z2,z3)))
-    s1-r1: all z1.(boxerdog(z1) -> exists z2.(girl(z2) & chase(z1,z2)))
-
-
-In this case, the only source of ambiguity lies in the word *boxer*,
-which receives two translations: ``boxer`` and ``boxerdog``. The
-intention is that one of these corresponds to the ``person`` sense and
-one to the ``dog`` sense. In principle, we would also expect to see a
-quantifier scope ambiguity in ``s1``. However, the simple grammar we
-are using, namely `sem4.fcfg <sem4.fcfg>`_, doesn't support quantifier
-scope ambiguity.
-
-We can also investigate the readings of a specific sentence:
-
-    >>> dt.readings('a boxer walks')
-    The sentence 'a boxer walks' has these readings:
-        exists x.(boxer(x) & walk(x))
-        exists x.(boxerdog(x) & walk(x))
-
-Given that each sentence is two-ways ambiguous, we potentially have
-four different discourse 'threads', taking all combinations of
-readings. To see these, specify the ``threaded=True`` parameter on
-the ``readings()`` method. Again, each thread is assigned an
-identifier of the form `d`\ :sub:`i`. Following the identifier is a
-list of the readings that constitute that thread.
-
-    >>> dt.readings(threaded=True) # doctest: +NORMALIZE_WHITESPACE
-    d0: ['s0-r0', 's1-r0']
-    d1: ['s0-r0', 's1-r1']
-    d2: ['s0-r1', 's1-r0']
-    d3: ['s0-r1', 's1-r1']
-
-Of course, this simple-minded approach doesn't scale: a discourse with, say, three
-sentences, each of which has 3 readings, will generate 27 different
-threads. It is an interesting exercise to consider how to manage
-discourse ambiguity more efficiently.
-
-Checking Consistency
-====================
-
-Now, we can check whether some or all of the discourse threads are
-consistent, using the ``models()`` method. With no parameter, this
-method will try to find a model for every discourse thread in the
-current discourse. However, we can also specify just one thread, say ``d1``.
-
-    >>> dt.models('d1')
-    --------------------------------------------------------------------------------
-    Model for Discourse Thread d1
-    --------------------------------------------------------------------------------
-    % number = 1
-    % seconds = 0
-    <BLANKLINE>
-    % Interpretation of size 2
-    <BLANKLINE>
-    c1 = 0.
-    <BLANKLINE>
-    f1(0) = 0.
-    f1(1) = 0.
-    <BLANKLINE>
-      boxer(0).
-    - boxer(1).
-    <BLANKLINE>
-    - boxerdog(0).
-    - boxerdog(1).
-    <BLANKLINE>
-    - girl(0).
-    - girl(1).
-    <BLANKLINE>
-      walk(0).
-    - walk(1).
-    <BLANKLINE>
-    - chase(0,0).
-    - chase(0,1).
-    - chase(1,0).
-    - chase(1,1).
-    <BLANKLINE>
-    Consistent discourse: d1 ['s0-r0', 's1-r1']:
-        s0-r0: exists z1.(boxer(z1) & walk(z1))
-        s1-r1: all z1.(boxerdog(z1) -> exists z2.(girl(z2) & chase(z1,z2)))
-    <BLANKLINE>
-
-There are various formats for rendering **Mace4** models --- here,
-we have used the 'cooked' format (which is intended to be
-human-readable). There are a number of points to note.
-
-#. The entities in the domain are all treated as non-negative
-   integers. In this case, there are only two entities, ``0`` and
-   ``1``.
-
-#. The ``-`` symbol indicates negation. So ``0`` is the only
-   ``boxer`` and the only thing that ``walk``\ s. Nothing is a
-   ``boxerdog``, or a ``girl`` or in the ``chase`` relation. Thus the
-   universal sentence is vacuously true.
-
-#. ``c1`` is an introduced constant that denotes ``0``.
-
-#. ``f1`` is a Skolem function, but it plays no significant role in
-   this model.
-
-
-We might want to now add another sentence to the discourse, and there
-is a method ``add_sentence()`` for doing just this.
-
-    >>> dt.add_sentence('John is a boxer')
-    >>> dt.sentences()
-    s0: a boxer walks
-    s1: every boxer chases a girl
-    s2: John is a boxer
-
-We can now test all the properties as before; here, we just show a
-couple of them.
-
-    >>> dt.readings()
-    <BLANKLINE>
-    s0 readings:
-    <BLANKLINE>
-    s0-r0: exists z1.(boxer(z1) & walk(z1))
-    s0-r1: exists z1.(boxerdog(z1) & walk(z1))
-    <BLANKLINE>
-    s1 readings:
-    <BLANKLINE>
-    s1-r0: all z1.(boxer(z1) -> exists z2.(girl(z2) & chase(z1,z2)))
-    s1-r1: all z1.(boxerdog(z1) -> exists z2.(girl(z2) & chase(z1,z2)))
-    <BLANKLINE>
-    s2 readings:
-    <BLANKLINE>
-    s2-r0: boxer(John)
-    s2-r1: boxerdog(John)
-    >>> dt.readings(threaded=True) # doctest: +NORMALIZE_WHITESPACE
-    d0: ['s0-r0', 's1-r0', 's2-r0']
-    d1: ['s0-r0', 's1-r0', 's2-r1']
-    d2: ['s0-r0', 's1-r1', 's2-r0']
-    d3: ['s0-r0', 's1-r1', 's2-r1']
-    d4: ['s0-r1', 's1-r0', 's2-r0']
-    d5: ['s0-r1', 's1-r0', 's2-r1']
-    d6: ['s0-r1', 's1-r1', 's2-r0']
-    d7: ['s0-r1', 's1-r1', 's2-r1']
-
-If you are interested in a particular thread, the ``expand_threads()``
-method will remind you of what readings it consists of:
-
-    >>> thread = dt.expand_threads('d1')
-    >>> for rid, reading in thread:
-    ...     print(rid, str(reading.normalize()))
-    s0-r0 exists z1.(boxer(z1) & walk(z1))
-    s1-r0 all z1.(boxer(z1) -> exists z2.(girl(z2) & chase(z1,z2)))
-    s2-r1 boxerdog(John)
-
-Suppose we have already defined a discourse, as follows:
-
-    >>> dt = DiscourseTester(['A student dances', 'Every student is a person'])
-
-Now, when we add a new sentence, is it consistent with what we already
-have? The ``consistchk=True`` parameter of ``add_sentence()`` allows
-us to check:
-
-    >>> dt.add_sentence('No person dances', consistchk=True)
-    Inconsistent discourse: d0 ['s0-r0', 's1-r0', 's2-r0']:
-        s0-r0: exists z1.(student(z1) & dance(z1))
-        s1-r0: all z1.(student(z1) -> person(z1))
-        s2-r0: -exists z1.(person(z1) & dance(z1))
-    <BLANKLINE>
-    >>> dt.readings()
-    <BLANKLINE>
-    s0 readings:
-    <BLANKLINE>
-    s0-r0: exists z1.(student(z1) & dance(z1))
-    <BLANKLINE>
-    s1 readings:
-    <BLANKLINE>
-    s1-r0: all z1.(student(z1) -> person(z1))
-    <BLANKLINE>
-    s2 readings:
-    <BLANKLINE>
-    s2-r0: -exists z1.(person(z1) & dance(z1))
-
-So let's retract the inconsistent sentence:
-
-    >>> dt.retract_sentence('No person dances', verbose=True) # doctest: +NORMALIZE_WHITESPACE
-    Current sentences are
-    s0: A student dances
-    s1: Every student is a person
-
-We can now verify that the result is consistent.
-
-    >>> dt.models()
-    --------------------------------------------------------------------------------
-    Model for Discourse Thread d0
-    --------------------------------------------------------------------------------
-    % number = 1
-    % seconds = 0
-    <BLANKLINE>
-    % Interpretation of size 2
-    <BLANKLINE>
-    c1 = 0.
-    <BLANKLINE>
-      dance(0).
-    - dance(1).
-    <BLANKLINE>
-      person(0).
-    - person(1).
-    <BLANKLINE>
-      student(0).
-    - student(1).
-    <BLANKLINE>
-    Consistent discourse: d0 ['s0-r0', 's1-r0']:
-        s0-r0: exists z1.(student(z1) & dance(z1))
-        s1-r0: all z1.(student(z1) -> person(z1))
-    <BLANKLINE>
-
-Checking Informativity
-======================
-
-Let's assume that we are still trying to extend the discourse *A
-student dances.* *Every student is a person.* We add a new sentence,
-but this time, we check whether it is informative with respect to what
-has gone before.
-
-    >>> dt.add_sentence('A person dances', informchk=True)
-    Sentence 'A person dances' under reading 'exists x.(person(x) & dance(x))':
-    Not informative relative to thread 'd0'
-
-In fact, we are just checking whether the new sentence is entailed by
-the preceding discourse.
-
-    >>> dt.models()
-    --------------------------------------------------------------------------------
-    Model for Discourse Thread d0
-    --------------------------------------------------------------------------------
-    % number = 1
-    % seconds = 0
-    <BLANKLINE>
-    % Interpretation of size 2
-    <BLANKLINE>
-    c1 = 0.
-    <BLANKLINE>
-    c2 = 0.
-    <BLANKLINE>
-      dance(0).
-    - dance(1).
-    <BLANKLINE>
-      person(0).
-    - person(1).
-    <BLANKLINE>
-      student(0).
-    - student(1).
-    <BLANKLINE>
-    Consistent discourse: d0 ['s0-r0', 's1-r0', 's2-r0']:
-        s0-r0: exists z1.(student(z1) & dance(z1))
-        s1-r0: all z1.(student(z1) -> person(z1))
-        s2-r0: exists z1.(person(z1) & dance(z1))
-    <BLANKLINE>
-
-
-
-Adding Background Knowledge
-===========================
-
-Let's build a new discourse, and look at the readings of the component sentences:
-
-    >>> dt = DiscourseTester(['Vincent is a boxer', 'Fido is a boxer', 'Vincent is married', 'Fido barks'])
-    >>> dt.readings()
-    <BLANKLINE>
-    s0 readings:
-    <BLANKLINE>
-    s0-r0: boxer(Vincent)
-    s0-r1: boxerdog(Vincent)
-    <BLANKLINE>
-    s1 readings:
-    <BLANKLINE>
-    s1-r0: boxer(Fido)
-    s1-r1: boxerdog(Fido)
-    <BLANKLINE>
-    s2 readings:
-    <BLANKLINE>
-    s2-r0: married(Vincent)
-    <BLANKLINE>
-    s3 readings:
-    <BLANKLINE>
-    s3-r0: bark(Fido)
-
-This gives us a lot of threads:
-
-    >>> dt.readings(threaded=True) # doctest: +NORMALIZE_WHITESPACE
-    d0: ['s0-r0', 's1-r0', 's2-r0', 's3-r0']
-    d1: ['s0-r0', 's1-r1', 's2-r0', 's3-r0']
-    d2: ['s0-r1', 's1-r0', 's2-r0', 's3-r0']
-    d3: ['s0-r1', 's1-r1', 's2-r0', 's3-r0']
-
-
-We can eliminate some of the readings, and hence some of the threads,
-by adding background information.
-
-    >>> import nltk.data
-    >>> bg = nltk.data.load('grammars/book_grammars/background.fol')
-    >>> dt.add_background(bg)
-    >>> dt.background()
-    all x.(boxerdog(x) -> dog(x))
-    all x.(boxer(x) -> person(x))
-    all x.-(dog(x) & person(x))
-    all x.(married(x) <-> exists y.marry(x,y))
-    all x.(bark(x) -> dog(x))
-    all x y.(marry(x,y) -> (person(x) & person(y)))
-    -(Vincent = Mia)
-    -(Vincent = Fido)
-    -(Mia = Fido)
-
-The background information allows us to reject three of the threads as
-inconsistent. To see what remains, use the ``filter=True`` parameter
-on ``readings()``.
-
-    >>> dt.readings(filter=True) # doctest: +NORMALIZE_WHITESPACE
-    d1: ['s0-r0', 's1-r1', 's2-r0', 's3-r0']
-
-The ``models()`` method gives us more information about the surviving thread.
-
-    >>> dt.models()
-    --------------------------------------------------------------------------------
-    Model for Discourse Thread d0
-    --------------------------------------------------------------------------------
-    No model found!
-    <BLANKLINE>
-    --------------------------------------------------------------------------------
-    Model for Discourse Thread d1
-    --------------------------------------------------------------------------------
-    % number = 1
-    % seconds = 0
-    <BLANKLINE>
-    % Interpretation of size 3
-    <BLANKLINE>
-    Fido = 0.
-    <BLANKLINE>
-    Mia = 1.
-    <BLANKLINE>
-    Vincent = 2.
-    <BLANKLINE>
-    f1(0) = 0.
-    f1(1) = 0.
-    f1(2) = 2.
-    <BLANKLINE>
-      bark(0).
-    - bark(1).
-    - bark(2).
-    <BLANKLINE>
-    - boxer(0).
-    - boxer(1).
-      boxer(2).
-    <BLANKLINE>
-      boxerdog(0).
-    - boxerdog(1).
-    - boxerdog(2).
-    <BLANKLINE>
-      dog(0).
-    - dog(1).
-    - dog(2).
-    <BLANKLINE>
-    - married(0).
-    - married(1).
-      married(2).
-    <BLANKLINE>
-    - person(0).
-    - person(1).
-      person(2).
-    <BLANKLINE>
-    - marry(0,0).
-    - marry(0,1).
-    - marry(0,2).
-    - marry(1,0).
-    - marry(1,1).
-    - marry(1,2).
-    - marry(2,0).
-    - marry(2,1).
-      marry(2,2).
-    <BLANKLINE>
-    --------------------------------------------------------------------------------
-    Model for Discourse Thread d2
-    --------------------------------------------------------------------------------
-    No model found!
-    <BLANKLINE>
-    --------------------------------------------------------------------------------
-    Model for Discourse Thread d3
-    --------------------------------------------------------------------------------
-    No model found!
-    <BLANKLINE>
-    Inconsistent discourse: d0 ['s0-r0', 's1-r0', 's2-r0', 's3-r0']:
-        s0-r0: boxer(Vincent)
-        s1-r0: boxer(Fido)
-        s2-r0: married(Vincent)
-        s3-r0: bark(Fido)
-    <BLANKLINE>
-    Consistent discourse: d1 ['s0-r0', 's1-r1', 's2-r0', 's3-r0']:
-        s0-r0: boxer(Vincent)
-        s1-r1: boxerdog(Fido)
-        s2-r0: married(Vincent)
-        s3-r0: bark(Fido)
-    <BLANKLINE>
-    Inconsistent discourse: d2 ['s0-r1', 's1-r0', 's2-r0', 's3-r0']:
-        s0-r1: boxerdog(Vincent)
-        s1-r0: boxer(Fido)
-        s2-r0: married(Vincent)
-        s3-r0: bark(Fido)
-    <BLANKLINE>
-    Inconsistent discourse: d3 ['s0-r1', 's1-r1', 's2-r0', 's3-r0']:
-        s0-r1: boxerdog(Vincent)
-        s1-r1: boxerdog(Fido)
-        s2-r0: married(Vincent)
-        s3-r0: bark(Fido)
-    <BLANKLINE>
-
-
-..  This will not be visible in the html output: create a tempdir to
-    play in.
-    >>> import tempfile, os
-    >>> tempdir = tempfile.mkdtemp()
-    >>> old_dir = os.path.abspath('.')
-    >>> os.chdir(tempdir)
-
-In order to play around with your own version of background knowledge,
-you might want to start off with a local copy of ``background.fol``:
-
-    >>> nltk.data.retrieve('grammars/book_grammars/background.fol')
-    Retrieving 'nltk:grammars/book_grammars/background.fol', saving to 'background.fol'
-
-After you have modified the file, the ``load_fol()`` function will parse
-the strings in the file into expressions of ``nltk.sem.logic``.
-
-    >>> from nltk.inference.discourse import load_fol
-    >>> mybg = load_fol(open('background.fol').read())
-
-The result can be loaded as an argument of ``add_background()`` in the
-manner shown earlier.
-
-..  This will not be visible in the html output: clean up the tempdir.
-    >>> os.chdir(old_dir)
-    >>> for f in os.listdir(tempdir):
-    ...     os.remove(os.path.join(tempdir, f))
-    >>> os.rmdir(tempdir)
-    >>> nltk.data.clear_cache()
-
-
-Regression Testing from book
-============================
-
-    >>> logic._counter._value = 0
-
-    >>> from nltk.tag import RegexpTagger
-    >>> tagger = RegexpTagger(
-    ...     [('^(chases|runs)$', 'VB'),
-    ...      ('^(a)$', 'ex_quant'),
-    ...      ('^(every)$', 'univ_quant'),
-    ...      ('^(dog|boy)$', 'NN'),
-    ...      ('^(He)$', 'PRP')
-    ... ])
-    >>> rc = DrtGlueReadingCommand(depparser=MaltParser(tagger=tagger))
-    >>> dt = DiscourseTester(map(str.split, ['Every dog chases a boy', 'He runs']), rc)
-    >>> dt.readings()
-    <BLANKLINE>
-    s0 readings:
-    <BLANKLINE>
-    s0-r0: ([z2],[boy(z2), (([z5],[dog(z5)]) -> ([],[chases(z5,z2)]))])
-    s0-r1: ([],[(([z1],[dog(z1)]) -> ([z2],[boy(z2), chases(z1,z2)]))])
-    <BLANKLINE>
-    s1 readings:
-    <BLANKLINE>
-    s1-r0: ([z1],[PRO(z1), runs(z1)])
-    >>> dt.readings(show_thread_readings=True)
-    d0: ['s0-r0', 's1-r0'] : ([z1,z2],[boy(z1), (([z3],[dog(z3)]) -> ([],[chases(z3,z1)])), (z2 = z1), runs(z2)])
-    d1: ['s0-r1', 's1-r0'] : INVALID: AnaphoraResolutionException
-    >>> dt.readings(filter=True, show_thread_readings=True)
-    d0: ['s0-r0', 's1-r0'] : ([z1,z3],[boy(z1), (([z2],[dog(z2)]) -> ([],[chases(z2,z1)])), (z3 = z1), runs(z3)])
-
-    >>> logic._counter._value = 0
-
-    >>> from nltk.parse import FeatureEarleyChartParser
-    >>> from nltk.sem.drt import DrtParser
-    >>> grammar = nltk.data.load('grammars/book_grammars/drt.fcfg', logic_parser=DrtParser())
-    >>> parser = FeatureEarleyChartParser(grammar, trace=0)
-    >>> trees = parser.parse('Angus owns a dog'.split())
-    >>> print(list(trees)[0].label()['SEM'].simplify().normalize())
-    ([z1,z2],[Angus(z1), dog(z2), own(z1,z2)])
diff --git a/nlp_resource_data/nltk/test/discourse_fixt.py b/nlp_resource_data/nltk/test/discourse_fixt.py

deleted file mode 100644 (file)

index d3ab46f..0000000
--- a/nlp_resource_data/nltk/test/discourse_fixt.py
+++ /dev/null
@@ -1,15 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import
-
-
-# FIXME: the entire discourse.doctest is skipped if Prover9/Mace4 is
-# not installed, but there are pure-python parts that don't need Prover9.
-def setup_module(module):
-    from nose import SkipTest
-    from nltk.inference.mace import Mace
-
-    try:
-        m = Mace()
-        m._find_binary('mace4')
-    except LookupError:
-        raise SkipTest("Mace4/Prover9 is not available so discourse.doctest is skipped")
diff --git a/nlp_resource_data/nltk/test/doctest_nose_plugin.py b/nlp_resource_data/nltk/test/doctest_nose_plugin.py

deleted file mode 100644 (file)

index d77210c..0000000
--- a/nlp_resource_data/nltk/test/doctest_nose_plugin.py
+++ /dev/null
@@ -1,164 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import print_function
-import re
-import sys
-import os
-import codecs
-import doctest
-from nose.util import tolist, anyp
-from nose.plugins.base import Plugin
-from nose.suite import ContextList
-from nose.plugins.doctests import Doctest, log, DocFileCase
-
-ALLOW_UNICODE = doctest.register_optionflag('ALLOW_UNICODE')
-
-
-class _UnicodeOutputChecker(doctest.OutputChecker):
-    _literal_re = re.compile(r"(\W|^)[uU]([rR]?[\'\"])", re.UNICODE)
-
-    def _remove_u_prefixes(self, txt):
-        return re.sub(self._literal_re, r'\1\2', txt)
-
-    def check_output(self, want, got, optionflags):
-        res = doctest.OutputChecker.check_output(self, want, got, optionflags)
-        if res:
-            return True
-        if not (optionflags & ALLOW_UNICODE):
-            return False
-
-        # ALLOW_UNICODE is active and want != got
-        cleaned_want = self._remove_u_prefixes(want)
-        cleaned_got = self._remove_u_prefixes(got)
-        res = doctest.OutputChecker.check_output(
-            self, cleaned_want, cleaned_got, optionflags
-        )
-        return res
-
-
-_checker = _UnicodeOutputChecker()
-
-
-class DoctestPluginHelper(object):
-    """
-    This mixin adds print_function future import to all test cases.
-
-    It also adds support for:
-        '#doctest +ALLOW_UNICODE' option that
-        makes DocTestCase think u'foo' == 'foo'.
-
-        '#doctest doctestencoding=utf-8' option that
-        changes the encoding of doctest files
-    """
-
-    OPTION_BY_NAME = ('doctestencoding',)
-
-    def loadTestsFromFileUnicode(self, filename):
-        if self.extension and anyp(filename.endswith, self.extension):
-            name = os.path.basename(filename)
-            dh = codecs.open(filename, 'r', self.options.get('doctestencoding'))
-            try:
-                doc = dh.read()
-            finally:
-                dh.close()
-
-            fixture_context = None
-            globs = {'__file__': filename}
-            if self.fixtures:
-                base, ext = os.path.splitext(name)
-                dirname = os.path.dirname(filename)
-                sys.path.append(dirname)
-                fixt_mod = base + self.fixtures
-                try:
-                    fixture_context = __import__(fixt_mod, globals(), locals(), ["nop"])
-                except ImportError as e:
-                    log.debug("Could not import %s: %s (%s)", fixt_mod, e, sys.path)
-                log.debug("Fixture module %s resolved to %s", fixt_mod, fixture_context)
-                if hasattr(fixture_context, 'globs'):
-                    globs = fixture_context.globs(globs)
-            parser = doctest.DocTestParser()
-            test = parser.get_doctest(
-                doc, globs=globs, name=name, filename=filename, lineno=0
-            )
-            if test.examples:
-                case = DocFileCase(
-                    test,
-                    optionflags=self.optionflags,
-                    setUp=getattr(fixture_context, 'setup_test', None),
-                    tearDown=getattr(fixture_context, 'teardown_test', None),
-                    result_var=self.doctest_result_var,
-                )
-                if fixture_context:
-                    yield ContextList((case,), context=fixture_context)
-                else:
-                    yield case
-            else:
-                yield False  # no tests to load
-
-    def loadTestsFromFile(self, filename):
-
-        cases = self.loadTestsFromFileUnicode(filename)
-
-        for case in cases:
-            if isinstance(case, ContextList):
-                yield ContextList([self._patchTestCase(c) for c in case], case.context)
-            else:
-                yield self._patchTestCase(case)
-
-    def loadTestsFromModule(self, module):
-        """Load doctests from the module.
-        """
-        for suite in super(DoctestPluginHelper, self).loadTestsFromModule(module):
-            cases = [self._patchTestCase(case) for case in suite._get_tests()]
-            yield self.suiteClass(cases, context=module, can_split=False)
-
-    def _patchTestCase(self, case):
-        if case:
-            case._dt_test.globs['print_function'] = print_function
-            case._dt_checker = _checker
-        return case
-
-    def configure(self, options, config):
-        # it is overriden in order to fix doctest options discovery
-
-        Plugin.configure(self, options, config)
-        self.doctest_result_var = options.doctest_result_var
-        self.doctest_tests = options.doctest_tests
-        self.extension = tolist(options.doctestExtension)
-        self.fixtures = options.doctestFixtures
-        self.finder = doctest.DocTestFinder()
-
-        # super(DoctestPluginHelper, self).configure(options, config)
-        self.optionflags = 0
-        self.options = {}
-
-        if options.doctestOptions:
-            stroptions = ",".join(options.doctestOptions).split(',')
-            for stroption in stroptions:
-                try:
-                    if stroption.startswith('+'):
-                        self.optionflags |= doctest.OPTIONFLAGS_BY_NAME[stroption[1:]]
-                        continue
-                    elif stroption.startswith('-'):
-                        self.optionflags &= ~doctest.OPTIONFLAGS_BY_NAME[stroption[1:]]
-                        continue
-                    try:
-                        key, value = stroption.split('=')
-                    except ValueError:
-                        pass
-                    else:
-                        if not key in self.OPTION_BY_NAME:
-                            raise ValueError()
-                        self.options[key] = value
-                        continue
-                except (AttributeError, ValueError, KeyError):
-                    raise ValueError("Unknown doctest option {}".format(stroption))
-                else:
-                    raise ValueError(
-                        "Doctest option is not a flag or a key/value pair: {} ".format(
-                            stroption
-                        )
-                    )
-
-
-class DoctestFix(DoctestPluginHelper, Doctest):
-    pass
diff --git a/nlp_resource_data/nltk/test/drt.doctest b/nlp_resource_data/nltk/test/drt.doctest

deleted file mode 100644 (file)

index 6163052..0000000
--- a/nlp_resource_data/nltk/test/drt.doctest
+++ /dev/null
@@ -1,517 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-================================
- Discourse Representation Theory
-================================
-
-    >>> from nltk.sem import logic
-    >>> from nltk.inference import TableauProver
-
-Overview
-========
-
-A DRS can be created with the ``DRS()`` constructor. This takes two arguments: a list of
-discourse referents and a list of conditions.
-
-    >>> from nltk.sem.drt import *
-    >>> dexpr = DrtExpression.fromstring
-    >>> man_x = dexpr('man(x)')
-    >>> walk_x = dexpr('walk(x)')
-    >>> x = dexpr('x')
-    >>> print(DRS([x], [man_x, walk_x]))
-    ([x],[man(x), walk(x)])
-
-The ``fromstring()`` method can also be applied directly to DRS
-expressions, which allows them to be specified more
-easily.
-
-    >>> drs1 = dexpr('([x],[man(x),walk(x)])')
-    >>> print(drs1)
-    ([x],[man(x), walk(x)])
-
-DRSs can be *merged* using the ``+`` operator.
-
-    >>> drs2 = dexpr('([y],[woman(y),stop(y)])')
-    >>> drs3 = drs1 + drs2
-    >>> print(drs3)
-    (([x],[man(x), walk(x)]) + ([y],[woman(y), stop(y)]))
-    >>> print(drs3.simplify())
-    ([x,y],[man(x), walk(x), woman(y), stop(y)])
-
-We can embed DRSs as components of an ``implies`` condition.
-
-    >>> s = '([], [(%s -> %s)])' % (drs1, drs2)
-    >>> print(dexpr(s))
-    ([],[(([x],[man(x), walk(x)]) -> ([y],[woman(y), stop(y)]))])
-
-The ``fol()`` method converts DRSs into FOL formulae.
-
-    >>> print(dexpr(r'([x],[man(x), walks(x)])').fol())
-    exists x.(man(x) & walks(x))
-    >>> print(dexpr(r'([],[(([x],[man(x)]) -> ([],[walks(x)]))])').fol())
-    all x.(man(x) -> walks(x))
-
-In order to visualize a DRS, the ``pretty_format()`` method can be used.
-
-    >>> print(drs3.pretty_format())
-      _________     __________
-     | x       |   | y        |
-    (|---------| + |----------|)
-     | man(x)  |   | woman(y) |
-     | walk(x) |   | stop(y)  |
-     |_________|   |__________|
-
-
-Parse to semantics
-------------------
-
-..
-    >>> logic._counter._value = 0
-
-DRSs can be used for building compositional semantics in a feature
-based grammar. To specify that we want to use DRSs, the appropriate
-logic parser needs to be passed as a parameter to ``load_parser()``:
-
-    >>> from nltk.parse import load_parser
-    >>> from nltk.sem.drt import DrtParser
-    >>> parser = load_parser('grammars/book_grammars/drt.fcfg', trace=0, logic_parser=DrtParser())
-    >>> for tree in parser.parse('a dog barks'.split()):
-    ...     print(tree.label()['SEM'].simplify())
-    ...
-    ([x],[dog(x), bark(x)])
-
-Alternatively, a ``FeatStructReader`` can be passed with the ``logic_parser`` set on it
-
-    >>> from nltk.featstruct import FeatStructReader
-    >>> from nltk.grammar import FeatStructNonterminal
-    >>> parser = load_parser('grammars/book_grammars/drt.fcfg', trace=0, fstruct_reader=FeatStructReader(fdict_class=FeatStructNonterminal, logic_parser=DrtParser()))
-    >>> for tree in parser.parse('every girl chases a dog'.split()):
-    ...     print(tree.label()['SEM'].simplify().normalize())
-    ...
-    ([],[(([z1],[girl(z1)]) -> ([z2],[dog(z2), chase(z1,z2)]))])
-
-
-
-Unit Tests
-==========
-
-Parser
-------
-
-    >>> print(dexpr(r'([x,y],[sees(x,y)])'))
-    ([x,y],[sees(x,y)])
-    >>> print(dexpr(r'([x],[man(x), walks(x)])'))
-    ([x],[man(x), walks(x)])
-    >>> print(dexpr(r'\x.([],[man(x), walks(x)])'))
-    \x.([],[man(x), walks(x)])
-    >>> print(dexpr(r'\x.\y.([],[sees(x,y)])'))
-    \x y.([],[sees(x,y)])
-
-    >>> print(dexpr(r'([x,y],[(x = y)])'))
-    ([x,y],[(x = y)])
-    >>> print(dexpr(r'([x,y],[(x != y)])'))
-    ([x,y],[-(x = y)])
-
-    >>> print(dexpr(r'\x.([],[walks(x)])(john)'))
-    (\x.([],[walks(x)]))(john)
-    >>> print(dexpr(r'\R.\x.([],[big(x,R)])(\y.([],[mouse(y)]))'))
-    (\R x.([],[big(x,R)]))(\y.([],[mouse(y)]))
-
-    >>> print(dexpr(r'(([x],[walks(x)]) + ([y],[runs(y)]))'))
-    (([x],[walks(x)]) + ([y],[runs(y)]))
-    >>> print(dexpr(r'(([x,y],[walks(x), jumps(y)]) + (([z],[twos(z)]) + ([w],[runs(w)])))'))
-    (([x,y],[walks(x), jumps(y)]) + ([z],[twos(z)]) + ([w],[runs(w)]))
-    >>> print(dexpr(r'((([],[walks(x)]) + ([],[twos(x)])) + ([],[runs(x)]))'))
-    (([],[walks(x)]) + ([],[twos(x)]) + ([],[runs(x)]))
-    >>> print(dexpr(r'((([],[walks(x)]) + ([],[runs(x)])) + (([],[threes(x)]) + ([],[fours(x)])))'))
-    (([],[walks(x)]) + ([],[runs(x)]) + ([],[threes(x)]) + ([],[fours(x)]))
-
-    >>> print(dexpr(r'(([],[walks(x)]) -> ([],[runs(x)]))'))
-    (([],[walks(x)]) -> ([],[runs(x)]))
-
-    >>> print(dexpr(r'([x],[PRO(x), sees(John,x)])'))
-    ([x],[PRO(x), sees(John,x)])
-    >>> print(dexpr(r'([x],[man(x), -([],[walks(x)])])'))
-    ([x],[man(x), -([],[walks(x)])])
-    >>> print(dexpr(r'([],[(([x],[man(x)]) -> ([],[walks(x)]))])'))
-    ([],[(([x],[man(x)]) -> ([],[walks(x)]))])
-
-    >>> print(dexpr(r'DRS([x],[walk(x)])'))
-    ([x],[walk(x)])
-    >>> print(dexpr(r'DRS([x][walk(x)])'))
-    ([x],[walk(x)])
-    >>> print(dexpr(r'([x][walk(x)])'))
-    ([x],[walk(x)])
-
-``simplify()``
---------------
-
-    >>> print(dexpr(r'\x.([],[man(x), walks(x)])(john)').simplify())
-    ([],[man(john), walks(john)])
-    >>> print(dexpr(r'\x.\y.([z],[dog(z),sees(x,y)])(john)(mary)').simplify())
-    ([z],[dog(z), sees(john,mary)])
-    >>> print(dexpr(r'\R x.([],[big(x,R)])(\y.([],[mouse(y)]))').simplify())
-    \x.([],[big(x,\y.([],[mouse(y)]))])
-
-    >>> print(dexpr(r'(([x],[walks(x)]) + ([y],[runs(y)]))').simplify())
-    ([x,y],[walks(x), runs(y)])
-    >>> print(dexpr(r'(([x,y],[walks(x), jumps(y)]) + (([z],[twos(z)]) + ([w],[runs(w)])))').simplify())
-    ([w,x,y,z],[walks(x), jumps(y), twos(z), runs(w)])
-    >>> print(dexpr(r'((([],[walks(x)]) + ([],[runs(x)]) + ([],[threes(x)]) + ([],[fours(x)])))').simplify())
-    ([],[walks(x), runs(x), threes(x), fours(x)])
-    >>> dexpr(r'([x],[man(x)])+([x],[walks(x)])').simplify() == \
-    ... dexpr(r'([x,z1],[man(x), walks(z1)])')
-    True
-    >>> dexpr(r'([y],[boy(y), (([x],[dog(x)]) -> ([],[chase(x,y)]))])+([x],[run(x)])').simplify() == \
-    ... dexpr(r'([y,z1],[boy(y), (([x],[dog(x)]) -> ([],[chase(x,y)])), run(z1)])')
-    True
-
-    >>> dexpr(r'\Q.(([x],[john(x),walks(x)]) + Q)(([x],[PRO(x),leaves(x)]))').simplify() == \
-    ... dexpr(r'([x,z1],[john(x), walks(x), PRO(z1), leaves(z1)])')
-    True
-
-    >>> logic._counter._value = 0
-    >>> print(dexpr('([],[(([x],[dog(x)]) -> ([e,y],[boy(y), chase(e), subj(e,x), obj(e,y)]))])+([e,x],[PRO(x), run(e), subj(e,x)])').simplify().normalize().normalize())
-    ([e02,z5],[(([z3],[dog(z3)]) -> ([e01,z4],[boy(z4), chase(e01), subj(e01,z3), obj(e01,z4)])), PRO(z5), run(e02), subj(e02,z5)])
-
-``fol()``
------------
-
-    >>> print(dexpr(r'([x,y],[sees(x,y)])').fol())
-    exists x y.sees(x,y)
-    >>> print(dexpr(r'([x],[man(x), walks(x)])').fol())
-    exists x.(man(x) & walks(x))
-    >>> print(dexpr(r'\x.([],[man(x), walks(x)])').fol())
-    \x.(man(x) & walks(x))
-    >>> print(dexpr(r'\x y.([],[sees(x,y)])').fol())
-    \x y.sees(x,y)
-
-    >>> print(dexpr(r'\x.([],[walks(x)])(john)').fol())
-    \x.walks(x)(john)
-    >>> print(dexpr(r'\R x.([],[big(x,R)])(\y.([],[mouse(y)]))').fol())
-    (\R x.big(x,R))(\y.mouse(y))
-
-    >>> print(dexpr(r'(([x],[walks(x)]) + ([y],[runs(y)]))').fol())
-    (exists x.walks(x) & exists y.runs(y))
-
-    >>> print(dexpr(r'(([],[walks(x)]) -> ([],[runs(x)]))').fol())
-    (walks(x) -> runs(x))
-
-    >>> print(dexpr(r'([x],[PRO(x), sees(John,x)])').fol())
-    exists x.(PRO(x) & sees(John,x))
-    >>> print(dexpr(r'([x],[man(x), -([],[walks(x)])])').fol())
-    exists x.(man(x) & -walks(x))
-    >>> print(dexpr(r'([],[(([x],[man(x)]) -> ([],[walks(x)]))])').fol())
-    all x.(man(x) -> walks(x))
-
-    >>> print(dexpr(r'([x],[man(x) | walks(x)])').fol())
-    exists x.(man(x) | walks(x))
-    >>> print(dexpr(r'P(x) + ([x],[walks(x)])').fol())
-    (P(x) & exists x.walks(x))
-
-``resolve_anaphora()``
-----------------------
-
-    >>> from nltk.sem.drt import AnaphoraResolutionException
-
-    >>> print(resolve_anaphora(dexpr(r'([x,y,z],[dog(x), cat(y), walks(z), PRO(z)])')))
-    ([x,y,z],[dog(x), cat(y), walks(z), (z = [x,y])])
-    >>> print(resolve_anaphora(dexpr(r'([],[(([x],[dog(x)]) -> ([y],[walks(y), PRO(y)]))])')))
-    ([],[(([x],[dog(x)]) -> ([y],[walks(y), (y = x)]))])
-    >>> print(resolve_anaphora(dexpr(r'(([x,y],[]) + ([],[PRO(x)]))')).simplify())
-    ([x,y],[(x = y)])
-    >>> try: print(resolve_anaphora(dexpr(r'([x],[walks(x), PRO(x)])')))
-    ... except AnaphoraResolutionException as e: print(e)
-    Variable 'x' does not resolve to anything.
-    >>> print(resolve_anaphora(dexpr('([e01,z6,z7],[boy(z6), PRO(z7), run(e01), subj(e01,z7)])')))
-    ([e01,z6,z7],[boy(z6), (z7 = z6), run(e01), subj(e01,z7)])
-
-``equiv()``:
-----------------
-
-    >>> a = dexpr(r'([x],[man(x), walks(x)])')
-    >>> b = dexpr(r'([x],[walks(x), man(x)])')
-    >>> print(a.equiv(b, TableauProver()))
-    True
-
-
-``replace()``:
---------------
-
-    >>> a = dexpr(r'a')
-    >>> w = dexpr(r'w')
-    >>> x = dexpr(r'x')
-    >>> y = dexpr(r'y')
-    >>> z = dexpr(r'z')
-
-
-replace bound
--------------
-
-    >>> print(dexpr(r'([x],[give(x,y,z)])').replace(x.variable, a, False))
-    ([x],[give(x,y,z)])
-    >>> print(dexpr(r'([x],[give(x,y,z)])').replace(x.variable, a, True))
-    ([a],[give(a,y,z)])
-
-replace unbound
----------------
-
-    >>> print(dexpr(r'([x],[give(x,y,z)])').replace(y.variable, a, False))
-    ([x],[give(x,a,z)])
-    >>> print(dexpr(r'([x],[give(x,y,z)])').replace(y.variable, a, True))
-    ([x],[give(x,a,z)])
-
-replace unbound with bound
---------------------------
-
-    >>> dexpr(r'([x],[give(x,y,z)])').replace(y.variable, x, False) == \
-    ... dexpr('([z1],[give(z1,x,z)])')
-    True
-    >>> dexpr(r'([x],[give(x,y,z)])').replace(y.variable, x, True) == \
-    ... dexpr('([z1],[give(z1,x,z)])')
-    True
-
-replace unbound with unbound
-----------------------------
-
-    >>> print(dexpr(r'([x],[give(x,y,z)])').replace(y.variable, z, False))
-    ([x],[give(x,z,z)])
-    >>> print(dexpr(r'([x],[give(x,y,z)])').replace(y.variable, z, True))
-    ([x],[give(x,z,z)])
-
-
-replace unbound
----------------
-
-    >>> print(dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,z)])').replace(z.variable, a, False))
-    (([x],[P(x,y,a)]) + ([y],[Q(x,y,a)]))
-    >>> print(dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,z)])').replace(z.variable, a, True))
-    (([x],[P(x,y,a)]) + ([y],[Q(x,y,a)]))
-
-replace bound
--------------
-
-    >>> print(dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,z)])').replace(x.variable, a, False))
-    (([x],[P(x,y,z)]) + ([y],[Q(x,y,z)]))
-    >>> print(dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,z)])').replace(x.variable, a, True))
-    (([a],[P(a,y,z)]) + ([y],[Q(a,y,z)]))
-
-replace unbound with unbound
-----------------------------
-
-    >>> print(dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,z)])').replace(z.variable, a, False))
-    (([x],[P(x,y,a)]) + ([y],[Q(x,y,a)]))
-    >>> print(dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,z)])').replace(z.variable, a, True))
-    (([x],[P(x,y,a)]) + ([y],[Q(x,y,a)]))
-
-replace unbound with bound on same side
----------------------------------------
-
-    >>> dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,w)])').replace(z.variable, x, False) == \
-    ... dexpr(r'(([z1],[P(z1,y,x)]) + ([y],[Q(z1,y,w)]))')
-    True
-    >>> dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,w)])').replace(z.variable, x, True) == \
-    ... dexpr(r'(([z1],[P(z1,y,x)]) + ([y],[Q(z1,y,w)]))')
-    True
-
-replace unbound with bound on other side
-----------------------------------------
-
-    >>> dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,w)])').replace(w.variable, x, False) == \
-    ... dexpr(r'(([z1],[P(z1,y,z)]) + ([y],[Q(z1,y,x)]))')
-    True
-    >>> dexpr(r'([x],[P(x,y,z)])+([y],[Q(x,y,w)])').replace(w.variable, x, True) == \
-    ... dexpr(r'(([z1],[P(z1,y,z)]) + ([y],[Q(z1,y,x)]))')
-    True
-
-replace unbound with double bound
----------------------------------
-
-    >>> dexpr(r'([x],[P(x,y,z)])+([x],[Q(x,y,w)])').replace(z.variable, x, False) == \
-    ... dexpr(r'(([z1],[P(z1,y,x)]) + ([z1],[Q(z1,y,w)]))')
-    True
-    >>> dexpr(r'([x],[P(x,y,z)])+([x],[Q(x,y,w)])').replace(z.variable, x, True) == \
-    ... dexpr(r'(([z1],[P(z1,y,x)]) + ([z1],[Q(z1,y,w)]))')
-    True
-
-
-regression tests
-----------------
-
-    >>> d = dexpr('([x],[A(c), ([y],[B(x,y,z,a)])->([z],[C(x,y,z,a)])])')
-    >>> print(d)
-    ([x],[A(c), (([y],[B(x,y,z,a)]) -> ([z],[C(x,y,z,a)]))])
-    >>> print(d.pretty_format())
-     ____________________________________
-    | x                                  |
-    |------------------------------------|
-    | A(c)                               |
-    |   ____________      ____________   |
-    |  | y          |    | z          |  |
-    | (|------------| -> |------------|) |
-    |  | B(x,y,z,a) |    | C(x,y,z,a) |  |
-    |  |____________|    |____________|  |
-    |____________________________________|
-    >>> print(str(d))
-    ([x],[A(c), (([y],[B(x,y,z,a)]) -> ([z],[C(x,y,z,a)]))])
-    >>> print(d.fol())
-    exists x.(A(c) & all y.(B(x,y,z,a) -> exists z.C(x,y,z,a)))
-    >>> print(d.replace(Variable('a'), DrtVariableExpression(Variable('r'))))
-    ([x],[A(c), (([y],[B(x,y,z,r)]) -> ([z],[C(x,y,z,r)]))])
-    >>> print(d.replace(Variable('x'), DrtVariableExpression(Variable('r'))))
-    ([x],[A(c), (([y],[B(x,y,z,a)]) -> ([z],[C(x,y,z,a)]))])
-    >>> print(d.replace(Variable('y'), DrtVariableExpression(Variable('r'))))
-    ([x],[A(c), (([y],[B(x,y,z,a)]) -> ([z],[C(x,y,z,a)]))])
-    >>> print(d.replace(Variable('z'), DrtVariableExpression(Variable('r'))))
-    ([x],[A(c), (([y],[B(x,y,r,a)]) -> ([z],[C(x,y,z,a)]))])
-    >>> print(d.replace(Variable('x'), DrtVariableExpression(Variable('r')), True))
-    ([r],[A(c), (([y],[B(r,y,z,a)]) -> ([z],[C(r,y,z,a)]))])
-    >>> print(d.replace(Variable('y'), DrtVariableExpression(Variable('r')), True))
-    ([x],[A(c), (([r],[B(x,r,z,a)]) -> ([z],[C(x,r,z,a)]))])
-    >>> print(d.replace(Variable('z'), DrtVariableExpression(Variable('r')), True))
-    ([x],[A(c), (([y],[B(x,y,r,a)]) -> ([r],[C(x,y,r,a)]))])
-    >>> print(d == dexpr('([l],[A(c), ([m],[B(l,m,z,a)])->([n],[C(l,m,n,a)])])'))
-    True
-    >>> d = dexpr('([],[([x,y],[B(x,y,h), ([a,b],[dee(x,a,g)])])->([z,w],[cee(x,y,f), ([c,d],[E(x,c,d,e)])])])')
-    >>> sorted(d.free())
-    [Variable('B'), Variable('E'), Variable('e'), Variable('f'), Variable('g'), Variable('h')]
-    >>> sorted(d.variables())
-    [Variable('B'), Variable('E'), Variable('e'), Variable('f'), Variable('g'), Variable('h')]
-    >>> sorted(d.get_refs(True))
-    [Variable('a'), Variable('b'), Variable('c'), Variable('d'), Variable('w'), Variable('x'), Variable('y'), Variable('z')]
-    >>> sorted(d.conds[0].get_refs(False))
-    [Variable('x'), Variable('y')]
-    >>> print(dexpr('([x,y],[A(x,y), (x=y), ([],[B(x,y)])->([],[C(x,y)]), ([x,y],[D(x,y)])->([],[E(x,y)]), ([],[F(x,y)])->([x,y],[G(x,y)])])').eliminate_equality())
-    ([x],[A(x,x), (([],[B(x,x)]) -> ([],[C(x,x)])), (([x,y],[D(x,y)]) -> ([],[E(x,y)])), (([],[F(x,x)]) -> ([x,y],[G(x,y)]))])
-    >>> print(dexpr('([x,y],[A(x,y), (x=y)]) -> ([],[B(x,y)])').eliminate_equality())
-    (([x],[A(x,x)]) -> ([],[B(x,x)]))
-    >>> print(dexpr('([x,y],[A(x,y)]) -> ([],[B(x,y), (x=y)])').eliminate_equality())
-    (([x,y],[A(x,y)]) -> ([],[B(x,x)]))
-    >>> print(dexpr('([x,y],[A(x,y), (x=y), ([],[B(x,y)])])').eliminate_equality())
-    ([x],[A(x,x), ([],[B(x,x)])])
-    >>> print(dexpr('([x,y],[A(x,y), ([],[B(x,y), (x=y)])])').eliminate_equality())
-    ([x,y],[A(x,y), ([],[B(x,x)])])
-    >>> print(dexpr('([z8 z9 z10],[A(z8), z8=z10, z9=z10, B(z9), C(z10), D(z10)])').eliminate_equality())
-    ([z9],[A(z9), B(z9), C(z9), D(z9)])
-
-    >>> print(dexpr('([x,y],[A(x,y), (x=y), ([],[B(x,y)]), ([x,y],[C(x,y)])])').eliminate_equality())
-    ([x],[A(x,x), ([],[B(x,x)]), ([x,y],[C(x,y)])])
-    >>> print(dexpr('([x,y],[A(x,y)]) + ([],[B(x,y), (x=y)]) + ([],[C(x,y)])').eliminate_equality())
-    ([x],[A(x,x), B(x,x), C(x,x)])
-    >>> print(dexpr('([x,y],[B(x,y)])+([x,y],[C(x,y)])').replace(Variable('y'), DrtVariableExpression(Variable('x'))))
-    (([x,y],[B(x,y)]) + ([x,y],[C(x,y)]))
-    >>> print(dexpr('(([x,y],[B(x,y)])+([],[C(x,y)]))+([],[D(x,y)])').replace(Variable('y'), DrtVariableExpression(Variable('x'))))
-    (([x,y],[B(x,y)]) + ([],[C(x,y)]) + ([],[D(x,y)]))
-    >>> print(dexpr('(([],[B(x,y)])+([],[C(x,y)]))+([],[D(x,y)])').replace(Variable('y'), DrtVariableExpression(Variable('x'))))
-    (([],[B(x,x)]) + ([],[C(x,x)]) + ([],[D(x,x)]))
-    >>> print(dexpr('(([],[B(x,y), ([x,y],[A(x,y)])])+([],[C(x,y)]))+([],[D(x,y)])').replace(Variable('y'), DrtVariableExpression(Variable('x'))).normalize())
-    (([],[B(z3,z1), ([z2,z3],[A(z3,z2)])]) + ([],[C(z3,z1)]) + ([],[D(z3,z1)]))
-
-
-Parse errors
-============
-
-    >>> def parse_error(drtstring):
-    ...     try: dexpr(drtstring)
-    ...     except logic.LogicalExpressionException as e: print(e)
-
-    >>> parse_error(r'')
-    End of input found.  Expression expected.
-    <BLANKLINE>
-    ^
-    >>> parse_error(r'(')
-    End of input found.  Expression expected.
-    (
-     ^
-    >>> parse_error(r'()')
-    Unexpected token: ')'.  Expression expected.
-    ()
-     ^
-    >>> parse_error(r'([')
-    End of input found.  Expected token ']'.
-    ([
-      ^
-    >>> parse_error(r'([,')
-    ',' is an illegal variable name.  Constants may not be quantified.
-    ([,
-      ^
-    >>> parse_error(r'([x,')
-    End of input found.  Variable expected.
-    ([x,
-        ^
-    >>> parse_error(r'([]')
-    End of input found.  Expected token '['.
-    ([]
-       ^
-    >>> parse_error(r'([][')
-    End of input found.  Expected token ']'.
-    ([][
-        ^
-    >>> parse_error(r'([][,')
-    Unexpected token: ','.  Expression expected.
-    ([][,
-        ^
-    >>> parse_error(r'([][]')
-    End of input found.  Expected token ')'.
-    ([][]
-         ^
-    >>> parse_error(r'([x][man(x)]) |')
-    End of input found.  Expression expected.
-    ([x][man(x)]) |
-                   ^
-
-Pretty Printing
-===============
-
-    >>> dexpr(r"([],[])").pretty_print()
-     __
-    |  |
-    |--|
-    |__|
-
-    >>> dexpr(r"([],[([x],[big(x), dog(x)]) -> ([],[bark(x)]) -([x],[walk(x)])])").pretty_print()
-     _____________________________
-    |                             |
-    |-----------------------------|
-    |   ________      _________   |
-    |  | x      |    |         |  |
-    | (|--------| -> |---------|) |
-    |  | big(x) |    | bark(x) |  |
-    |  | dog(x) |    |_________|  |
-    |  |________|                 |
-    |      _________              |
-    |     | x       |             |
-    | __  |---------|             |
-    |   | | walk(x) |             |
-    |     |_________|             |
-    |_____________________________|
-
-    >>> dexpr(r"([x,y],[x=y]) + ([z],[dog(z), walk(z)])").pretty_print()
-      _________     _________
-     | x y     |   | z       |
-    (|---------| + |---------|)
-     | (x = y) |   | dog(z)  |
-     |_________|   | walk(z) |
-                   |_________|
-
-    >>> dexpr(r"([],[([x],[]) | ([y],[]) | ([z],[dog(z), walk(z)])])").pretty_print()
-     _______________________________
-    |                               |
-    |-------------------------------|
-    |   ___     ___     _________   |
-    |  | x |   | y |   | z       |  |
-    | (|---| | |---| | |---------|) |
-    |  |___|   |___|   | dog(z)  |  |
-    |                  | walk(z) |  |
-    |                  |_________|  |
-    |_______________________________|
-
-    >>> dexpr(r"\P.\Q.(([x],[]) + P(x) + Q(x))(\x.([],[dog(x)]))").pretty_print()
-              ___                        ________
-     \       | x |                 \    |        |
-     /\ P Q.(|---| + P(x) + Q(x))( /\ x.|--------|)
-             |___|                      | dog(x) |
-                                        |________|
-
-
diff --git a/nlp_resource_data/nltk/test/featgram.doctest b/nlp_resource_data/nltk/test/featgram.doctest

deleted file mode 100644 (file)

index a1775f8..0000000
--- a/nlp_resource_data/nltk/test/featgram.doctest
+++ /dev/null
@@ -1,607 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-=========================
- Feature Grammar Parsing
-=========================
-
-.. include:: ../../../nltk_book/definitions.rst
-
-Grammars can be parsed from strings.
-
-    >>> from __future__ import print_function
-    >>> import nltk
-    >>> from nltk import grammar, parse
-    >>> g = """
-    ... % start DP
-    ... DP[AGR=?a] -> D[AGR=?a] N[AGR=?a]
-    ... D[AGR=[NUM='sg', PERS=3]] -> 'this' | 'that'
-    ... D[AGR=[NUM='pl', PERS=3]] -> 'these' | 'those'
-    ... D[AGR=[NUM='pl', PERS=1]] -> 'we'
-    ... D[AGR=[PERS=2]] -> 'you'
-    ... N[AGR=[NUM='sg', GND='m']] -> 'boy'
-    ... N[AGR=[NUM='pl', GND='m']] -> 'boys'
-    ... N[AGR=[NUM='sg', GND='f']] -> 'girl'
-    ... N[AGR=[NUM='pl', GND='f']] -> 'girls'
-    ... N[AGR=[NUM='sg']] -> 'student'
-    ... N[AGR=[NUM='pl']] -> 'students'
-    ... """
-    >>> grammar = grammar.FeatureGrammar.fromstring(g)
-    >>> tokens = 'these girls'.split()
-    >>> parser = parse.FeatureEarleyChartParser(grammar)
-    >>> trees = parser.parse(tokens)
-    >>> for tree in trees: print(tree)
-    (DP[AGR=[GND='f', NUM='pl', PERS=3]]
-      (D[AGR=[NUM='pl', PERS=3]] these)
-      (N[AGR=[GND='f', NUM='pl']] girls))
-
-In general, when we are trying to develop even a very small grammar,
-it is convenient to put the rules in a file where they can be edited,
-tested and revised. Let's assume that we have saved feat0cfg_ as a file named
-``'feat0.fcfg'`` and placed it in the NLTK ``data`` directory. We can
-inspect it as follows:
-
-.. _feat0cfg: http://nltk.svn.sourceforge.net/svnroot/nltk/trunk/nltk/data/grammars/feat0.fcfg
-
-    >>> nltk.data.show_cfg('grammars/book_grammars/feat0.fcfg')
-    % start S
-    # ###################
-    # Grammar Productions
-    # ###################
-    # S expansion productions
-    S -> NP[NUM=?n] VP[NUM=?n]
-    # NP expansion productions
-    NP[NUM=?n] -> N[NUM=?n]
-    NP[NUM=?n] -> PropN[NUM=?n]
-    NP[NUM=?n] -> Det[NUM=?n] N[NUM=?n]
-    NP[NUM=pl] -> N[NUM=pl]
-    # VP expansion productions
-    VP[TENSE=?t, NUM=?n] -> IV[TENSE=?t, NUM=?n]
-    VP[TENSE=?t, NUM=?n] -> TV[TENSE=?t, NUM=?n] NP
-    # ###################
-    # Lexical Productions
-    # ###################
-    Det[NUM=sg] -> 'this' | 'every'
-    Det[NUM=pl] -> 'these' | 'all'
-    Det -> 'the' | 'some' | 'several'
-    PropN[NUM=sg]-> 'Kim' | 'Jody'
-    N[NUM=sg] -> 'dog' | 'girl' | 'car' | 'child'
-    N[NUM=pl] -> 'dogs' | 'girls' | 'cars' | 'children'
-    IV[TENSE=pres,  NUM=sg] -> 'disappears' | 'walks'
-    TV[TENSE=pres, NUM=sg] -> 'sees' | 'likes'
-    IV[TENSE=pres,  NUM=pl] -> 'disappear' | 'walk'
-    TV[TENSE=pres, NUM=pl] -> 'see' | 'like'
-    IV[TENSE=past] -> 'disappeared' | 'walked'
-    TV[TENSE=past] -> 'saw' | 'liked'
-
-Assuming we have saved feat0cfg_ as a file named
-``'feat0.fcfg'``, the function ``parse.load_parser`` allows us to
-read the grammar into NLTK, ready for use in parsing.
-
-
-    >>> cp = parse.load_parser('grammars/book_grammars/feat0.fcfg', trace=1)
-    >>> sent = 'Kim likes children'
-    >>> tokens = sent.split()
-    >>> tokens
-    ['Kim', 'likes', 'children']
-    >>> trees = cp.parse(tokens)
-    |.Kim .like.chil.|
-    |[----]    .    .| [0:1] 'Kim'
-    |.    [----]    .| [1:2] 'likes'
-    |.    .    [----]| [2:3] 'children'
-    |[----]    .    .| [0:1] PropN[NUM='sg'] -> 'Kim' *
-    |[----]    .    .| [0:1] NP[NUM='sg'] -> PropN[NUM='sg'] *
-    |[---->    .    .| [0:1] S[] -> NP[NUM=?n] * VP[NUM=?n] {?n: 'sg'}
-    |.    [----]    .| [1:2] TV[NUM='sg', TENSE='pres'] -> 'likes' *
-    |.    [---->    .| [1:2] VP[NUM=?n, TENSE=?t] -> TV[NUM=?n, TENSE=?t] * NP[] {?n: 'sg', ?t: 'pres'}
-    |.    .    [----]| [2:3] N[NUM='pl'] -> 'children' *
-    |.    .    [----]| [2:3] NP[NUM='pl'] -> N[NUM='pl'] *
-    |.    .    [---->| [2:3] S[] -> NP[NUM=?n] * VP[NUM=?n] {?n: 'pl'}
-    |.    [---------]| [1:3] VP[NUM='sg', TENSE='pres'] -> TV[NUM='sg', TENSE='pres'] NP[] *
-    |[==============]| [0:3] S[] -> NP[NUM='sg'] VP[NUM='sg'] *
-    >>> for tree in trees: print(tree)
-    (S[]
-      (NP[NUM='sg'] (PropN[NUM='sg'] Kim))
-      (VP[NUM='sg', TENSE='pres']
-        (TV[NUM='sg', TENSE='pres'] likes)
-        (NP[NUM='pl'] (N[NUM='pl'] children))))
-
-The parser works directly with
-the underspecified productions given by the grammar. That is, the
-Predictor rule does not attempt to compile out all admissible feature
-combinations before trying to expand the non-terminals on the left hand
-side of a production. However, when the Scanner matches an input word
-against a lexical production that has been predicted, the new edge will
-typically contain fully specified features; e.g., the edge
-[PropN[`num`:feat: = `sg`:fval:] |rarr| 'Kim', (0, 1)]. Recall from
-Chapter 8 that the Fundamental (or Completer) Rule in
-standard CFGs is used to combine an incomplete edge that's expecting a
-nonterminal *B* with a following, complete edge whose left hand side
-matches *B*. In our current setting, rather than checking for a
-complete match, we test whether the expected category *B* will
-`unify`:dt: with the left hand side *B'* of a following complete
-edge. We will explain in more detail in Section 9.2 how
-unification works; for the moment, it is enough to know that as a
-result of unification, any variable values of features in *B* will be
-instantiated by constant values in the corresponding feature structure
-in *B'*, and these instantiated values will be used in the new edge
-added by the Completer. This instantiation can be seen, for example,
-in the edge
-[NP [`num`:feat:\ =\ `sg`:fval:] |rarr| PropN[`num`:feat:\ =\ `sg`:fval:] |dot|, (0, 1)]
-in Example 9.2, where the feature `num`:feat: has been assigned the value `sg`:fval:.
-
-Feature structures in NLTK are declared with the ``nltk.FeatStruct()``
-constructor. Atomic feature values can be strings or integers.
-
-    >>> fs1 = nltk.FeatStruct(TENSE='past', NUM='sg')
-    >>> print(fs1)
-    [ NUM   = 'sg'   ]
-    [ TENSE = 'past' ]
-
-We can think of a feature structure as being like a Python dictionary,
-and access its values by indexing in the usual way.
-
-    >>> fs1 = nltk.FeatStruct(PER=3, NUM='pl', GND='fem')
-    >>> print(fs1['GND'])
-    fem
-
-We can also define feature structures which have complex values, as
-discussed earlier.
-
-    >>> fs2 = nltk.FeatStruct(POS='N', AGR=fs1)
-    >>> print(fs2)
-    [       [ GND = 'fem' ] ]
-    [ AGR = [ NUM = 'pl'  ] ]
-    [       [ PER = 3     ] ]
-    [                       ]
-    [ POS = 'N'             ]
-    >>> print(fs2['AGR'])
-    [ GND = 'fem' ]
-    [ NUM = 'pl'  ]
-    [ PER = 3     ]
-    >>> print(fs2['AGR']['PER'])
-    3
-
-Feature structures can also be constructed by passing a bracketed string
-representation to the ``nltk.FeatStruct`` constructor. Note that in this
-case, atomic feature values do not need to be enclosed in quotes.
-
-    >>> f1 = nltk.FeatStruct("[NUMBER = sg]")
-    >>> f2 = nltk.FeatStruct("[PERSON = 3]")
-    >>> print(nltk.unify(f1, f2))
-    [ NUMBER = 'sg' ]
-    [ PERSON = 3    ]
-
-    >>> f1 = nltk.FeatStruct("[A = [B = b, D = d]]")
-    >>> f2 = nltk.FeatStruct("[A = [C = c, D = d]]")
-    >>> print(nltk.unify(f1, f2))
-    [     [ B = 'b' ] ]
-    [ A = [ C = 'c' ] ]
-    [     [ D = 'd' ] ]
-
-
-Feature Structures as Graphs
-----------------------------
-
-Feature structures are not inherently tied to linguistic objects; they are
-general purpose structures for representing knowledge. For example, we
-could encode information about a person in a feature structure:
-
-    >>> person01 = nltk.FeatStruct("[NAME=Lee, TELNO='01 27 86 42 96',AGE=33]")
-    >>> print(person01)
-    [ AGE   = 33               ]
-    [ NAME  = 'Lee'            ]
-    [ TELNO = '01 27 86 42 96' ]
-
-There are a number of notations for representing reentrancy in
-matrix-style representations of feature structures. In NLTK, we adopt
-the following convention: the first occurrence of a shared feature structure
-is prefixed with an integer in parentheses, such as ``(1)``, and any
-subsequent reference to that structure uses the notation
-``->(1)``, as shown below.
-
-
-    >>> fs = nltk.FeatStruct("""[NAME=Lee, ADDRESS=(1)[NUMBER=74, STREET='rue Pascal'],
-    ...                               SPOUSE=[NAME=Kim, ADDRESS->(1)]]""")
-    >>> print(fs)
-    [ ADDRESS = (1) [ NUMBER = 74           ] ]
-    [               [ STREET = 'rue Pascal' ] ]
-    [                                         ]
-    [ NAME    = 'Lee'                         ]
-    [                                         ]
-    [ SPOUSE  = [ ADDRESS -> (1)  ]           ]
-    [           [ NAME    = 'Kim' ]           ]
-
-There can be any number of tags within a single feature structure.
-
-    >>> fs3 = nltk.FeatStruct("[A=(1)[B=b], C=(2)[], D->(1), E->(2)]")
-    >>> print(fs3)
-    [ A = (1) [ B = 'b' ] ]
-    [                     ]
-    [ C = (2) []          ]
-    [                     ]
-    [ D -> (1)            ]
-    [ E -> (2)            ]
-    >>> fs1 = nltk.FeatStruct(NUMBER=74, STREET='rue Pascal')
-    >>> fs2 = nltk.FeatStruct(CITY='Paris')
-    >>> print(nltk.unify(fs1, fs2))
-    [ CITY   = 'Paris'      ]
-    [ NUMBER = 74           ]
-    [ STREET = 'rue Pascal' ]
-
-Unification is symmetric:
-
-    >>> nltk.unify(fs1, fs2) == nltk.unify(fs2, fs1)
-    True
-
-Unification is associative:
-
-    >>> fs3 = nltk.FeatStruct(TELNO='01 27 86 42 96')
-    >>> nltk.unify(nltk.unify(fs1, fs2), fs3) == nltk.unify(fs1, nltk.unify(fs2, fs3))
-    True
-
-Unification between `FS`:math:\ :subscript:`0` and `FS`:math:\
-:subscript:`1` will fail if the two feature structures share a path |pi|,
-but the value of |pi| in `FS`:math:\ :subscript:`0` is a distinct
-atom from the value of |pi| in `FS`:math:\ :subscript:`1`. In NLTK,
-this is implemented by setting the result of unification to be
-``None``.
-
-    >>> fs0 = nltk.FeatStruct(A='a')
-    >>> fs1 = nltk.FeatStruct(A='b')
-    >>> print(nltk.unify(fs0, fs1))
-    None
-
-Now, if we look at how unification interacts with structure-sharing,
-things become really interesting.
-
-
-
-    >>> fs0 = nltk.FeatStruct("""[NAME=Lee,
-    ...                                ADDRESS=[NUMBER=74,
-    ...                                         STREET='rue Pascal'],
-    ...                                SPOUSE= [NAME=Kim,
-    ...                                         ADDRESS=[NUMBER=74,
-    ...                                                  STREET='rue Pascal']]]""")
-    >>> print(fs0)
-    [ ADDRESS = [ NUMBER = 74           ]               ]
-    [           [ STREET = 'rue Pascal' ]               ]
-    [                                                   ]
-    [ NAME    = 'Lee'                                   ]
-    [                                                   ]
-    [           [ ADDRESS = [ NUMBER = 74           ] ] ]
-    [ SPOUSE  = [           [ STREET = 'rue Pascal' ] ] ]
-    [           [                                     ] ]
-    [           [ NAME    = 'Kim'                     ] ]
-
-
-    >>> fs1 = nltk.FeatStruct("[SPOUSE=[ADDRESS=[CITY=Paris]]]")
-    >>> print(nltk.unify(fs0, fs1))
-    [ ADDRESS = [ NUMBER = 74           ]               ]
-    [           [ STREET = 'rue Pascal' ]               ]
-    [                                                   ]
-    [ NAME    = 'Lee'                                   ]
-    [                                                   ]
-    [           [           [ CITY   = 'Paris'      ] ] ]
-    [           [ ADDRESS = [ NUMBER = 74           ] ] ]
-    [ SPOUSE  = [           [ STREET = 'rue Pascal' ] ] ]
-    [           [                                     ] ]
-    [           [ NAME    = 'Kim'                     ] ]
-
-    >>> fs2 = nltk.FeatStruct("""[NAME=Lee, ADDRESS=(1)[NUMBER=74, STREET='rue Pascal'],
-    ...                                SPOUSE=[NAME=Kim, ADDRESS->(1)]]""")
-
-
-    >>> print(fs2)
-    [ ADDRESS = (1) [ NUMBER = 74           ] ]
-    [               [ STREET = 'rue Pascal' ] ]
-    [                                         ]
-    [ NAME    = 'Lee'                         ]
-    [                                         ]
-    [ SPOUSE  = [ ADDRESS -> (1)  ]           ]
-    [           [ NAME    = 'Kim' ]           ]
-
-
-    >>> print(nltk.unify(fs2, fs1))
-    [               [ CITY   = 'Paris'      ] ]
-    [ ADDRESS = (1) [ NUMBER = 74           ] ]
-    [               [ STREET = 'rue Pascal' ] ]
-    [                                         ]
-    [ NAME    = 'Lee'                         ]
-    [                                         ]
-    [ SPOUSE  = [ ADDRESS -> (1)  ]           ]
-    [           [ NAME    = 'Kim' ]           ]
-
-
-    >>> fs1 = nltk.FeatStruct("[ADDRESS1=[NUMBER=74, STREET='rue Pascal']]")
-    >>> fs2 = nltk.FeatStruct("[ADDRESS1=?x, ADDRESS2=?x]")
-    >>> print(fs2)
-    [ ADDRESS1 = ?x ]
-    [ ADDRESS2 = ?x ]
-    >>> print(nltk.unify(fs1, fs2))
-    [ ADDRESS1 = (1) [ NUMBER = 74           ] ]
-    [                [ STREET = 'rue Pascal' ] ]
-    [                                          ]
-    [ ADDRESS2 -> (1)                          ]
-
-
-
-
-    >>> sent = 'who do you claim that you like'
-    >>> tokens = sent.split()
-    >>> cp = parse.load_parser('grammars/book_grammars/feat1.fcfg', trace=1)
-    >>> trees = cp.parse(tokens)
-    |.w.d.y.c.t.y.l.|
-    |[-] . . . . . .| [0:1] 'who'
-    |. [-] . . . . .| [1:2] 'do'
-    |. . [-] . . . .| [2:3] 'you'
-    |. . . [-] . . .| [3:4] 'claim'
-    |. . . . [-] . .| [4:5] 'that'
-    |. . . . . [-] .| [5:6] 'you'
-    |. . . . . . [-]| [6:7] 'like'
-    |# . . . . . . .| [0:0] NP[]/NP[] -> *
-    |. # . . . . . .| [1:1] NP[]/NP[] -> *
-    |. . # . . . . .| [2:2] NP[]/NP[] -> *
-    |. . . # . . . .| [3:3] NP[]/NP[] -> *
-    |. . . . # . . .| [4:4] NP[]/NP[] -> *
-    |. . . . . # . .| [5:5] NP[]/NP[] -> *
-    |. . . . . . # .| [6:6] NP[]/NP[] -> *
-    |. . . . . . . #| [7:7] NP[]/NP[] -> *
-    |[-] . . . . . .| [0:1] NP[+WH] -> 'who' *
-    |[-> . . . . . .| [0:1] S[-INV] -> NP[] * VP[] {}
-    |[-> . . . . . .| [0:1] S[-INV]/?x[] -> NP[] * VP[]/?x[] {}
-    |[-> . . . . . .| [0:1] S[-INV] -> NP[] * S[]/NP[] {}
-    |. [-] . . . . .| [1:2] V[+AUX] -> 'do' *
-    |. [-> . . . . .| [1:2] S[+INV] -> V[+AUX] * NP[] VP[] {}
-    |. [-> . . . . .| [1:2] S[+INV]/?x[] -> V[+AUX] * NP[] VP[]/?x[] {}
-    |. [-> . . . . .| [1:2] VP[] -> V[+AUX] * VP[] {}
-    |. [-> . . . . .| [1:2] VP[]/?x[] -> V[+AUX] * VP[]/?x[] {}
-    |. . [-] . . . .| [2:3] NP[-WH] -> 'you' *
-    |. . [-> . . . .| [2:3] S[-INV] -> NP[] * VP[] {}
-    |. . [-> . . . .| [2:3] S[-INV]/?x[] -> NP[] * VP[]/?x[] {}
-    |. . [-> . . . .| [2:3] S[-INV] -> NP[] * S[]/NP[] {}
-    |. [---> . . . .| [1:3] S[+INV] -> V[+AUX] NP[] * VP[] {}
-    |. [---> . . . .| [1:3] S[+INV]/?x[] -> V[+AUX] NP[] * VP[]/?x[] {}
-    |. . . [-] . . .| [3:4] V[-AUX, SUBCAT='clause'] -> 'claim' *
-    |. . . [-> . . .| [3:4] VP[] -> V[-AUX, SUBCAT='clause'] * SBar[] {}
-    |. . . [-> . . .| [3:4] VP[]/?x[] -> V[-AUX, SUBCAT='clause'] * SBar[]/?x[] {}
-    |. . . . [-] . .| [4:5] Comp[] -> 'that' *
-    |. . . . [-> . .| [4:5] SBar[] -> Comp[] * S[-INV] {}
-    |. . . . [-> . .| [4:5] SBar[]/?x[] -> Comp[] * S[-INV]/?x[] {}
-    |. . . . . [-] .| [5:6] NP[-WH] -> 'you' *
-    |. . . . . [-> .| [5:6] S[-INV] -> NP[] * VP[] {}
-    |. . . . . [-> .| [5:6] S[-INV]/?x[] -> NP[] * VP[]/?x[] {}
-    |. . . . . [-> .| [5:6] S[-INV] -> NP[] * S[]/NP[] {}
-    |. . . . . . [-]| [6:7] V[-AUX, SUBCAT='trans'] -> 'like' *
-    |. . . . . . [->| [6:7] VP[] -> V[-AUX, SUBCAT='trans'] * NP[] {}
-    |. . . . . . [->| [6:7] VP[]/?x[] -> V[-AUX, SUBCAT='trans'] * NP[]/?x[] {}
-    |. . . . . . [-]| [6:7] VP[]/NP[] -> V[-AUX, SUBCAT='trans'] NP[]/NP[] *
-    |. . . . . [---]| [5:7] S[-INV]/NP[] -> NP[] VP[]/NP[] *
-    |. . . . [-----]| [4:7] SBar[]/NP[] -> Comp[] S[-INV]/NP[] *
-    |. . . [-------]| [3:7] VP[]/NP[] -> V[-AUX, SUBCAT='clause'] SBar[]/NP[] *
-    |. . [---------]| [2:7] S[-INV]/NP[] -> NP[] VP[]/NP[] *
-    |. [-----------]| [1:7] S[+INV]/NP[] -> V[+AUX] NP[] VP[]/NP[] *
-    |[=============]| [0:7] S[-INV] -> NP[] S[]/NP[] *
-
-    >>> trees = list(trees)
-    >>> for tree in trees: print(tree)
-    (S[-INV]
-      (NP[+WH] who)
-      (S[+INV]/NP[]
-        (V[+AUX] do)
-        (NP[-WH] you)
-        (VP[]/NP[]
-          (V[-AUX, SUBCAT='clause'] claim)
-          (SBar[]/NP[]
-            (Comp[] that)
-            (S[-INV]/NP[]
-              (NP[-WH] you)
-              (VP[]/NP[] (V[-AUX, SUBCAT='trans'] like) (NP[]/NP[] )))))))
-
-A different parser should give the same parse trees, but perhaps in a different order:
-
-    >>> cp2 = parse.load_parser('grammars/book_grammars/feat1.fcfg', trace=1,
-    ...                         parser=parse.FeatureEarleyChartParser)
-    >>> trees2 = cp2.parse(tokens)
-    |.w.d.y.c.t.y.l.|
-    |[-] . . . . . .| [0:1] 'who'
-    |. [-] . . . . .| [1:2] 'do'
-    |. . [-] . . . .| [2:3] 'you'
-    |. . . [-] . . .| [3:4] 'claim'
-    |. . . . [-] . .| [4:5] 'that'
-    |. . . . . [-] .| [5:6] 'you'
-    |. . . . . . [-]| [6:7] 'like'
-    |> . . . . . . .| [0:0] S[-INV] -> * NP[] VP[] {}
-    |> . . . . . . .| [0:0] S[-INV]/?x[] -> * NP[] VP[]/?x[] {}
-    |> . . . . . . .| [0:0] S[-INV] -> * NP[] S[]/NP[] {}
-    |> . . . . . . .| [0:0] S[-INV] -> * Adv[+NEG] S[+INV] {}
-    |> . . . . . . .| [0:0] S[+INV] -> * V[+AUX] NP[] VP[] {}
-    |> . . . . . . .| [0:0] S[+INV]/?x[] -> * V[+AUX] NP[] VP[]/?x[] {}
-    |> . . . . . . .| [0:0] NP[+WH] -> * 'who' {}
-    |[-] . . . . . .| [0:1] NP[+WH] -> 'who' *
-    |[-> . . . . . .| [0:1] S[-INV] -> NP[] * VP[] {}
-    |[-> . . . . . .| [0:1] S[-INV]/?x[] -> NP[] * VP[]/?x[] {}
-    |[-> . . . . . .| [0:1] S[-INV] -> NP[] * S[]/NP[] {}
-    |. > . . . . . .| [1:1] S[-INV]/?x[] -> * NP[] VP[]/?x[] {}
-    |. > . . . . . .| [1:1] S[+INV]/?x[] -> * V[+AUX] NP[] VP[]/?x[] {}
-    |. > . . . . . .| [1:1] V[+AUX] -> * 'do' {}
-    |. > . . . . . .| [1:1] VP[]/?x[] -> * V[-AUX, SUBCAT='trans'] NP[]/?x[] {}
-    |. > . . . . . .| [1:1] VP[]/?x[] -> * V[-AUX, SUBCAT='clause'] SBar[]/?x[] {}
-    |. > . . . . . .| [1:1] VP[]/?x[] -> * V[+AUX] VP[]/?x[] {}
-    |. > . . . . . .| [1:1] VP[] -> * V[-AUX, SUBCAT='intrans'] {}
-    |. > . . . . . .| [1:1] VP[] -> * V[-AUX, SUBCAT='trans'] NP[] {}
-    |. > . . . . . .| [1:1] VP[] -> * V[-AUX, SUBCAT='clause'] SBar[] {}
-    |. > . . . . . .| [1:1] VP[] -> * V[+AUX] VP[] {}
-    |. [-] . . . . .| [1:2] V[+AUX] -> 'do' *
-    |. [-> . . . . .| [1:2] S[+INV]/?x[] -> V[+AUX] * NP[] VP[]/?x[] {}
-    |. [-> . . . . .| [1:2] VP[]/?x[] -> V[+AUX] * VP[]/?x[] {}
-    |. [-> . . . . .| [1:2] VP[] -> V[+AUX] * VP[] {}
-    |. . > . . . . .| [2:2] VP[] -> * V[-AUX, SUBCAT='intrans'] {}
-    |. . > . . . . .| [2:2] VP[] -> * V[-AUX, SUBCAT='trans'] NP[] {}
-    |. . > . . . . .| [2:2] VP[] -> * V[-AUX, SUBCAT='clause'] SBar[] {}
-    |. . > . . . . .| [2:2] VP[] -> * V[+AUX] VP[] {}
-    |. . > . . . . .| [2:2] VP[]/?x[] -> * V[-AUX, SUBCAT='trans'] NP[]/?x[] {}
-    |. . > . . . . .| [2:2] VP[]/?x[] -> * V[-AUX, SUBCAT='clause'] SBar[]/?x[] {}
-    |. . > . . . . .| [2:2] VP[]/?x[] -> * V[+AUX] VP[]/?x[] {}
-    |. . > . . . . .| [2:2] NP[-WH] -> * 'you' {}
-    |. . [-] . . . .| [2:3] NP[-WH] -> 'you' *
-    |. [---> . . . .| [1:3] S[+INV]/?x[] -> V[+AUX] NP[] * VP[]/?x[] {}
-    |. . . > . . . .| [3:3] VP[]/?x[] -> * V[-AUX, SUBCAT='trans'] NP[]/?x[] {}
-    |. . . > . . . .| [3:3] VP[]/?x[] -> * V[-AUX, SUBCAT='clause'] SBar[]/?x[] {}
-    |. . . > . . . .| [3:3] VP[]/?x[] -> * V[+AUX] VP[]/?x[] {}
-    |. . . > . . . .| [3:3] V[-AUX, SUBCAT='clause'] -> * 'claim' {}
-    |. . . [-] . . .| [3:4] V[-AUX, SUBCAT='clause'] -> 'claim' *
-    |. . . [-> . . .| [3:4] VP[]/?x[] -> V[-AUX, SUBCAT='clause'] * SBar[]/?x[] {}
-    |. . . . > . . .| [4:4] SBar[]/?x[] -> * Comp[] S[-INV]/?x[] {}
-    |. . . . > . . .| [4:4] Comp[] -> * 'that' {}
-    |. . . . [-] . .| [4:5] Comp[] -> 'that' *
-    |. . . . [-> . .| [4:5] SBar[]/?x[] -> Comp[] * S[-INV]/?x[] {}
-    |. . . . . > . .| [5:5] S[-INV]/?x[] -> * NP[] VP[]/?x[] {}
-    |. . . . . > . .| [5:5] NP[-WH] -> * 'you' {}
-    |. . . . . [-] .| [5:6] NP[-WH] -> 'you' *
-    |. . . . . [-> .| [5:6] S[-INV]/?x[] -> NP[] * VP[]/?x[] {}
-    |. . . . . . > .| [6:6] VP[]/?x[] -> * V[-AUX, SUBCAT='trans'] NP[]/?x[] {}
-    |. . . . . . > .| [6:6] VP[]/?x[] -> * V[-AUX, SUBCAT='clause'] SBar[]/?x[] {}
-    |. . . . . . > .| [6:6] VP[]/?x[] -> * V[+AUX] VP[]/?x[] {}
-    |. . . . . . > .| [6:6] V[-AUX, SUBCAT='trans'] -> * 'like' {}
-    |. . . . . . [-]| [6:7] V[-AUX, SUBCAT='trans'] -> 'like' *
-    |. . . . . . [->| [6:7] VP[]/?x[] -> V[-AUX, SUBCAT='trans'] * NP[]/?x[] {}
-    |. . . . . . . #| [7:7] NP[]/NP[] -> *
-    |. . . . . . [-]| [6:7] VP[]/NP[] -> V[-AUX, SUBCAT='trans'] NP[]/NP[] *
-    |. . . . . [---]| [5:7] S[-INV]/NP[] -> NP[] VP[]/NP[] *
-    |. . . . [-----]| [4:7] SBar[]/NP[] -> Comp[] S[-INV]/NP[] *
-    |. . . [-------]| [3:7] VP[]/NP[] -> V[-AUX, SUBCAT='clause'] SBar[]/NP[] *
-    |. [-----------]| [1:7] S[+INV]/NP[] -> V[+AUX] NP[] VP[]/NP[] *
-    |[=============]| [0:7] S[-INV] -> NP[] S[]/NP[] *
-
-    >>> sorted(trees) == sorted(trees2)
-    True
-
-
-Let's load a German grammar:
-
-    >>> cp = parse.load_parser('grammars/book_grammars/german.fcfg', trace=0)
-    >>> sent = 'die Katze sieht den Hund'
-    >>> tokens = sent.split()
-    >>> trees = cp.parse(tokens)
-    >>> for tree in trees: print(tree)
-    (S[]
-      (NP[AGR=[GND='fem', NUM='sg', PER=3], CASE='nom']
-        (Det[AGR=[GND='fem', NUM='sg', PER=3], CASE='nom'] die)
-        (N[AGR=[GND='fem', NUM='sg', PER=3]] Katze))
-      (VP[AGR=[NUM='sg', PER=3]]
-        (TV[AGR=[NUM='sg', PER=3], OBJCASE='acc'] sieht)
-        (NP[AGR=[GND='masc', NUM='sg', PER=3], CASE='acc']
-          (Det[AGR=[GND='masc', NUM='sg', PER=3], CASE='acc'] den)
-          (N[AGR=[GND='masc', NUM='sg', PER=3]] Hund))))
-
-Grammar with Binding Operators
-------------------------------
-The `bindop.fcfg`_ grammar is a semantic grammar that uses lambda
-calculus.  Each element has a core semantics, which is a single lambda
-calculus expression; and a set of binding operators, which bind
-variables.
-
-.. _bindop.fcfg: http://nltk.svn.sourceforge.net/svnroot/nltk/trunk/nltk/data/grammars/bindop.fcfg
-
-In order to make the binding operators work right, they need to
-instantiate their bound variable every time they are added to the
-chart.  To do this, we use a special subclass of `Chart`, called
-`InstantiateVarsChart`.
-
-    >>> from nltk.parse.featurechart import InstantiateVarsChart
-    >>> cp = parse.load_parser('grammars/sample_grammars/bindop.fcfg', trace=1,
-    ...                        chart_class=InstantiateVarsChart)
-    >>> print(cp.grammar())
-    Grammar with 15 productions (start state = S[])
-        S[SEM=[BO={?b1+?b2}, CORE=<?vp(?subj)>]] -> NP[SEM=[BO=?b1, CORE=?subj]] VP[SEM=[BO=?b2, CORE=?vp]]
-        VP[SEM=[BO={?b1+?b2}, CORE=<?v(?obj)>]] -> TV[SEM=[BO=?b1, CORE=?v]] NP[SEM=[BO=?b2, CORE=?obj]]
-        VP[SEM=?s] -> IV[SEM=?s]
-        NP[SEM=[BO={?b1+?b2+{bo(?det(?n),@x)}}, CORE=<@x>]] -> Det[SEM=[BO=?b1, CORE=?det]] N[SEM=[BO=?b2, CORE=?n]]
-        Det[SEM=[BO={/}, CORE=<\Q P.exists x.(Q(x) & P(x))>]] -> 'a'
-        N[SEM=[BO={/}, CORE=<dog>]] -> 'dog'
-        N[SEM=[BO={/}, CORE=<dog>]] -> 'cat'
-        N[SEM=[BO={/}, CORE=<dog>]] -> 'mouse'
-        IV[SEM=[BO={/}, CORE=<\x.bark(x)>]] -> 'barks'
-        IV[SEM=[BO={/}, CORE=<\x.bark(x)>]] -> 'eats'
-        IV[SEM=[BO={/}, CORE=<\x.bark(x)>]] -> 'walks'
-        TV[SEM=[BO={/}, CORE=<\x y.feed(y,x)>]] -> 'feeds'
-        TV[SEM=[BO={/}, CORE=<\x y.feed(y,x)>]] -> 'walks'
-        NP[SEM=[BO={bo(\P.P(John),@x)}, CORE=<@x>]] -> 'john'
-        NP[SEM=[BO={bo(\P.P(John),@x)}, CORE=<@x>]] -> 'alex'
-
-A simple intransitive sentence:
-
-    >>> from nltk.sem import logic
-    >>> logic._counter._value = 100
-
-    >>> trees = cp.parse('john barks'.split())
-    |. john.barks.|
-    |[-----]     .| [0:1] 'john'
-    |.     [-----]| [1:2] 'barks'
-    |[-----]     .| [0:1] NP[SEM=[BO={bo(\P.P(John),z101)}, CORE=<z101>]] -> 'john' *
-    |[----->     .| [0:1] S[SEM=[BO={?b1+?b2}, CORE=<?vp(?subj)>]] -> NP[SEM=[BO=?b1, CORE=?subj]] * VP[SEM=[BO=?b2, CORE=?vp]] {?b1: {bo(\P.P(John),z2)}, ?subj: <IndividualVariableExpression z2>}
-    |.     [-----]| [1:2] IV[SEM=[BO={/}, CORE=<\x.bark(x)>]] -> 'barks' *
-    |.     [-----]| [1:2] VP[SEM=[BO={/}, CORE=<\x.bark(x)>]] -> IV[SEM=[BO={/}, CORE=<\x.bark(x)>]] *
-    |[===========]| [0:2] S[SEM=[BO={bo(\P.P(John),z2)}, CORE=<bark(z2)>]] -> NP[SEM=[BO={bo(\P.P(John),z2)}, CORE=<z2>]] VP[SEM=[BO={/}, CORE=<\x.bark(x)>]] *
-    >>> for tree in trees: print(tree)
-    (S[SEM=[BO={bo(\P.P(John),z2)}, CORE=<bark(z2)>]]
-      (NP[SEM=[BO={bo(\P.P(John),z101)}, CORE=<z101>]] john)
-      (VP[SEM=[BO={/}, CORE=<\x.bark(x)>]]
-        (IV[SEM=[BO={/}, CORE=<\x.bark(x)>]] barks)))
-
-A transitive sentence:
-
-    >>> trees = cp.parse('john feeds a dog'.split())
-    |.joh.fee. a .dog.|
-    |[---]   .   .   .| [0:1] 'john'
-    |.   [---]   .   .| [1:2] 'feeds'
-    |.   .   [---]   .| [2:3] 'a'
-    |.   .   .   [---]| [3:4] 'dog'
-    |[---]   .   .   .| [0:1] NP[SEM=[BO={bo(\P.P(John),z102)}, CORE=<z102>]] -> 'john' *
-    |[--->   .   .   .| [0:1] S[SEM=[BO={?b1+?b2}, CORE=<?vp(?subj)>]] -> NP[SEM=[BO=?b1, CORE=?subj]] * VP[SEM=[BO=?b2, CORE=?vp]] {?b1: {bo(\P.P(John),z2)}, ?subj: <IndividualVariableExpression z2>}
-    |.   [---]   .   .| [1:2] TV[SEM=[BO={/}, CORE=<\x y.feed(y,x)>]] -> 'feeds' *
-    |.   [--->   .   .| [1:2] VP[SEM=[BO={?b1+?b2}, CORE=<?v(?obj)>]] -> TV[SEM=[BO=?b1, CORE=?v]] * NP[SEM=[BO=?b2, CORE=?obj]] {?b1: {/}, ?v: <LambdaExpression \x y.feed(y,x)>}
-    |.   .   [---]   .| [2:3] Det[SEM=[BO={/}, CORE=<\Q P.exists x.(Q(x) & P(x))>]] -> 'a' *
-    |.   .   [--->   .| [2:3] NP[SEM=[BO={?b1+?b2+{bo(?det(?n),@x)}}, CORE=<@x>]] -> Det[SEM=[BO=?b1, CORE=?det]] * N[SEM=[BO=?b2, CORE=?n]] {?b1: {/}, ?det: <LambdaExpression \Q P.exists x.(Q(x) & P(x))>}
-    |.   .   .   [---]| [3:4] N[SEM=[BO={/}, CORE=<dog>]] -> 'dog' *
-    |.   .   [-------]| [2:4] NP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z103)}, CORE=<z103>]] -> Det[SEM=[BO={/}, CORE=<\Q P.exists x.(Q(x) & P(x))>]] N[SEM=[BO={/}, CORE=<dog>]] *
-    |.   .   [------->| [2:4] S[SEM=[BO={?b1+?b2}, CORE=<?vp(?subj)>]] -> NP[SEM=[BO=?b1, CORE=?subj]] * VP[SEM=[BO=?b2, CORE=?vp]] {?b1: {bo(\P.exists x.(dog(x) & P(x)),z2)}, ?subj: <IndividualVariableExpression z2>}
-    |.   [-----------]| [1:4] VP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z2)}, CORE=<\y.feed(y,z2)>]] -> TV[SEM=[BO={/}, CORE=<\x y.feed(y,x)>]] NP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z2)}, CORE=<z2>]] *
-    |[===============]| [0:4] S[SEM=[BO={bo(\P.P(John),z2), bo(\P.exists x.(dog(x) & P(x)),z3)}, CORE=<feed(z2,z3)>]] -> NP[SEM=[BO={bo(\P.P(John),z2)}, CORE=<z2>]] VP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z3)}, CORE=<\y.feed(y,z3)>]] *
-
-    >>> for tree in trees: print(tree)
-    (S[SEM=[BO={bo(\P.P(John),z2), bo(\P.exists x.(dog(x) & P(x)),z3)}, CORE=<feed(z2,z3)>]]
-      (NP[SEM=[BO={bo(\P.P(John),z102)}, CORE=<z102>]] john)
-      (VP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z2)}, CORE=<\y.feed(y,z2)>]]
-        (TV[SEM=[BO={/}, CORE=<\x y.feed(y,x)>]] feeds)
-        (NP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z103)}, CORE=<z103>]]
-          (Det[SEM=[BO={/}, CORE=<\Q P.exists x.(Q(x) & P(x))>]] a)
-          (N[SEM=[BO={/}, CORE=<dog>]] dog))))
-
-Turn down the verbosity:
-
-    >>> cp = parse.load_parser('grammars/sample_grammars/bindop.fcfg', trace=0,
-    ...                       chart_class=InstantiateVarsChart)
-
-Reuse the same lexical item twice:
-
-    >>> trees = cp.parse('john feeds john'.split())
-    >>> for tree in trees: print(tree)
-    (S[SEM=[BO={bo(\P.P(John),z2), bo(\P.P(John),z3)}, CORE=<feed(z2,z3)>]]
-      (NP[SEM=[BO={bo(\P.P(John),z104)}, CORE=<z104>]] john)
-      (VP[SEM=[BO={bo(\P.P(John),z2)}, CORE=<\y.feed(y,z2)>]]
-        (TV[SEM=[BO={/}, CORE=<\x y.feed(y,x)>]] feeds)
-        (NP[SEM=[BO={bo(\P.P(John),z105)}, CORE=<z105>]] john)))
-
-    >>> trees = cp.parse('a dog feeds a dog'.split())
-    >>> for tree in trees: print(tree)
-    (S[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z2), bo(\P.exists x.(dog(x) & P(x)),z3)}, CORE=<feed(z2,z3)>]]
-      (NP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z106)}, CORE=<z106>]]
-        (Det[SEM=[BO={/}, CORE=<\Q P.exists x.(Q(x) & P(x))>]] a)
-        (N[SEM=[BO={/}, CORE=<dog>]] dog))
-      (VP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z2)}, CORE=<\y.feed(y,z2)>]]
-        (TV[SEM=[BO={/}, CORE=<\x y.feed(y,x)>]] feeds)
-        (NP[SEM=[BO={bo(\P.exists x.(dog(x) & P(x)),z107)}, CORE=<z107>]]
-          (Det[SEM=[BO={/}, CORE=<\Q P.exists x.(Q(x) & P(x))>]] a)
-          (N[SEM=[BO={/}, CORE=<dog>]] dog))))
diff --git a/nlp_resource_data/nltk/test/featstruct.doctest b/nlp_resource_data/nltk/test/featstruct.doctest

deleted file mode 100644 (file)

index 8c35dad..0000000
--- a/nlp_resource_data/nltk/test/featstruct.doctest
+++ /dev/null
@@ -1,1229 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-==================================
- Feature Structures & Unification
-==================================
-    >>> from __future__ import print_function
-    >>> from nltk.featstruct import FeatStruct
-    >>> from nltk.sem.logic import Variable, VariableExpression, Expression
-
-.. note:: For now, featstruct uses the older lambdalogic semantics
-   module.  Eventually, it should be updated to use the new first
-   order predicate logic module.
-
-Overview
-~~~~~~~~
-A feature structure is a mapping from feature identifiers to feature
-values, where feature values can be simple values (like strings or
-ints), nested feature structures, or variables:
-
-    >>> fs1 = FeatStruct(number='singular', person=3)
-    >>> print(fs1)
-    [ number = 'singular' ]
-    [ person = 3          ]
-
-Feature structures may be nested:
-
-    >>> fs2 = FeatStruct(type='NP', agr=fs1)
-    >>> print(fs2)
-    [ agr  = [ number = 'singular' ] ]
-    [        [ person = 3          ] ]
-    [                                ]
-    [ type = 'NP'                    ]
-
-Variables are used to indicate that two features should be assigned
-the same value.  For example, the following feature structure requires
-that the feature fs3['agr']['number'] be bound to the same value as the
-feature fs3['subj']['number'].
-
-    >>> fs3 = FeatStruct(agr=FeatStruct(number=Variable('?n')),
-    ...                  subj=FeatStruct(number=Variable('?n')))
-    >>> print(fs3)
-    [ agr  = [ number = ?n ] ]
-    [                        ]
-    [ subj = [ number = ?n ] ]
-
-Feature structures are typically used to represent partial information
-about objects.  A feature name that is not mapped to a value stands
-for a feature whose value is unknown (*not* a feature without a
-value).  Two feature structures that represent (potentially
-overlapping) information about the same object can be combined by
-*unification*.
-
-    >>> print(fs2.unify(fs3))
-    [ agr  = [ number = 'singular' ] ]
-    [        [ person = 3          ] ]
-    [                                ]
-    [ subj = [ number = 'singular' ] ]
-    [                                ]
-    [ type = 'NP'                    ]
-
-When two inconsistent feature structures are unified, the unification
-fails and returns ``None``.
-
-    >>> fs4 = FeatStruct(agr=FeatStruct(person=1))
-    >>> print(fs4.unify(fs2))
-    None
-    >>> print(fs2.unify(fs4))
-    None
-
-..
-    >>> del fs1, fs2, fs3, fs4 # clean-up
-
-Feature Structure Types
------------------------
-There are actually two types of feature structure:
-
-- *feature dictionaries*, implemented by `FeatDict`, act like
-  Python dictionaries.  Feature identifiers may be strings or
-  instances of the `Feature` class.
-- *feature lists*, implemented by `FeatList`, act like Python
-  lists.  Feature identifiers are integers.
-
-When you construct a feature structure using the `FeatStruct`
-constructor, it will automatically decide which type is appropriate:
-
-    >>> type(FeatStruct(number='singular'))
-    <class 'nltk.featstruct.FeatDict'>
-    >>> type(FeatStruct([1,2,3]))
-    <class 'nltk.featstruct.FeatList'>
-
-Usually, we will just use feature dictionaries; but sometimes feature
-lists can be useful too.  Two feature lists will unify with each other
-only if they have equal lengths, and all of their feature values
-match.  If you wish to write a feature list that contains 'unknown'
-values, you must use variables:
-
-    >>> fs1 = FeatStruct([1,2,Variable('?y')])
-    >>> fs2 = FeatStruct([1,Variable('?x'),3])
-    >>> fs1.unify(fs2)
-    [1, 2, 3]
-
-..
-    >>> del fs1, fs2 # clean-up
-
-Parsing Feature Structure Strings
----------------------------------
-Feature structures can be constructed directly from strings.  Often,
-this is more convenient than constructing them directly.  NLTK can
-parse most feature strings to produce the corresponding feature
-structures.  (But you must restrict your base feature values to
-strings, ints, logic expressions (`nltk.sem.logic.Expression`), and a
-few other types discussed below).
-
-Feature dictionaries are written like Python dictionaries, except that
-keys are not put in quotes; and square brackets (``[]``) are used
-instead of braces (``{}``):
-
-    >>> FeatStruct('[tense="past", agr=[number="sing", person=3]]')
-    [agr=[number='sing', person=3], tense='past']
-
-If a feature value is a single alphanumeric word, then it does not
-need to be quoted -- it will be automatically treated as a string:
-
-    >>> FeatStruct('[tense=past, agr=[number=sing, person=3]]')
-    [agr=[number='sing', person=3], tense='past']
-
-Feature lists are written like python lists:
-
-    >>> FeatStruct('[1, 2, 3]')
-    [1, 2, 3]
-
-The expression ``[]`` is treated as an empty feature dictionary, not
-an empty feature list:
-
-    >>> type(FeatStruct('[]'))
-    <class 'nltk.featstruct.FeatDict'>
-
-Feature Paths
--------------
-Features can be specified using *feature paths*, or tuples of feature
-identifiers that specify a path through the nested feature structures to
-a value.
-
-    >>> fs1 = FeatStruct('[x=1, y=[1,2,[z=3]]]')
-    >>> fs1['y']
-    [1, 2, [z=3]]
-    >>> fs1['y', 2]
-    [z=3]
-    >>> fs1['y', 2, 'z']
-    3
-
-..
-    >>> del fs1 # clean-up
-
-Reentrance
-----------
-Feature structures may contain reentrant feature values.  A *reentrant
-feature value* is a single feature structure that can be accessed via
-multiple feature paths.
-
-    >>> fs1 = FeatStruct(x='val')
-    >>> fs2 = FeatStruct(a=fs1, b=fs1)
-    >>> print(fs2)
-    [ a = (1) [ x = 'val' ] ]
-    [                       ]
-    [ b -> (1)              ]
-    >>> fs2
-    [a=(1)[x='val'], b->(1)]
-
-As you can see, reentrance is displayed by marking a feature structure
-with a unique identifier, in this case ``(1)``, the first time it is
-encountered; and then using the special form ``var -> id`` whenever it
-is encountered again.  You can use the same notation to directly
-create reentrant feature structures from strings.
-
-    >>> FeatStruct('[a=(1)[], b->(1), c=[d->(1)]]')
-    [a=(1)[], b->(1), c=[d->(1)]]
-
-Reentrant feature structures may contain cycles:
-
-    >>> fs3 = FeatStruct('(1)[a->(1)]')
-    >>> fs3['a', 'a', 'a', 'a']
-    (1)[a->(1)]
-    >>> fs3['a', 'a', 'a', 'a'] is fs3
-    True
-
-Unification preserves the reentrance relations imposed by both of the
-unified feature structures.  In the feature structure resulting from
-unification, any modifications to a reentrant feature value will be
-visible using any of its feature paths.
-
-    >>> fs3.unify(FeatStruct('[a=[b=12], c=33]'))
-    (1)[a->(1), b=12, c=33]
-
-..
-    >>> del fs1, fs2, fs3 # clean-up
-
-Feature Structure Equality
---------------------------
-Two feature structures are considered equal if they assign the same
-values to all features, *and* they contain the same reentrances.
-
-    >>> fs1 = FeatStruct('[a=(1)[x=1], b->(1)]')
-    >>> fs2 = FeatStruct('[a=(1)[x=1], b->(1)]')
-    >>> fs3 = FeatStruct('[a=[x=1], b=[x=1]]')
-    >>> fs1 == fs1, fs1 is fs1
-    (True, True)
-    >>> fs1 == fs2, fs1 is fs2
-    (True, False)
-    >>> fs1 == fs3, fs1 is fs3
-    (False, False)
-
-Note that this differs from how Python dictionaries and lists define
-equality -- in particular, Python dictionaries and lists ignore
-reentrance relations.  To test two feature structures for equality
-while ignoring reentrance relations, use the `equal_values()` method:
-
-    >>> fs1.equal_values(fs1)
-    True
-    >>> fs1.equal_values(fs2)
-    True
-    >>> fs1.equal_values(fs3)
-    True
-
-..
-    >>> del fs1, fs2, fs3 # clean-up
-
-Feature Value Sets & Feature Value Tuples
------------------------------------------
-`nltk.featstruct` defines two new data types that are intended to be
-used as feature values: `FeatureValueTuple` and `FeatureValueSet`.
-Both of these types are considered base values -- i.e., unification
-does *not* apply to them.  However, variable binding *does* apply to
-any values that they contain.
-
-Feature value tuples are written with parentheses:
-
-    >>> fs1 = FeatStruct('[x=(?x, ?y)]')
-    >>> fs1
-    [x=(?x, ?y)]
-    >>> fs1.substitute_bindings({Variable('?x'): 1, Variable('?y'): 2})
-    [x=(1, 2)]
-
-Feature sets are written with braces:
-
-    >>> fs1 = FeatStruct('[x={?x, ?y}]')
-    >>> fs1
-    [x={?x, ?y}]
-    >>> fs1.substitute_bindings({Variable('?x'): 1, Variable('?y'): 2})
-    [x={1, 2}]
-
-In addition to the basic feature value tuple & set classes, nltk
-defines feature value unions (for sets) and feature value
-concatenations (for tuples).  These are written using '+', and can be
-used to combine sets & tuples:
-
-    >>> fs1 = FeatStruct('[x=((1, 2)+?z), z=?z]')
-    >>> fs1
-    [x=((1, 2)+?z), z=?z]
-    >>> fs1.unify(FeatStruct('[z=(3, 4, 5)]'))
-    [x=(1, 2, 3, 4, 5), z=(3, 4, 5)]
-
-Thus, feature value tuples and sets can be used to build up tuples
-and sets of values over the course of unification.  For example, when
-parsing sentences using a semantic feature grammar, feature sets or
-feature tuples can be used to build a list of semantic predicates as
-the sentence is parsed.
-
-As was mentioned above, unification does not apply to feature value
-tuples and sets.  One reason for this is that it's impossible to define a
-single correct answer for unification when concatenation is used.
-Consider the following example:
-
-    >>> fs1 = FeatStruct('[x=(1, 2, 3, 4)]')
-    >>> fs2 = FeatStruct('[x=(?a+?b), a=?a, b=?b]')
-
-If unification applied to feature tuples, then the unification
-algorithm would have to arbitrarily choose how to divide the tuple
-(1,2,3,4) into two parts.  Instead, the unification algorithm refuses
-to make this decision, and simply unifies based on value.  Because
-(1,2,3,4) is not equal to (?a+?b), fs1 and fs2 will not unify:
-
-    >>> print(fs1.unify(fs2))
-    None
-
-If you need a list-like structure that unification does apply to, use
-`FeatList`.
-
-..
-    >>> del fs1, fs2 # clean-up
-
-Light-weight Feature Structures
--------------------------------
-Many of the functions defined by `nltk.featstruct` can be applied
-directly to simple Python dictionaries and lists, rather than to
-full-fledged `FeatDict` and `FeatList` objects.  In other words,
-Python ``dicts`` and ``lists`` can be used as "light-weight" feature
-structures.
-
-    >>> # Note: pprint prints dicts sorted
-    >>> from pprint import pprint
-    >>> from nltk.featstruct import unify
-    >>> pprint(unify(dict(x=1, y=dict()), dict(a='a', y=dict(b='b'))))
-    {'a': 'a', 'x': 1, 'y': {'b': 'b'}}
-
-However, you should keep in mind the following caveats:
-
-- Python dictionaries & lists ignore reentrance when checking for
-  equality between values.  But two FeatStructs with different
-  reentrances are considered nonequal, even if all their base
-  values are equal.
-
-- FeatStructs can be easily frozen, allowing them to be used as
-  keys in hash tables.  Python dictionaries and lists can not.
-
-- FeatStructs display reentrance in their string representations;
-  Python dictionaries and lists do not.
-
-- FeatStructs may *not* be mixed with Python dictionaries and lists
-  (e.g., when performing unification).
-
-- FeatStructs provide a number of useful methods, such as `walk()`
-  and `cyclic()`, which are not available for Python dicts & lists.
-
-In general, if your feature structures will contain any reentrances,
-or if you plan to use them as dictionary keys, it is strongly
-recommended that you use full-fledged `FeatStruct` objects.
-
-Custom Feature Values
----------------------
-The abstract base class `CustomFeatureValue` can be used to define new
-base value types that have custom unification methods.  For example,
-the following feature value type encodes a range, and defines
-unification as taking the intersection on the ranges:
-
-    >>> from functools import total_ordering
-    >>> from nltk.featstruct import CustomFeatureValue, UnificationFailure
-    >>> @total_ordering
-    ... class Range(CustomFeatureValue):
-    ...     def __init__(self, low, high):
-    ...         assert low <= high
-    ...         self.low = low
-    ...         self.high = high
-    ...     def unify(self, other):
-    ...         if not isinstance(other, Range):
-    ...             return UnificationFailure
-    ...         low = max(self.low, other.low)
-    ...         high = min(self.high, other.high)
-    ...         if low <= high: return Range(low, high)
-    ...         else: return UnificationFailure
-    ...     def __repr__(self):
-    ...         return '(%s<x<%s)' % (self.low, self.high)
-    ...     def __eq__(self, other):
-    ...         if not isinstance(other, Range):
-    ...             return False
-    ...         return (self.low == other.low) and (self.high == other.high)
-    ...     def __lt__(self, other):
-    ...         if not isinstance(other, Range):
-    ...             return True
-    ...         return (self.low, self.high) < (other.low, other.high)
-
-    >>> fs1 = FeatStruct(x=Range(5,8), y=FeatStruct(z=Range(7,22)))
-    >>> print(fs1.unify(FeatStruct(x=Range(6, 22))))
-    [ x = (6<x<8)          ]
-    [                      ]
-    [ y = [ z = (7<x<22) ] ]
-    >>> print(fs1.unify(FeatStruct(x=Range(9, 12))))
-    None
-    >>> print(fs1.unify(FeatStruct(x=12)))
-    None
-    >>> print(fs1.unify(FeatStruct('[x=?x, y=[z=?x]]')))
-    [ x = (7<x<8)         ]
-    [                     ]
-    [ y = [ z = (7<x<8) ] ]
-
-Regression Tests
-~~~~~~~~~~~~~~~~
-
-Dictionary access methods (non-mutating)
-----------------------------------------
-
-    >>> fs1 = FeatStruct(a=1, b=2, c=3)
-    >>> fs2 = FeatStruct(x=fs1, y='x')
-
-Feature structures support all dictionary methods (excluding the class
-method `dict.fromkeys()`).  Non-mutating methods:
-
-    >>> sorted(fs2.keys())                               # keys()
-    ['x', 'y']
-    >>> sorted(fs2.values())                             # values()
-    [[a=1, b=2, c=3], 'x']
-    >>> sorted(fs2.items())                              # items()
-    [('x', [a=1, b=2, c=3]), ('y', 'x')]
-    >>> sorted(fs2)                                      # __iter__()
-    ['x', 'y']
-    >>> 'a' in fs2, 'x' in fs2                           # __contains__()
-    (False, True)
-    >>> fs2.has_key('a'), fs2.has_key('x')               # has_key()
-    (False, True)
-    >>> fs2['x'], fs2['y']                               # __getitem__()
-    ([a=1, b=2, c=3], 'x')
-    >>> fs2['a']                                         # __getitem__()
-    Traceback (most recent call last):
-      . . .
-    KeyError: 'a'
-    >>> fs2.get('x'), fs2.get('y'), fs2.get('a')         # get()
-    ([a=1, b=2, c=3], 'x', None)
-    >>> fs2.get('x', 'hello'), fs2.get('a', 'hello')     # get()
-    ([a=1, b=2, c=3], 'hello')
-    >>> len(fs1), len(fs2)                               # __len__
-    (3, 2)
-    >>> fs2.copy()                                       # copy()
-    [x=[a=1, b=2, c=3], y='x']
-    >>> fs2.copy() is fs2                                # copy()
-    False
-
-Note: by default, `FeatStruct.copy()` does a deep copy.  Use
-`FeatStruct.copy(deep=False)` for a shallow copy.
-
-..
-    >>> del fs1, fs2 # clean-up.
-
-Dictionary access methods (mutating)
-------------------------------------
-    >>> fs1 = FeatStruct(a=1, b=2, c=3)
-    >>> fs2 = FeatStruct(x=fs1, y='x')
-
-Setting features (`__setitem__()`)
-
-    >>> fs1['c'] = 5
-    >>> fs1
-    [a=1, b=2, c=5]
-    >>> fs1['x'] = 12
-    >>> fs1
-    [a=1, b=2, c=5, x=12]
-    >>> fs2['x', 'a'] = 2
-    >>> fs2
-    [x=[a=2, b=2, c=5, x=12], y='x']
-    >>> fs1
-    [a=2, b=2, c=5, x=12]
-
-Deleting features (`__delitem__()`)
-
-    >>> del fs1['x']
-    >>> fs1
-    [a=2, b=2, c=5]
-    >>> del fs2['x', 'a']
-    >>> fs1
-    [b=2, c=5]
-
-`setdefault()`:
-
-    >>> fs1.setdefault('b', 99)
-    2
-    >>> fs1
-    [b=2, c=5]
-    >>> fs1.setdefault('x', 99)
-    99
-    >>> fs1
-    [b=2, c=5, x=99]
-
-`update()`:
-
-    >>> fs2.update({'a':'A', 'b':'B'}, c='C')
-    >>> fs2
-    [a='A', b='B', c='C', x=[b=2, c=5, x=99], y='x']
-
-`pop()`:
-
-    >>> fs2.pop('a')
-    'A'
-    >>> fs2
-    [b='B', c='C', x=[b=2, c=5, x=99], y='x']
-    >>> fs2.pop('a')
-    Traceback (most recent call last):
-      . . .
-    KeyError: 'a'
-    >>> fs2.pop('a', 'foo')
-    'foo'
-    >>> fs2
-    [b='B', c='C', x=[b=2, c=5, x=99], y='x']
-
-`clear()`:
-
-    >>> fs1.clear()
-    >>> fs1
-    []
-    >>> fs2
-    [b='B', c='C', x=[], y='x']
-
-`popitem()`:
-
-    >>> sorted([fs2.popitem() for i in range(len(fs2))])
-    [('b', 'B'), ('c', 'C'), ('x', []), ('y', 'x')]
-    >>> fs2
-    []
-
-Once a feature structure has been frozen, it may not be mutated.
-
-    >>> fs1 = FeatStruct('[x=1, y=2, z=[a=3]]')
-    >>> fs1.freeze()
-    >>> fs1.frozen()
-    True
-    >>> fs1['z'].frozen()
-    True
-
-    >>> fs1['x'] = 5
-    Traceback (most recent call last):
-      . . .
-    ValueError: Frozen FeatStructs may not be modified.
-    >>> del fs1['x']
-    Traceback (most recent call last):
-      . . .
-    ValueError: Frozen FeatStructs may not be modified.
-    >>> fs1.clear()
-    Traceback (most recent call last):
-      . . .
-    ValueError: Frozen FeatStructs may not be modified.
-    >>> fs1.pop('x')
-    Traceback (most recent call last):
-      . . .
-    ValueError: Frozen FeatStructs may not be modified.
-    >>> fs1.popitem()
-    Traceback (most recent call last):
-      . . .
-    ValueError: Frozen FeatStructs may not be modified.
-    >>> fs1.setdefault('x')
-    Traceback (most recent call last):
-      . . .
-    ValueError: Frozen FeatStructs may not be modified.
-    >>> fs1.update(z=22)
-    Traceback (most recent call last):
-      . . .
-    ValueError: Frozen FeatStructs may not be modified.
-
-..
-    >>> del fs1, fs2 # clean-up.
-
-Feature Paths
--------------
-Make sure that __getitem__ with feature paths works as intended:
-
-    >>> fs1 = FeatStruct(a=1, b=2,
-    ...                 c=FeatStruct(
-    ...                     d=FeatStruct(e=12),
-    ...                     f=FeatStruct(g=55, h='hello')))
-    >>> fs1[()]
-    [a=1, b=2, c=[d=[e=12], f=[g=55, h='hello']]]
-    >>> fs1['a'], fs1[('a',)]
-    (1, 1)
-    >>> fs1['c','d','e']
-    12
-    >>> fs1['c','f','g']
-    55
-
-Feature paths that select unknown features raise KeyError:
-
-    >>> fs1['c', 'f', 'e']
-    Traceback (most recent call last):
-      . . .
-    KeyError: ('c', 'f', 'e')
-    >>> fs1['q', 'p']
-    Traceback (most recent call last):
-      . . .
-    KeyError: ('q', 'p')
-
-Feature paths that try to go 'through' a feature that's not a feature
-structure raise KeyError:
-
-    >>> fs1['a', 'b']
-    Traceback (most recent call last):
-      . . .
-    KeyError: ('a', 'b')
-
-Feature paths can go through reentrant structures:
-
-    >>> fs2 = FeatStruct('(1)[a=[b=[c->(1), d=5], e=11]]')
-    >>> fs2['a', 'b', 'c', 'a', 'e']
-    11
-    >>> fs2['a', 'b', 'c', 'a', 'b', 'd']
-    5
-    >>> fs2[tuple('abcabcabcabcabcabcabcabcabcabca')]
-    (1)[b=[c=[a->(1)], d=5], e=11]
-
-Indexing requires strings, `Feature`\s, or tuples; other types raise a
-TypeError:
-
-    >>> fs2[12]
-    Traceback (most recent call last):
-      . . .
-    TypeError: Expected feature name or path.  Got 12.
-    >>> fs2[list('abc')]
-    Traceback (most recent call last):
-      . . .
-    TypeError: Expected feature name or path.  Got ['a', 'b', 'c'].
-
-Feature paths can also be used with `get()`, `has_key()`, and
-`__contains__()`.
-
-    >>> fpath1 = tuple('abcabc')
-    >>> fpath2 = tuple('abcabz')
-    >>> fs2.get(fpath1), fs2.get(fpath2)
-    ((1)[a=[b=[c->(1), d=5], e=11]], None)
-    >>> fpath1 in fs2, fpath2 in fs2
-    (True, False)
-    >>> fs2.has_key(fpath1), fs2.has_key(fpath2)
-    (True, False)
-
-..
-    >>> del fs1, fs2 # clean-up
-
-Reading Feature Structures
---------------------------
-
-Empty feature struct:
-
-    >>> FeatStruct('[]')
-    []
-
-Test features with integer values:
-
-    >>> FeatStruct('[a=12, b=-33, c=0]')
-    [a=12, b=-33, c=0]
-
-Test features with string values.  Either single or double quotes may
-be used.  Strings are evaluated just like python strings -- in
-particular, you can use escape sequences and 'u' and 'r' prefixes, and
-triple-quoted strings.
-
-    >>> FeatStruct('[a="", b="hello", c="\'", d=\'\', e=\'"\']')
-    [a='', b='hello', c="'", d='', e='"']
-    >>> FeatStruct(r'[a="\\", b="\"", c="\x6f\\y", d="12"]')
-    [a='\\', b='"', c='o\\y', d='12']
-    >>> FeatStruct(r'[b=r"a\b\c"]')
-    [b='a\\b\\c']
-    >>> FeatStruct('[x="""a"""]')
-    [x='a']
-
-Test parsing of reentrant feature structures.
-
-    >>> FeatStruct('[a=(1)[], b->(1)]')
-    [a=(1)[], b->(1)]
-    >>> FeatStruct('[a=(1)[x=1, y=2], b->(1)]')
-    [a=(1)[x=1, y=2], b->(1)]
-
-Test parsing of cyclic feature structures.
-
-    >>> FeatStruct('[a=(1)[b->(1)]]')
-    [a=(1)[b->(1)]]
-    >>> FeatStruct('(1)[a=[b=[c->(1)]]]')
-    (1)[a=[b=[c->(1)]]]
-
-Strings of the form "+name" and "-name" may be used to specify boolean
-values.
-
-    >>> FeatStruct('[-bar, +baz, +foo]')
-    [-bar, +baz, +foo]
-
-None, True, and False are recognized as values:
-
-    >>> FeatStruct('[bar=True, baz=False, foo=None]')
-    [+bar, -baz, foo=None]
-
-Special features:
-
-    >>> FeatStruct('NP/VP')
-    NP[]/VP[]
-    >>> FeatStruct('?x/?x')
-    ?x[]/?x[]
-    >>> print(FeatStruct('VP[+fin, agr=?x, tense=past]/NP[+pl, agr=?x]'))
-    [ *type*  = 'VP'              ]
-    [                             ]
-    [           [ *type* = 'NP' ] ]
-    [ *slash* = [ agr    = ?x   ] ]
-    [           [ pl     = True ] ]
-    [                             ]
-    [ agr     = ?x                ]
-    [ fin     = True              ]
-    [ tense   = 'past'            ]
-
-Here the slash feature gets coerced:
-    >>> FeatStruct('[*slash*=a, x=b, *type*="NP"]')
-    NP[x='b']/a[]
-
-    >>> FeatStruct('NP[sem=<bob>]/NP')
-    NP[sem=<bob>]/NP[]
-    >>> FeatStruct('S[sem=<walk(bob)>]')
-    S[sem=<walk(bob)>]
-    >>> print(FeatStruct('NP[sem=<bob>]/NP'))
-    [ *type*  = 'NP'              ]
-    [                             ]
-    [ *slash* = [ *type* = 'NP' ] ]
-    [                             ]
-    [ sem     = <bob>             ]
-
-Playing with ranges:
-
-    >>> from nltk.featstruct import RangeFeature, FeatStructReader
-    >>> width = RangeFeature('width')
-    >>> reader = FeatStructReader([width])
-    >>> fs1 = reader.fromstring('[*width*=-5:12]')
-    >>> fs2 = reader.fromstring('[*width*=2:123]')
-    >>> fs3 = reader.fromstring('[*width*=-7:-2]')
-    >>> fs1.unify(fs2)
-    [*width*=(2, 12)]
-    >>> fs1.unify(fs3)
-    [*width*=(-5, -2)]
-    >>> print(fs2.unify(fs3)) # no overlap in width.
-    None
-
-The slash feature has a default value of 'False':
-
-    >>> print(FeatStruct('NP[]/VP').unify(FeatStruct('NP[]'), trace=1))
-    <BLANKLINE>
-    Unification trace:
-       / NP[]/VP[]
-      |\ NP[]
-      |
-      | Unify feature: *type*
-      |    / 'NP'
-      |   |\ 'NP'
-      |   |
-      |   +-->'NP'
-      |
-      | Unify feature: *slash*
-      |    / VP[]
-      |   |\ False
-      |   |
-      X   X <-- FAIL
-    None
-
-The demo structures from category.py.  They all parse, but they don't
-do quite the right thing -- ?x vs x.
-
-    >>> FeatStruct(pos='n', agr=FeatStruct(number='pl', gender='f'))
-    [agr=[gender='f', number='pl'], pos='n']
-    >>> FeatStruct(r'NP[sem=<bob>]/NP')
-    NP[sem=<bob>]/NP[]
-    >>> FeatStruct(r'S[sem=<app(?x, ?y)>]')
-    S[sem=<?x(?y)>]
-    >>> FeatStruct('?x/?x')
-    ?x[]/?x[]
-    >>> FeatStruct('VP[+fin, agr=?x, tense=past]/NP[+pl, agr=?x]')
-    VP[agr=?x, +fin, tense='past']/NP[agr=?x, +pl]
-    >>> FeatStruct('S[sem = <app(?subj, ?vp)>]')
-    S[sem=<?subj(?vp)>]
-
-    >>> FeatStruct('S')
-    S[]
-
-The parser also includes support for reading sets and tuples.
-
-    >>> FeatStruct('[x={1,2,2,2}, y={/}]')
-    [x={1, 2}, y={/}]
-    >>> FeatStruct('[x=(1,2,2,2), y=()]')
-    [x=(1, 2, 2, 2), y=()]
-    >>> print(FeatStruct('[x=(1,[z=(1,2,?x)],?z,{/})]'))
-    [ x = (1, [ z = (1, 2, ?x) ], ?z, {/}) ]
-
-Note that we can't put a featstruct inside a set, because doing so
-would hash it, and it's not frozen yet:
-
-    >>> print(FeatStruct('[x={[]}]'))
-    Traceback (most recent call last):
-      . . .
-    TypeError: FeatStructs must be frozen before they can be hashed.
-
-There's a special syntax for taking the union of sets: "{...+...}".
-The elements should only be variables or sets.
-
-    >>> FeatStruct('[x={?a+?b+{1,2,3}}]')
-    [x={?a+?b+{1, 2, 3}}]
-
-There's a special syntax for taking the concatenation of tuples:
-"(...+...)".  The elements should only be variables or tuples.
-
-    >>> FeatStruct('[x=(?a+?b+(1,2,3))]')
-    [x=(?a+?b+(1, 2, 3))]
-
-Parsing gives helpful messages if your string contains an error.
-
-    >>> FeatStruct('[a=, b=5]]')
-    Traceback (most recent call last):
-      . . .
-    ValueError: Error parsing feature structure
-        [a=, b=5]]
-           ^ Expected value
-    >>> FeatStruct('[a=12 22, b=33]')
-    Traceback (most recent call last):
-      . . .
-    ValueError: Error parsing feature structure
-        [a=12 22, b=33]
-             ^ Expected comma
-    >>> FeatStruct('[a=5] [b=6]')
-    Traceback (most recent call last):
-      . . .
-    ValueError: Error parsing feature structure
-        [a=5] [b=6]
-              ^ Expected end of string
-    >>> FeatStruct(' *++*')
-    Traceback (most recent call last):
-      . . .
-    ValueError: Error parsing feature structure
-        *++*
-        ^ Expected open bracket or identifier
-    >>> FeatStruct('[x->(1)]')
-    Traceback (most recent call last):
-      . . .
-    ValueError: Error parsing feature structure
-        [x->(1)]
-            ^ Expected bound identifier
-    >>> FeatStruct('[x->y]')
-    Traceback (most recent call last):
-      . . .
-    ValueError: Error parsing feature structure
-        [x->y]
-            ^ Expected identifier
-    >>> FeatStruct('')
-    Traceback (most recent call last):
-      . . .
-    ValueError: Error parsing feature structure
-    <BLANKLINE>
-        ^ Expected open bracket or identifier
-
-
-Unification
------------
-Very simple unifications give the expected results:
-
-    >>> FeatStruct().unify(FeatStruct())
-    []
-    >>> FeatStruct(number='singular').unify(FeatStruct())
-    [number='singular']
-    >>> FeatStruct().unify(FeatStruct(number='singular'))
-    [number='singular']
-    >>> FeatStruct(number='singular').unify(FeatStruct(person=3))
-    [number='singular', person=3]
-
-Merging nested structures:
-
-    >>> fs1 = FeatStruct('[A=[B=b]]')
-    >>> fs2 = FeatStruct('[A=[C=c]]')
-    >>> fs1.unify(fs2)
-    [A=[B='b', C='c']]
-    >>> fs2.unify(fs1)
-    [A=[B='b', C='c']]
-
-A basic case of reentrant unification
-
-    >>> fs4 = FeatStruct('[A=(1)[B=b], E=[F->(1)]]')
-    >>> fs5 = FeatStruct("[A=[C='c'], E=[F=[D='d']]]")
-    >>> fs4.unify(fs5)
-    [A=(1)[B='b', C='c', D='d'], E=[F->(1)]]
-    >>> fs5.unify(fs4)
-    [A=(1)[B='b', C='c', D='d'], E=[F->(1)]]
-
-More than 2 paths to a value
-
-    >>> fs1 = FeatStruct("[a=[],b=[],c=[],d=[]]")
-    >>> fs2 = FeatStruct('[a=(1)[], b->(1), c->(1), d->(1)]')
-    >>> fs1.unify(fs2)
-    [a=(1)[], b->(1), c->(1), d->(1)]
-
-fs1[a] gets unified with itself
-
-    >>> fs1 = FeatStruct('[x=(1)[], y->(1)]')
-    >>> fs2 = FeatStruct('[x=(1)[], y->(1)]')
-    >>> fs1.unify(fs2)
-    [x=(1)[], y->(1)]
-
-Bound variables should get forwarded appropriately
-
-    >>> fs1 = FeatStruct('[A=(1)[X=x], B->(1), C=?cvar, D=?dvar]')
-    >>> fs2 = FeatStruct('[A=(1)[Y=y], B=(2)[Z=z], C->(1), D->(2)]')
-    >>> fs1.unify(fs2)
-    [A=(1)[X='x', Y='y', Z='z'], B->(1), C->(1), D->(1)]
-    >>> fs2.unify(fs1)
-    [A=(1)[X='x', Y='y', Z='z'], B->(1), C->(1), D->(1)]
-
-Cyclic structure created by unification.
-
-    >>> fs1 = FeatStruct('[F=(1)[], G->(1)]')
-    >>> fs2 = FeatStruct('[F=[H=(2)[]], G->(2)]')
-    >>> fs3 = fs1.unify(fs2)
-    >>> fs3
-    [F=(1)[H->(1)], G->(1)]
-    >>> fs3['F'] is fs3['G']
-    True
-    >>> fs3['F'] is fs3['G']['H']
-    True
-    >>> fs3['F'] is fs3['G']['H']['H']
-    True
-    >>> fs3['F'] is fs3['F']['H']['H']['H']['H']['H']['H']['H']['H']
-    True
-
-Cyclic structure created w/ variables.
-
-    >>> fs1 = FeatStruct('[F=[H=?x]]')
-    >>> fs2 = FeatStruct('[F=?x]')
-    >>> fs3 = fs1.unify(fs2, rename_vars=False)
-    >>> fs3
-    [F=(1)[H->(1)]]
-    >>> fs3['F'] is fs3['F']['H']
-    True
-    >>> fs3['F'] is fs3['F']['H']['H']
-    True
-    >>> fs3['F'] is fs3['F']['H']['H']['H']['H']['H']['H']['H']['H']
-    True
-
-Unifying w/ a cyclic feature structure.
-
-    >>> fs4 = FeatStruct('[F=[H=[H=[H=(1)[]]]], K->(1)]')
-    >>> fs3.unify(fs4)
-    [F=(1)[H->(1)], K->(1)]
-    >>> fs4.unify(fs3)
-    [F=(1)[H->(1)], K->(1)]
-
-Variable bindings should preserve reentrance.
-
-    >>> bindings = {}
-    >>> fs1 = FeatStruct("[a=?x]")
-    >>> fs2 = fs1.unify(FeatStruct("[a=[]]"), bindings)
-    >>> fs2['a'] is bindings[Variable('?x')]
-    True
-    >>> fs2.unify(FeatStruct("[b=?x]"), bindings)
-    [a=(1)[], b->(1)]
-
-Aliased variable tests
-
-    >>> fs1 = FeatStruct("[a=?x, b=?x]")
-    >>> fs2 = FeatStruct("[b=?y, c=?y]")
-    >>> bindings = {}
-    >>> fs3 = fs1.unify(fs2, bindings)
-    >>> fs3
-    [a=?x, b=?x, c=?x]
-    >>> bindings
-    {Variable('?y'): Variable('?x')}
-    >>> fs3.unify(FeatStruct("[a=1]"))
-    [a=1, b=1, c=1]
-
-If we keep track of the bindings, then we can use the same variable
-over multiple calls to unify.
-
-    >>> bindings = {}
-    >>> fs1 = FeatStruct('[a=?x]')
-    >>> fs2 = fs1.unify(FeatStruct('[a=[]]'), bindings)
-    >>> fs2.unify(FeatStruct('[b=?x]'), bindings)
-    [a=(1)[], b->(1)]
-    >>> bindings
-    {Variable('?x'): []}
-
-..
-    >>> del fs1, fs2, fs3, fs4, fs5 # clean-up
-
-Unification Bindings
---------------------
-
-    >>> bindings = {}
-    >>> fs1 = FeatStruct('[a=?x]')
-    >>> fs2 = FeatStruct('[a=12]')
-    >>> fs3 = FeatStruct('[b=?x]')
-    >>> fs1.unify(fs2, bindings)
-    [a=12]
-    >>> bindings
-    {Variable('?x'): 12}
-    >>> fs3.substitute_bindings(bindings)
-    [b=12]
-    >>> fs3 # substitute_bindings didn't mutate fs3.
-    [b=?x]
-    >>> fs2.unify(fs3, bindings)
-    [a=12, b=12]
-
-    >>> bindings = {}
-    >>> fs1 = FeatStruct('[a=?x, b=1]')
-    >>> fs2 = FeatStruct('[a=5, b=?x]')
-    >>> fs1.unify(fs2, bindings)
-    [a=5, b=1]
-    >>> sorted(bindings.items())
-    [(Variable('?x'), 5), (Variable('?x2'), 1)]
-
-..
-    >>> del fs1, fs2, fs3 # clean-up
-
-Expressions
------------
-
-    >>> e = Expression.fromstring('\\P y.P(z,y)')
-    >>> fs1 = FeatStruct(x=e, y=Variable('z'))
-    >>> fs2 = FeatStruct(y=VariableExpression(Variable('John')))
-    >>> fs1.unify(fs2)
-    [x=<\P y.P(John,y)>, y=<John>]
-
-Remove Variables
-----------------
-
-    >>> FeatStruct('[a=?x, b=12, c=[d=?y]]').remove_variables()
-    [b=12, c=[]]
-    >>> FeatStruct('(1)[a=[b=?x,c->(1)]]').remove_variables()
-    (1)[a=[c->(1)]]
-
-Equality & Hashing
-------------------
-The `equal_values` method checks whether two feature structures assign
-the same value to every feature.  If the optional argument
-``check_reentrances`` is supplied, then it also returns false if there
-is any difference in the reentrances.
-
-    >>> a = FeatStruct('(1)[x->(1)]')
-    >>> b = FeatStruct('(1)[x->(1)]')
-    >>> c = FeatStruct('(1)[x=[x->(1)]]')
-    >>> d = FeatStruct('[x=(1)[x->(1)]]')
-    >>> e = FeatStruct('(1)[x=[x->(1), y=1], y=1]')
-    >>> def compare(x,y):
-    ...     assert x.equal_values(y, True) == y.equal_values(x, True)
-    ...     assert x.equal_values(y, False) == y.equal_values(x, False)
-    ...     if x.equal_values(y, True):
-    ...         assert x.equal_values(y, False)
-    ...         print('equal values, same reentrance')
-    ...     elif x.equal_values(y, False):
-    ...         print('equal values, different reentrance')
-    ...     else:
-    ...         print('different values')
-
-    >>> compare(a, a)
-    equal values, same reentrance
-    >>> compare(a, b)
-    equal values, same reentrance
-    >>> compare(a, c)
-    equal values, different reentrance
-    >>> compare(a, d)
-    equal values, different reentrance
-    >>> compare(c, d)
-    equal values, different reentrance
-    >>> compare(a, e)
-    different values
-    >>> compare(c, e)
-    different values
-    >>> compare(d, e)
-    different values
-    >>> compare(e, e)
-    equal values, same reentrance
-
-Feature structures may not be hashed until they are frozen:
-
-    >>> hash(a)
-    Traceback (most recent call last):
-      . . .
-    TypeError: FeatStructs must be frozen before they can be hashed.
-    >>> a.freeze()
-    >>> v = hash(a)
-
-Feature structures define hash consistently.  The following example
-looks at the hash value for each (fs1,fs2) pair; if their hash values
-are not equal, then they must not be equal.  If their hash values are
-equal, then display a message, and indicate whether their values are
-indeed equal.  Note that c and d currently have the same hash value,
-even though they are not equal.  That is not a bug, strictly speaking,
-but it wouldn't be a bad thing if it changed.
-
-    >>> for fstruct in (a, b, c, d, e):
-    ...     fstruct.freeze()
-    >>> for fs1_name in 'abcde':
-    ...     for fs2_name in 'abcde':
-    ...         fs1 = locals()[fs1_name]
-    ...         fs2 = locals()[fs2_name]
-    ...         if hash(fs1) != hash(fs2):
-    ...             assert fs1 != fs2
-    ...         else:
-    ...             print('%s and %s have the same hash value,' %
-    ...                    (fs1_name, fs2_name))
-    ...             if fs1 == fs2: print('and are equal')
-    ...             else: print('and are not equal')
-    a and a have the same hash value, and are equal
-    a and b have the same hash value, and are equal
-    b and a have the same hash value, and are equal
-    b and b have the same hash value, and are equal
-    c and c have the same hash value, and are equal
-    c and d have the same hash value, and are not equal
-    d and c have the same hash value, and are not equal
-    d and d have the same hash value, and are equal
-    e and e have the same hash value, and are equal
-
-..
-    >>> del a, b, c, d, e, v # clean-up
-
-Tracing
--------
-
-    >>> fs1 = FeatStruct('[a=[b=(1)[], c=?x], d->(1), e=[f=?x]]')
-    >>> fs2 = FeatStruct('[a=(1)[c="C"], e=[g->(1)]]')
-    >>> fs1.unify(fs2, trace=True)
-    <BLANKLINE>
-    Unification trace:
-       / [a=[b=(1)[], c=?x], d->(1), e=[f=?x]]
-      |\ [a=(1)[c='C'], e=[g->(1)]]
-      |
-      | Unify feature: a
-      |    / [b=[], c=?x]
-      |   |\ [c='C']
-      |   |
-      |   | Unify feature: a.c
-      |   |    / ?x
-      |   |   |\ 'C'
-      |   |   |
-      |   |   +-->Variable('?x')
-      |   |
-      |   +-->[b=[], c=?x]
-      |       Bindings: {?x: 'C'}
-      |
-      | Unify feature: e
-      |    / [f=?x]
-      |   |\ [g=[c='C']]
-      |   |
-      |   +-->[f=?x, g=[b=[], c=?x]]
-      |       Bindings: {?x: 'C'}
-      |
-      +-->[a=(1)[b=(2)[], c='C'], d->(2), e=[f='C', g->(1)]]
-          Bindings: {?x: 'C'}
-    [a=(1)[b=(2)[], c='C'], d->(2), e=[f='C', g->(1)]]
-    >>>
-    >>> fs1 = FeatStruct('[a=?x, b=?z, c=?z]')
-    >>> fs2 = FeatStruct('[a=?y, b=?y, c=?q]')
-    >>> #fs1.unify(fs2, trace=True)
-    >>>
-
-..
-    >>> del fs1, fs2 # clean-up
-
-Unification on Dicts & Lists
-----------------------------
-It's possible to do unification on dictionaries:
-
-    >>> from nltk.featstruct import unify
-    >>> pprint(unify(dict(x=1, y=dict(z=2)), dict(x=1, q=5)), width=1)
-    {'q': 5, 'x': 1, 'y': {'z': 2}}
-
-It's possible to do unification on lists as well:
-
-    >>> unify([1, 2, 3], [1, Variable('x'), 3])
-    [1, 2, 3]
-
-Mixing dicts and lists is fine:
-
-    >>> pprint(unify([dict(x=1, y=dict(z=2)),3], [dict(x=1, q=5),3]),
-    ...               width=1)
-    [{'q': 5, 'x': 1, 'y': {'z': 2}}, 3]
-
-Mixing dicts and FeatStructs is discouraged:
-
-    >>> unify(dict(x=1), FeatStruct(x=1))
-    Traceback (most recent call last):
-      . . .
-    ValueError: Mixing FeatStruct objects with Python dicts and lists is not supported.
-
-But you can do it if you really want, by explicitly stating that both
-dictionaries and FeatStructs should be treated as feature structures:
-
-    >>> unify(dict(x=1), FeatStruct(x=1), fs_class=(dict, FeatStruct))
-    {'x': 1}
-
-Finding Conflicts
------------------
-
-    >>> from nltk.featstruct import conflicts
-    >>> fs1 = FeatStruct('[a=[b=(1)[c=2], d->(1), e=[f->(1)]]]')
-    >>> fs2 = FeatStruct('[a=[b=[c=[x=5]], d=[c=2], e=[f=[c=3]]]]')
-    >>> for path in conflicts(fs1, fs2):
-    ...     print('%-8s: %r vs %r' % ('.'.join(path), fs1[path], fs2[path]))
-    a.b.c   : 2 vs [x=5]
-    a.e.f.c : 2 vs 3
-
-..
-    >>> del fs1, fs2 # clean-up
-
-Retracting Bindings
--------------------
-
-    >>> from nltk.featstruct import retract_bindings
-    >>> bindings = {}
-    >>> fs1 = FeatStruct('[a=?x, b=[c=?y]]')
-    >>> fs2 = FeatStruct('[a=(1)[c=[d=1]], b->(1)]')
-    >>> fs3 = fs1.unify(fs2, bindings)
-    >>> print(fs3)
-    [ a = (1) [ c = [ d = 1 ] ] ]
-    [                           ]
-    [ b -> (1)                  ]
-    >>> pprint(bindings)
-    {Variable('?x'): [c=[d=1]], Variable('?y'): [d=1]}
-    >>> retract_bindings(fs3, bindings)
-    [a=?x, b=?x]
-    >>> pprint(bindings)
-    {Variable('?x'): [c=?y], Variable('?y'): [d=1]}
-
-Squashed Bugs
-~~~~~~~~~~~~~
-In svn rev 5167, unifying two feature structures that used the same
-variable would cause those variables to become aliased in the output.
-
-    >>> fs1 = FeatStruct('[a=?x]')
-    >>> fs2 = FeatStruct('[b=?x]')
-    >>> fs1.unify(fs2)
-    [a=?x, b=?x2]
-
-There was a bug in svn revision 5172 that caused `rename_variables` to
-rename variables to names that are already used.
-
-    >>> FeatStruct('[a=?x, b=?x2]').rename_variables(
-    ...     vars=[Variable('?x')])
-    [a=?x3, b=?x2]
-    >>> fs1 = FeatStruct('[a=?x]')
-    >>> fs2 = FeatStruct('[a=?x, b=?x2]')
-    >>> fs1.unify(fs2)
-    [a=?x, b=?x2]
-
-There was a bug in svn rev 5167 that caused us to get the following
-example wrong.  Basically the problem was that we only followed
-'forward' pointers for other, not self, when unifying two feature
-structures.  (nb: this test assumes that features are unified in
-alphabetical order -- if they are not, it might pass even if the bug
-is present.)
-
-    >>> fs1 = FeatStruct('[a=[x=1], b=?x, c=?x]')
-    >>> fs2 = FeatStruct('[a=(1)[], b->(1), c=[x=2]]')
-    >>> print(fs1.unify(fs2))
-    None
-
-..
-    >>> del fs1, fs2 # clean-up
diff --git a/nlp_resource_data/nltk/test/framenet.doctest b/nlp_resource_data/nltk/test/framenet.doctest

deleted file mode 100644 (file)

index 6de3a41..0000000
--- a/nlp_resource_data/nltk/test/framenet.doctest
+++ /dev/null
@@ -1,288 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-========
-FrameNet
-========
-
-The FrameNet corpus is a lexical database of English that is both human-
-and machine-readable, based on annotating examples of how words are used
-in actual texts. FrameNet is based on a theory of meaning called Frame
-Semantics, deriving from the work of Charles J. Fillmore and colleagues.
-The basic idea is straightforward: that the meanings of most words can
-best be understood on the basis of a semantic frame: a description of a
-type of event, relation, or entity and the participants in it. For
-example, the concept of cooking typically involves a person doing the
-cooking (Cook), the food that is to be cooked (Food), something to hold
-the food while cooking (Container) and a source of heat
-(Heating_instrument). In the FrameNet project, this is represented as a
-frame called Apply_heat, and the Cook, Food, Heating_instrument and
-Container are called frame elements (FEs). Words that evoke this frame,
-such as fry, bake, boil, and broil, are called lexical units (LUs) of
-the Apply_heat frame. The job of FrameNet is to define the frames
-and to annotate sentences to show how the FEs fit syntactically around
-the word that evokes the frame.
-
-------
-Frames
-------
-
-A Frame is a script-like conceptual structure that describes a
-particular type of situation, object, or event along with the
-participants and props that are needed for that Frame. For
-example, the "Apply_heat" frame describes a common situation
-involving a Cook, some Food, and a Heating_Instrument, and is
-evoked by words such as bake, blanch, boil, broil, brown,
-simmer, steam, etc.
-
-We call the roles of a Frame "frame elements" (FEs) and the
-frame-evoking words are called "lexical units" (LUs).
-
-FrameNet includes relations between Frames. Several types of
-relations are defined, of which the most important are:
-
-- Inheritance: An IS-A relation. The child frame is a subtype
-  of the parent frame, and each FE in the parent is bound to
-  a corresponding FE in the child. An example is the
-  "Revenge" frame which inherits from the
-  "Rewards_and_punishments" frame.
-
-- Using: The child frame presupposes the parent frame as
-  background, e.g. the "Speed" frame "uses" (or presupposes)
-  the "Motion" frame; however, not all parent FEs need to be
-  bound to child FEs.
-
-- Subframe: The child frame is a subevent of a complex event
-  represented by the parent, e.g. the "Criminal_process" frame
-  has subframes of "Arrest", "Arraignment", "Trial", and
-  "Sentencing".
-
-- Perspective_on: The child frame provides a particular
-  perspective on an un-perspectivized parent frame. A pair of
-  examples consists of the "Hiring" and "Get_a_job" frames,
-  which perspectivize the "Employment_start" frame from the
-  Employer's and the Employee's point of view, respectively.
-
-To get a list of all of the Frames in FrameNet, you can use the
-`frames()` function. If you supply a regular expression pattern to the
-`frames()` function, you will get a list of all Frames whose names match
-that pattern:
-
-    >>> from pprint import pprint
-    >>> from operator import itemgetter
-    >>> from nltk.corpus import framenet as fn
-    >>> from nltk.corpus.reader.framenet import PrettyList
-    >>> x = fn.frames(r'(?i)crim')
-    >>> x.sort(key=itemgetter('ID'))
-    >>> x
-    [<frame ID=200 name=Criminal_process>, <frame ID=500 name=Criminal_investigation>, ...]
-    >>> PrettyList(sorted(x, key=itemgetter('ID')))
-    [<frame ID=200 name=Criminal_process>, <frame ID=500 name=Criminal_investigation>, ...]
-
-To get the details of a particular Frame, you can use the `frame()`
-function passing in the frame number:
-
-    >>> from pprint import pprint
-    >>> from nltk.corpus import framenet as fn
-    >>> f = fn.frame(202)
-    >>> f.ID
-    202
-    >>> f.name
-    'Arrest'
-    >>> f.definition # doctest: +ELLIPSIS
-    "Authorities charge a Suspect, who is under suspicion of having committed a crime..."
-    >>> len(f.lexUnit)
-    11
-    >>> pprint(sorted([x for x in f.FE]))
-    ['Authorities',
-     'Charges',
-     'Co-participant',
-     'Manner',
-     'Means',
-     'Offense',
-     'Place',
-     'Purpose',
-     'Source_of_legal_authority',
-     'Suspect',
-     'Time',
-     'Type']
-    >>> pprint(f.frameRelations)
-    [<Parent=Intentionally_affect -- Inheritance -> Child=Arrest>, <Complex=Criminal_process -- Subframe -> Component=Arrest>, ...]
-
-The `frame()` function shown above returns a dict object containing
-detailed information about the Frame. See the documentation on the
-`frame()` function for the specifics.
-
-You can also search for Frames by their Lexical Units (LUs). The
-`frames_by_lemma()` function returns a list of all frames that contain
-LUs in which the 'name' attribute of the LU matches the given regular
-expression. Note that LU names are composed of "lemma.POS", where the
-"lemma" part can be made up of either a single lexeme (e.g. 'run') or
-multiple lexemes (e.g. 'a little') (see below).
-
-    >>> PrettyList(sorted(fn.frames_by_lemma(r'(?i)a little'), key=itemgetter('ID'))) # doctest: +ELLIPSIS
-    [<frame ID=189 name=Quanti...>, <frame ID=2001 name=Degree>]
-
--------------
-Lexical Units
--------------
-
-A lexical unit (LU) is a pairing of a word with a meaning. For
-example, the "Apply_heat" Frame describes a common situation
-involving a Cook, some Food, and a Heating Instrument, and is
-_evoked_ by words such as bake, blanch, boil, broil, brown,
-simmer, steam, etc. These frame-evoking words are the LUs in the
-Apply_heat frame. Each sense of a polysemous word is a different
-LU.
-
-We have used the word "word" in talking about LUs. The reality
-is actually rather complex. When we say that the word "bake" is
-polysemous, we mean that the lemma "bake.v" (which has the
-word-forms "bake", "bakes", "baked", and "baking") is linked to
-three different frames:
-
-- Apply_heat: "Michelle baked the potatoes for 45 minutes."
-
-- Cooking_creation: "Michelle baked her mother a cake for her birthday."
-
-- Absorb_heat: "The potatoes have to bake for more than 30 minutes."
-
-These constitute three different LUs, with different
-definitions.
-
-Multiword expressions such as "given name" and hyphenated words
-like "shut-eye" can also be LUs. Idiomatic phrases such as
-"middle of nowhere" and "give the slip (to)" are also defined as
-LUs in the appropriate frames ("Isolated_places" and "Evading",
-respectively), and their internal structure is not analyzed.
-
-Framenet provides multiple annotated examples of each sense of a
-word (i.e. each LU).  Moreover, the set of examples
-(approximately 20 per LU) illustrates all of the combinatorial
-possibilities of the lexical unit.
-
-Each LU is linked to a Frame, and hence to the other words which
-evoke that Frame. This makes the FrameNet database similar to a
-thesaurus, grouping together semantically similar words.
-
-In the simplest case, frame-evoking words are verbs such as
-"fried" in:
-
-   "Matilde fried the catfish in a heavy iron skillet."
-
-Sometimes event nouns may evoke a Frame. For example,
-"reduction" evokes "Cause_change_of_scalar_position" in:
-
-   "...the reduction of debt levels to $665 million from $2.6 billion."
-
-Adjectives may also evoke a Frame. For example, "asleep" may
-evoke the "Sleep" frame as in:
-
-   "They were asleep for hours."
-
-Many common nouns, such as artifacts like "hat" or "tower",
-typically serve as dependents rather than clearly evoking their
-own frames.
-
-Details for a specific lexical unit can be obtained using this class's
-`lus()` function, which takes an optional regular expression
-pattern that will be matched against the name of the lexical unit:
-
-    >>> from pprint import pprint
-    >>> PrettyList(sorted(fn.lus(r'(?i)a little'), key=itemgetter('ID')))
-    [<lu ID=14733 name=a little.n>, <lu ID=14743 name=a little.adv>, ...]
-
-You can obtain detailed information on a particular LU by calling the
-`lu()` function and passing in an LU's 'ID' number:
-
-    >>> from pprint import pprint
-    >>> from nltk.corpus import framenet as fn
-    >>> fn.lu(256).name
-    'foresee.v'
-    >>> fn.lu(256).definition
-    'COD: be aware of beforehand; predict.'
-    >>> fn.lu(256).frame.name
-    'Expectation'
-    >>> fn.lu(256).lexemes[0].name
-    'foresee'
-
-Note that LU names take the form of a dotted string (e.g. "run.v" or "a
-little.adv") in which a lemma precedes the "." and a part of speech
-(POS) follows the dot. The lemma may be composed of a single lexeme
-(e.g. "run") or of multiple lexemes (e.g. "a little"). The list of
-POSs used in the LUs is:
-
-v    - verb
-n    - noun
-a    - adjective
-adv  - adverb
-prep - preposition
-num  - numbers
-intj - interjection
-art  - article
-c    - conjunction
-scon - subordinating conjunction
-
-For more detailed information about the info that is contained in the
-dict that is returned by the `lu()` function, see the documentation on
-the `lu()` function.
-
--------------------
-Annotated Documents
--------------------
-
-The FrameNet corpus contains a small set of annotated documents. A list
-of these documents can be obtained by calling the `docs()` function:
-
-    >>> from pprint import pprint
-    >>> from nltk.corpus import framenet as fn
-    >>> d = fn.docs('BellRinging')[0]
-    >>> d.corpname
-    'PropBank'
-    >>> d.sentence[49] # doctest: +ELLIPSIS
-    full-text sentence (...) in BellRinging:
-    <BLANKLINE>
-    <BLANKLINE>
-    [POS] 17 tags
-    <BLANKLINE>
-    [POS_tagset] PENN
-    <BLANKLINE>
-    [text] + [annotationSet]
-    <BLANKLINE>
-    `` I live in hopes that the ringers themselves will be drawn into
-                 *****          *******                    *****
-                 Desir          Cause_t                    Cause
-                 [1]            [3]                        [2]
-    <BLANKLINE>
-     that fuller life .
-          ******
-          Comple
-          [4]
-     (Desir=Desiring, Cause_t=Cause_to_make_noise, Cause=Cause_motion, Comple=Completeness)
-    <BLANKLINE>
-
-    >>> d.sentence[49].annotationSet[1] # doctest: +ELLIPSIS
-    annotation set (...):
-    <BLANKLINE>
-    [status] MANUAL
-    <BLANKLINE>
-    [LU] (6605) hope.n in Desiring
-    <BLANKLINE>
-    [frame] (366) Desiring
-    <BLANKLINE>
-    [GF] 2 relations
-    <BLANKLINE>
-    [PT] 2 phrases
-    <BLANKLINE>
-    [text] + [Target] + [FE] + [Noun]
-    <BLANKLINE>
-    `` I live in hopes that the ringers themselves will be drawn into
-       - ^^^^ ^^ ***** ----------------------------------------------
-       E supp su       Event
-    <BLANKLINE>
-     that fuller life .
-    -----------------
-    <BLANKLINE>
-     (E=Experiencer, su=supp)
-    <BLANKLINE>
-    <BLANKLINE>
diff --git a/nlp_resource_data/nltk/test/generate.doctest b/nlp_resource_data/nltk/test/generate.doctest

deleted file mode 100644 (file)

index 4453518..0000000
--- a/nlp_resource_data/nltk/test/generate.doctest
+++ /dev/null
@@ -1,67 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-===============================================
-Generating sentences from context-free grammars
-===============================================
-
-An example grammar:
-
-    >>> from nltk.parse.generate import generate, demo_grammar
-    >>> from nltk import CFG
-    >>> grammar = CFG.fromstring(demo_grammar)
-    >>> print(grammar)
-    Grammar with 13 productions (start state = S)
-        S -> NP VP
-        NP -> Det N
-        PP -> P NP
-        VP -> 'slept'
-        VP -> 'saw' NP
-        VP -> 'walked' PP
-        Det -> 'the'
-        Det -> 'a'
-        N -> 'man'
-        N -> 'park'
-        N -> 'dog'
-        P -> 'in'
-        P -> 'with'
-
-The first 10 generated sentences:
-
-    >>> for sentence in generate(grammar, n=10):
-    ...     print(' '.join(sentence))
-    the man slept
-    the man saw the man
-    the man saw the park
-    the man saw the dog
-    the man saw a man
-    the man saw a park
-    the man saw a dog
-    the man walked in the man
-    the man walked in the park
-    the man walked in the dog
-
-All sentences of max depth 4:
-
-    >>> for sentence in generate(grammar, depth=4):
-    ...     print(' '.join(sentence))
-    the man slept
-    the park slept
-    the dog slept
-    a man slept
-    a park slept
-    a dog slept
-
-The number of sentences of different max depths:
-
-    >>> len(list(generate(grammar, depth=3)))
-    0
-    >>> len(list(generate(grammar, depth=4)))
-    6
-    >>> len(list(generate(grammar, depth=5)))
-    42
-    >>> len(list(generate(grammar, depth=6)))
-    114
-    >>> len(list(generate(grammar)))
-    114
-
diff --git a/nlp_resource_data/nltk/test/gensim.doctest b/nlp_resource_data/nltk/test/gensim.doctest

deleted file mode 100644 (file)

index 2e27597..0000000
--- a/nlp_resource_data/nltk/test/gensim.doctest
+++ /dev/null
@@ -1,140 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-=======================================
-Demonstrate word embedding using Gensim
-=======================================
-
-We demonstrate three functions:
-- Train the word embeddings using brown corpus;
-- Load the pre-trained model and perform simple tasks; and
-- Prune the pre-trained binary model.
-
-    >>> import gensim
-
----------------
-Train the model
----------------
-
-Here we train a word embedding using the Brown Corpus:
-
-    >>> from nltk.corpus import brown
-    >>> model = gensim.models.Word2Vec(brown.sents())
-
-It might take some time to train the model. So, after it is trained, it can be saved as follows:
-
-    >>> model.save('brown.embedding')
-    >>> new_model = gensim.models.Word2Vec.load('brown.embedding')
-
-The model will be the list of words with their embedding. We can easily get the vector representation of a word.
-    >>> len(new_model['university'])
-    100
-
-Gensim already implements some supporting functions for manipulating word embeddings.
-For example, to compute the cosine similarity between 2 words:
-
-    >>> new_model.similarity('university','school') > 0.3
-    True
-
----------------------------
-Using the pre-trained model
----------------------------
-
-NLTK includes a pre-trained model which is part of a model that is trained on 100 billion words from the Google News Dataset.
-The full model is from https://code.google.com/p/word2vec/ (about 3 GB).
-
-    >>> from nltk.data import find
-    >>> word2vec_sample = str(find('models/word2vec_sample/pruned.word2vec.txt'))
-    >>> model = gensim.models.KeyedVectors.load_word2vec_format(word2vec_sample, binary=False)
-
-We pruned the model to only include the most common words (~44k words).
-
-    >>> len(model.vocab)
-    43981
-
-Each word is represented in the space of 300 dimensions:
-
-    >>> len(model['university'])
-    300
-
-Finding the top n words that are similar to a target word is simple. The result is the list of n words with the score.
-
-    >>> model.most_similar(positive=['university'], topn = 3)
-    [(u'universities', 0.70039...), (u'faculty', 0.67809...), (u'undergraduate', 0.65870...)]
-
-Finding the word that does not belong in a list is also supported, although implementing this yourself is simple.
-
-    >>> model.doesnt_match('breakfast cereal dinner lunch'.split())
-    'cereal'
-
-Mikolov et al. (2013) showed that word embeddings capture many syntactic and semantic regularities. For example,
-the vector 'King - Man + Woman' is close to 'Queen' and 'Germany - Berlin + Paris' is close to 'France'.
-
-    >>> model.most_similar(positive=['woman','king'], negative=['man'], topn = 1)
-    [(u'queen', 0.71181...)]
-
-    >>> model.most_similar(positive=['Paris','Germany'], negative=['Berlin'], topn = 1)
-    [(u'France', 0.78840...)]
-
-We can visualize the word embeddings using t-SNE (http://lvdmaaten.github.io/tsne/). For this demonstration, we visualize the first 1000 words.
-
-|    import numpy as np
-|    labels = []
-|    count = 0
-|    max_count = 1000
-|    X = np.zeros(shape=(max_count,len(model['university'])))
-|
-|    for term in model.vocab:
-|        X[count] = model[term]
-|        labels.append(term)
-|        count+= 1
-|        if count >= max_count: break
-|
-|    # It is recommended to use PCA first to reduce to ~50 dimensions
-|    from sklearn.decomposition import PCA
-|    pca = PCA(n_components=50)
-|    X_50 = pca.fit_transform(X)
-|
-|    # Using TSNE to further reduce to 2 dimensions
-|    from sklearn.manifold import TSNE
-|    model_tsne = TSNE(n_components=2, random_state=0)
-|    Y = model_tsne.fit_transform(X_50)
-|
-|    # Show the scatter plot
-|    import matplotlib.pyplot as plt
-|    plt.scatter(Y[:,0], Y[:,1], 20)
-|
-|    # Add labels
-|    for label, x, y in zip(labels, Y[:, 0], Y[:, 1]):
-|        plt.annotate(label, xy = (x,y), xytext = (0, 0), textcoords = 'offset points', size = 10)
-|
-|    plt.show()
-
-------------------------------
-Prune the trained binary model
-------------------------------
-
-Here is the supporting code to extract part of the binary model (GoogleNews-vectors-negative300.bin.gz) from https://code.google.com/p/word2vec/
-We use this code to get the `word2vec_sample` model.
-
-|    import gensim
-|    from gensim.models.word2vec import Word2Vec
-|    # Load the binary model
-|    model = Word2Vec.load_word2vec_format('GoogleNews-vectors-negative300.bin.gz', binary = True);
-|
-|    # Only output words that appear in the Brown corpus
-|    from nltk.corpus import brown
-|    words = set(brown.words())
-|    print (len(words))
-|
-|    # Output presented word to a temporary file
-|    out_file = 'pruned.word2vec.txt'
-|    f = open(out_file,'wb')
-|
-|    word_presented = words.intersection(model.vocab.keys())
-|    f.write('{} {}\n'.format(len(word_presented),len(model['word'])))
-|
-|    for word in word_presented:
-|        f.write('{} {}\n'.format(word, ' '.join(str(value) for value in model[word])))
-|
-|    f.close()
diff --git a/nlp_resource_data/nltk/test/gensim_fixt.py b/nlp_resource_data/nltk/test/gensim_fixt.py

deleted file mode 100644 (file)

index b1a6d2e..0000000
--- a/nlp_resource_data/nltk/test/gensim_fixt.py
+++ /dev/null
@@ -1,11 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import
-
-
-def setup_module(module):
-    from nose import SkipTest
-
-    try:
-        import gensim
-    except ImportError:
-        raise SkipTest("Gensim doctest requires gensim")
diff --git a/nlp_resource_data/nltk/test/gluesemantics.doctest b/nlp_resource_data/nltk/test/gluesemantics.doctest

deleted file mode 100644 (file)

index 7bf29a0..0000000
--- a/nlp_resource_data/nltk/test/gluesemantics.doctest
+++ /dev/null
@@ -1,384 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-==============================================================================
- Glue Semantics
-==============================================================================
-
-.. include:: ../../../nltk_book/definitions.rst
-
-
-======================
-Linear logic
-======================
-
-    >>> from nltk.sem import logic
-    >>> from nltk.sem.glue import *
-    >>> from nltk.sem.linearlogic import *
-
-    >>> from nltk.sem.linearlogic import Expression
-    >>> read_expr = Expression.fromstring
-
-Parser
-
-    >>> print(read_expr(r'f'))
-    f
-    >>> print(read_expr(r'(g -o f)'))
-    (g -o f)
-    >>> print(read_expr(r'(g -o (h -o f))'))
-    (g -o (h -o f))
-    >>> print(read_expr(r'((g -o G) -o G)'))
-    ((g -o G) -o G)
-    >>> print(read_expr(r'(g -o f)(g)'))
-    (g -o f)(g)
-    >>> print(read_expr(r'((g -o G) -o G)((g -o f))'))
-    ((g -o G) -o G)((g -o f))
-
-Simplify
-
-    >>> print(read_expr(r'f').simplify())
-    f
-    >>> print(read_expr(r'(g -o f)').simplify())
-    (g -o f)
-    >>> print(read_expr(r'((g -o G) -o G)').simplify())
-    ((g -o G) -o G)
-    >>> print(read_expr(r'(g -o f)(g)').simplify())
-    f
-    >>> try: read_expr(r'(g -o f)(f)').simplify()
-    ... except LinearLogicApplicationException as e: print(e)
-    ...
-    Cannot apply (g -o f) to f. Cannot unify g with f given {}
-    >>> print(read_expr(r'(G -o f)(g)').simplify())
-    f
-    >>> print(read_expr(r'((g -o G) -o G)((g -o f))').simplify())
-    f
-
-Test BindingDict
-
-    >>> h = ConstantExpression('h')
-    >>> g = ConstantExpression('g')
-    >>> f = ConstantExpression('f')
-
-    >>> H = VariableExpression('H')
-    >>> G = VariableExpression('G')
-    >>> F = VariableExpression('F')
-
-    >>> d1 = BindingDict({H: h})
-    >>> d2 = BindingDict({F: f, G: F})
-    >>> d12 = d1 + d2
-    >>> all12 = ['%s: %s' % (v, d12[v]) for v in d12.d]
-    >>> all12.sort()
-    >>> print(all12)
-    ['F: f', 'G: f', 'H: h']
-
-    >>> BindingDict([(F,f),(G,g),(H,h)]) == BindingDict({F:f, G:g, H:h})
-    True
-
-    >>> d4 = BindingDict({F: f})
-    >>> try: d4[F] = g
-    ... except VariableBindingException as e: print(e)
-    Variable F already bound to another value
-
-Test Unify
-
-    >>> try: f.unify(g, BindingDict())
-    ... except UnificationException as e: print(e)
-    ...
-    Cannot unify f with g given {}
-
-    >>> f.unify(G, BindingDict()) == BindingDict({G: f})
-    True
-    >>> try: f.unify(G, BindingDict({G: h}))
-    ... except UnificationException as e: print(e)
-    ...
-    Cannot unify f with G given {G: h}
-    >>> f.unify(G, BindingDict({G: f})) == BindingDict({G: f})
-    True
-    >>> f.unify(G, BindingDict({H: f})) == BindingDict({G: f, H: f})
-    True
-
-    >>> G.unify(f, BindingDict()) == BindingDict({G: f})
-    True
-    >>> try: G.unify(f, BindingDict({G: h}))
-    ... except UnificationException as e: print(e)
-    ...
-    Cannot unify G with f given {G: h}
-    >>> G.unify(f, BindingDict({G: f})) == BindingDict({G: f})
-    True
-    >>> G.unify(f, BindingDict({H: f})) == BindingDict({G: f, H: f})
-    True
-
-    >>> G.unify(F, BindingDict()) == BindingDict({G: F})
-    True
-    >>> try: G.unify(F, BindingDict({G: H}))
-    ... except UnificationException as e: print(e)
-    ...
-    Cannot unify G with F given {G: H}
-    >>> G.unify(F, BindingDict({G: F})) == BindingDict({G: F})
-    True
-    >>> G.unify(F, BindingDict({H: F})) == BindingDict({G: F, H: F})
-    True
-
-Test Compile
-
-    >>> print(read_expr('g').compile_pos(Counter(), GlueFormula))
-    (<ConstantExpression g>, [])
-    >>> print(read_expr('(g -o f)').compile_pos(Counter(), GlueFormula))
-    (<ImpExpression (g -o f)>, [])
-    >>> print(read_expr('(g -o (h -o f))').compile_pos(Counter(), GlueFormula))
-    (<ImpExpression (g -o (h -o f))>, [])
-
-
-======================
-Glue
-======================
-
-Demo of "John walks"
---------------------
-
-    >>> john = GlueFormula("John", "g")
-    >>> print(john)
-    John : g
-    >>> walks = GlueFormula(r"\x.walks(x)", "(g -o f)")
-    >>> print(walks)
-    \x.walks(x) : (g -o f)
-    >>> print(walks.applyto(john))
-    \x.walks(x)(John) : (g -o f)(g)
-    >>> print(walks.applyto(john).simplify())
-    walks(John) : f
-
-
-Demo of "A dog walks"
----------------------
-
-    >>> a = GlueFormula("\P Q.some x.(P(x) and Q(x))", "((gv -o gr) -o ((g -o G) -o G))")
-    >>> print(a)
-    \P Q.exists x.(P(x) & Q(x)) : ((gv -o gr) -o ((g -o G) -o G))
-    >>> man = GlueFormula(r"\x.man(x)", "(gv -o gr)")
-    >>> print(man)
-    \x.man(x) : (gv -o gr)
-    >>> walks = GlueFormula(r"\x.walks(x)", "(g -o f)")
-    >>> print(walks)
-    \x.walks(x) : (g -o f)
-    >>> a_man = a.applyto(man)
-    >>> print(a_man.simplify())
-    \Q.exists x.(man(x) & Q(x)) : ((g -o G) -o G)
-    >>> a_man_walks = a_man.applyto(walks)
-    >>> print(a_man_walks.simplify())
-    exists x.(man(x) & walks(x)) : f
-
-
-Demo of 'every girl chases a dog'
----------------------------------
-
-Individual words:
-
-    >>> every = GlueFormula("\P Q.all x.(P(x) -> Q(x))", "((gv -o gr) -o ((g -o G) -o G))")
-    >>> print(every)
-    \P Q.all x.(P(x) -> Q(x)) : ((gv -o gr) -o ((g -o G) -o G))
-    >>> girl = GlueFormula(r"\x.girl(x)", "(gv -o gr)")
-    >>> print(girl)
-    \x.girl(x) : (gv -o gr)
-    >>> chases = GlueFormula(r"\x y.chases(x,y)", "(g -o (h -o f))")
-    >>> print(chases)
-    \x y.chases(x,y) : (g -o (h -o f))
-    >>> a = GlueFormula("\P Q.some x.(P(x) and Q(x))", "((hv -o hr) -o ((h -o H) -o H))")
-    >>> print(a)
-    \P Q.exists x.(P(x) & Q(x)) : ((hv -o hr) -o ((h -o H) -o H))
-    >>> dog = GlueFormula(r"\x.dog(x)", "(hv -o hr)")
-    >>> print(dog)
-    \x.dog(x) : (hv -o hr)
-
-Noun Quantification can only be done one way:
-
-    >>> every_girl = every.applyto(girl)
-    >>> print(every_girl.simplify())
-    \Q.all x.(girl(x) -> Q(x)) : ((g -o G) -o G)
-    >>> a_dog = a.applyto(dog)
-    >>> print(a_dog.simplify())
-    \Q.exists x.(dog(x) & Q(x)) : ((h -o H) -o H)
-
-The first reading is achieved by combining 'chases' with 'a dog' first.
-Since 'a dog' requires something of the form '(h -o H)' we must
-get rid of the 'g' in the glue of 'chases'.  We will do this with
-the '-o elimination' rule.  So, x1 will be our subject placeholder.
-
-    >>> xPrime = GlueFormula("x1", "g")
-    >>> print(xPrime)
-    x1 : g
-    >>> xPrime_chases = chases.applyto(xPrime)
-    >>> print(xPrime_chases.simplify())
-    \y.chases(x1,y) : (h -o f)
-    >>> xPrime_chases_a_dog = a_dog.applyto(xPrime_chases)
-    >>> print(xPrime_chases_a_dog.simplify())
-    exists x.(dog(x) & chases(x1,x)) : f
-
-Now we can retract our subject placeholder using lambda-abstraction and
-combine with the true subject.
-
-    >>> chases_a_dog = xPrime_chases_a_dog.lambda_abstract(xPrime)
-    >>> print(chases_a_dog.simplify())
-    \x1.exists x.(dog(x) & chases(x1,x)) : (g -o f)
-    >>> every_girl_chases_a_dog = every_girl.applyto(chases_a_dog)
-    >>> r1 = every_girl_chases_a_dog.simplify()
-    >>> r2 = GlueFormula(r'all x.(girl(x) -> exists z1.(dog(z1) & chases(x,z1)))', 'f')
-    >>> r1 == r2
-    True
-
-The second reading is achieved by combining 'every girl' with 'chases' first.
-
-    >>> xPrime = GlueFormula("x1", "g")
-    >>> print(xPrime)
-    x1 : g
-    >>> xPrime_chases = chases.applyto(xPrime)
-    >>> print(xPrime_chases.simplify())
-    \y.chases(x1,y) : (h -o f)
-    >>> yPrime = GlueFormula("x2", "h")
-    >>> print(yPrime)
-    x2 : h
-    >>> xPrime_chases_yPrime = xPrime_chases.applyto(yPrime)
-    >>> print(xPrime_chases_yPrime.simplify())
-    chases(x1,x2) : f
-    >>> chases_yPrime = xPrime_chases_yPrime.lambda_abstract(xPrime)
-    >>> print(chases_yPrime.simplify())
-    \x1.chases(x1,x2) : (g -o f)
-    >>> every_girl_chases_yPrime = every_girl.applyto(chases_yPrime)
-    >>> print(every_girl_chases_yPrime.simplify())
-    all x.(girl(x) -> chases(x,x2)) : f
-    >>> every_girl_chases = every_girl_chases_yPrime.lambda_abstract(yPrime)
-    >>> print(every_girl_chases.simplify())
-    \x2.all x.(girl(x) -> chases(x,x2)) : (h -o f)
-    >>> every_girl_chases_a_dog = a_dog.applyto(every_girl_chases)
-    >>> r1 = every_girl_chases_a_dog.simplify()
-    >>> r2 = GlueFormula(r'exists x.(dog(x) & all z2.(girl(z2) -> chases(z2,x)))', 'f')
-    >>> r1 == r2
-    True
-
-
-Compilation
------------
-
-    >>> for cp in GlueFormula('m', '(b -o a)').compile(Counter()): print(cp)
-    m : (b -o a) : {1}
-    >>> for cp in GlueFormula('m', '((c -o b) -o a)').compile(Counter()): print(cp)
-    v1 : c : {1}
-    m : (b[1] -o a) : {2}
-    >>> for cp in GlueFormula('m', '((d -o (c -o b)) -o a)').compile(Counter()): print(cp)
-    v1 : c : {1}
-    v2 : d : {2}
-    m : (b[1, 2] -o a) : {3}
-    >>> for cp in GlueFormula('m', '((d -o e) -o ((c -o b) -o a))').compile(Counter()): print(cp)
-    v1 : d : {1}
-    v2 : c : {2}
-    m : (e[1] -o (b[2] -o a)) : {3}
-    >>> for cp in GlueFormula('m', '(((d -o c) -o b) -o a)').compile(Counter()): print(cp)
-    v1 : (d -o c) : {1}
-    m : (b[1] -o a) : {2}
-    >>> for cp in GlueFormula('m', '((((e -o d) -o c) -o b) -o a)').compile(Counter()): print(cp)
-    v1 : e : {1}
-    v2 : (d[1] -o c) : {2}
-    m : (b[2] -o a) : {3}
-
-
-Demo of 'a man walks' using Compilation
----------------------------------------
-
-Premises
-
-    >>> a = GlueFormula('\\P Q.some x.(P(x) and Q(x))', '((gv -o gr) -o ((g -o G) -o G))')
-    >>> print(a)
-    \P Q.exists x.(P(x) & Q(x)) : ((gv -o gr) -o ((g -o G) -o G))
-
-    >>> man = GlueFormula('\\x.man(x)', '(gv -o gr)')
-    >>> print(man)
-    \x.man(x) : (gv -o gr)
-
-    >>> walks = GlueFormula('\\x.walks(x)', '(g -o f)')
-    >>> print(walks)
-    \x.walks(x) : (g -o f)
-
-Compiled Premises:
-
-    >>> counter = Counter()
-    >>> ahc = a.compile(counter)
-    >>> g1 = ahc[0]
-    >>> print(g1)
-    v1 : gv : {1}
-    >>> g2 = ahc[1]
-    >>> print(g2)
-    v2 : g : {2}
-    >>> g3 = ahc[2]
-    >>> print(g3)
-    \P Q.exists x.(P(x) & Q(x)) : (gr[1] -o (G[2] -o G)) : {3}
-    >>> g4 = man.compile(counter)[0]
-    >>> print(g4)
-    \x.man(x) : (gv -o gr) : {4}
-    >>> g5 = walks.compile(counter)[0]
-    >>> print(g5)
-    \x.walks(x) : (g -o f) : {5}
-
-Derivation:
-
-    >>> g14 = g4.applyto(g1)
-    >>> print(g14.simplify())
-    man(v1) : gr : {1, 4}
-    >>> g134 = g3.applyto(g14)
-    >>> print(g134.simplify())
-    \Q.exists x.(man(x) & Q(x)) : (G[2] -o G) : {1, 3, 4}
-    >>> g25 = g5.applyto(g2)
-    >>> print(g25.simplify())
-    walks(v2) : f : {2, 5}
-    >>> g12345 = g134.applyto(g25)
-    >>> print(g12345.simplify())
-    exists x.(man(x) & walks(x)) : f : {1, 2, 3, 4, 5}
-
----------------------------------
-Dependency Graph to Glue Formulas
----------------------------------
-    >>> from nltk.corpus.reader.dependency import DependencyGraph
-
-    >>> depgraph = DependencyGraph("""1        John    _       NNP     NNP     _       2       SUBJ    _       _
-    ... 2      sees    _       VB      VB      _       0       ROOT    _       _
-    ... 3      a       _       ex_quant        ex_quant        _       4       SPEC    _       _
-    ... 4      dog     _       NN      NN      _       2       OBJ     _       _
-    ... """)
-    >>> gfl = GlueDict('nltk:grammars/sample_grammars/glue.semtype').to_glueformula_list(depgraph)
-    >>> print(gfl) # doctest: +SKIP
-    [\x y.sees(x,y) : (f -o (i -o g)),
-     \x.dog(x) : (iv -o ir),
-     \P Q.exists x.(P(x) & Q(x)) : ((iv -o ir) -o ((i -o I3) -o I3)),
-     \P Q.exists x.(P(x) & Q(x)) : ((fv -o fr) -o ((f -o F4) -o F4)),
-     \x.John(x) : (fv -o fr)]
-    >>> glue = Glue()
-    >>> for r in sorted([r.simplify().normalize() for r in glue.get_readings(glue.gfl_to_compiled(gfl))], key=str):
-    ...     print(r)
-    exists z1.(John(z1) & exists z2.(dog(z2) & sees(z1,z2)))
-    exists z1.(dog(z1) & exists z2.(John(z2) & sees(z2,z1)))
-
------------------------------------
-Dependency Graph to LFG f-structure
------------------------------------
-    >>> from nltk.sem.lfg import FStructure
-
-    >>> fstruct = FStructure.read_depgraph(depgraph)
-
-    >>> print(fstruct) # doctest: +SKIP
-    f:[pred 'sees'
-       obj h:[pred 'dog'
-              spec 'a']
-       subj g:[pred 'John']]
-
-    >>> fstruct.to_depgraph().tree().pprint()
-    (sees (dog a) John)
-
----------------------------------
-LFG f-structure to Glue
----------------------------------
-    >>> fstruct.to_glueformula_list(GlueDict('nltk:grammars/sample_grammars/glue.semtype')) # doctest: +SKIP
-    [\x y.sees(x,y) : (i -o (g -o f)),
-     \x.dog(x) : (gv -o gr),
-     \P Q.exists x.(P(x) & Q(x)) : ((gv -o gr) -o ((g -o G3) -o G3)),
-     \P Q.exists x.(P(x) & Q(x)) : ((iv -o ir) -o ((i -o I4) -o I4)),
-     \x.John(x) : (iv -o ir)]
-
-.. see gluesemantics_malt.doctest for more
diff --git a/nlp_resource_data/nltk/test/gluesemantics_malt.doctest b/nlp_resource_data/nltk/test/gluesemantics_malt.doctest

deleted file mode 100644 (file)

index 1329794..0000000
--- a/nlp_resource_data/nltk/test/gluesemantics_malt.doctest
+++ /dev/null
@@ -1,68 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-.. see also: gluesemantics.doctest
-
-==============================================================================
- Glue Semantics
-==============================================================================
-
-    >>> from nltk.sem.glue import *
-    >>> nltk.sem.logic._counter._value = 0
-
---------------------------------
-Initialize the Dependency Parser
---------------------------------
-    >>> from nltk.parse.malt import MaltParser
-
-    >>> tagger = RegexpTagger(
-    ...     [('^(John|Mary)$', 'NNP'),
-    ...      ('^(sees|chases)$', 'VB'),
-    ...      ('^(a)$', 'ex_quant'),
-    ...      ('^(every)$', 'univ_quant'),
-    ...      ('^(girl|dog)$', 'NN')
-    ... ])
-    >>> depparser = MaltParser(tagger=tagger)
-
---------------------
-Automated Derivation
---------------------
-    >>> glue = Glue(depparser=depparser)
-    >>> readings = glue.parse_to_meaning('every girl chases a dog'.split())
-    >>> for reading in sorted([r.simplify().normalize() for r in readings], key=str):
-    ...     print(reading.normalize())
-    all z1.(girl(z1) -> exists z2.(dog(z2) & chases(z1,z2)))
-    exists z1.(dog(z1) & all z2.(girl(z2) -> chases(z2,z1)))
-
-    >>> drtglue = DrtGlue(depparser=depparser)
-    >>> readings = drtglue.parse_to_meaning('every girl chases a dog'.split())
-    >>> for reading in sorted([r.simplify().normalize() for r in readings], key=str):
-    ...     print(reading)
-    ([],[(([z1],[girl(z1)]) -> ([z2],[dog(z2), chases(z1,z2)]))])
-    ([z1],[dog(z1), (([z2],[girl(z2)]) -> ([],[chases(z2,z1)]))])
-
---------------
-With inference
---------------
-
-Checking for equality of two DRSs is very useful when generating readings of a sentence.
-For example, the ``glue`` module generates two readings for the sentence
-*John sees Mary*:
-
-    >>> from nltk.sem.glue import DrtGlue
-    >>> readings = drtglue.parse_to_meaning('John sees Mary'.split())
-    >>> for drs in sorted([r.simplify().normalize() for r in readings], key=str):
-    ...     print(drs)
-    ([z1,z2],[John(z1), Mary(z2), sees(z1,z2)])
-    ([z1,z2],[Mary(z1), John(z2), sees(z2,z1)])
-
-However, it is easy to tell that these two readings are logically the
-same, and therefore one of them is superfluous.  We can use the theorem prover
-to determine this equivalence, and then delete one of them.  A particular
-theorem prover may be specified, or the argument may be left off to use the
-default.
-
-    >>> readings[0].equiv(readings[1])
-    True
-
-
diff --git a/nlp_resource_data/nltk/test/gluesemantics_malt_fixt.py b/nlp_resource_data/nltk/test/gluesemantics_malt_fixt.py

deleted file mode 100644 (file)

index 70e149a..0000000
--- a/nlp_resource_data/nltk/test/gluesemantics_malt_fixt.py
+++ /dev/null
@@ -1,12 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import
-
-
-def setup_module(module):
-    from nose import SkipTest
-    from nltk.parse.malt import MaltParser
-
-    try:
-        depparser = MaltParser('maltparser-1.7.2')
-    except LookupError:
-        raise SkipTest("MaltParser is not available")
diff --git a/nlp_resource_data/nltk/test/grammar.doctest b/nlp_resource_data/nltk/test/grammar.doctest

deleted file mode 100644 (file)

index 7cae9d9..0000000
--- a/nlp_resource_data/nltk/test/grammar.doctest
+++ /dev/null
@@ -1,48 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-===============
-Grammar Parsing
-===============
-
-Grammars can be parsed from strings:
-
-    >>> from nltk import CFG
-    >>> grammar = CFG.fromstring("""
-    ... S -> NP VP
-    ... PP -> P NP
-    ... NP -> Det N | NP PP
-    ... VP -> V NP | VP PP
-    ... Det -> 'a' | 'the'
-    ... N -> 'dog' | 'cat'
-    ... V -> 'chased' | 'sat'
-    ... P -> 'on' | 'in'
-    ... """)
-    >>> grammar
-    <Grammar with 14 productions>
-    >>> grammar.start()
-    S
-    >>> grammar.productions() # doctest: +NORMALIZE_WHITESPACE
-    [S -> NP VP, PP -> P NP, NP -> Det N, NP -> NP PP, VP -> V NP, VP -> VP PP,
-    Det -> 'a', Det -> 'the', N -> 'dog', N -> 'cat', V -> 'chased', V -> 'sat',
-    P -> 'on', P -> 'in']
-
-Probabilistic CFGs:
-   
-    >>> from nltk import PCFG
-    >>> toy_pcfg1 = PCFG.fromstring("""
-    ... S -> NP VP [1.0]
-    ... NP -> Det N [0.5] | NP PP [0.25] | 'John' [0.1] | 'I' [0.15]
-    ... Det -> 'the' [0.8] | 'my' [0.2]
-    ... N -> 'man' [0.5] | 'telescope' [0.5]
-    ... VP -> VP PP [0.1] | V NP [0.7] | V [0.2]
-    ... V -> 'ate' [0.35] | 'saw' [0.65]
-    ... PP -> P NP [1.0]
-    ... P -> 'with' [0.61] | 'under' [0.39]
-    ... """)
-
-Chomsky Normal Form grammar (Test for bug 474)
-
-    >>> g = CFG.fromstring("VP^<TOP> -> VBP NP^<VP-TOP>")
-    >>> g.productions()[0].lhs()
-    VP^<TOP>
diff --git a/nlp_resource_data/nltk/test/grammartestsuites.doctest b/nlp_resource_data/nltk/test/grammartestsuites.doctest

deleted file mode 100644 (file)

index 4221537..0000000
--- a/nlp_resource_data/nltk/test/grammartestsuites.doctest
+++ /dev/null
@@ -1,109 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-==========================
- Test Suites for Grammars
-==========================
-
-Sentences in the test suite are divided into two classes:
-
-- grammatical (*accept*) and
-- ungrammatical (*reject*).
-
-If a sentence should parse according to the grammar, the value of
-``trees`` will be a non-empty list. If a sentence should be rejected
-according to the grammar, then the value of ``trees`` will be ``None``.
-
-    >>> from nltk.parse import TestGrammar
-    >>> germantest1 = {}
-    >>> germantest1['doc'] = "Tests for person agreement"
-    >>> germantest1['accept'] = [
-    ... 'ich komme',
-    ... 'ich sehe mich',
-    ... 'du kommst',
-    ... 'du siehst mich',
-    ... 'sie kommt',
-    ... 'sie sieht mich',
-    ... 'ihr kommt',
-    ... 'wir kommen',
-    ... 'sie kommen',
-    ... 'du magst mich',
-    ... 'er mag mich',
-    ... 'du folgst mir',
-    ... 'sie hilft mir',
-    ... ]
-    >>> germantest1['reject'] = [
-    ... 'ich kommt',
-    ... 'ich kommst',
-    ... 'ich siehst mich',
-    ... 'du komme',
-    ... 'du sehe mich',
-    ... 'du kommt',
-    ... 'er komme',
-    ... 'er siehst mich',
-    ... 'wir komme',
-    ... 'wir kommst',
-    ... 'die Katzen kommst',
-    ... 'sie komme',
-    ... 'sie kommst',
-    ... 'du mag mich',
-    ... 'er magst mich',
-    ... 'du folgt mir',
-    ... 'sie hilfst mir',
-    ... ]
-    >>> germantest2 = {}
-    >>> germantest2['doc'] = "Tests for number agreement"
-    >>> germantest2['accept'] = [
-    ... 'der Hund kommt',
-    ... 'die Hunde kommen',
-    ... 'ich komme',
-    ... 'wir kommen',
-    ... 'ich sehe die Katzen',
-    ... 'ich folge den Katzen',
-    ... 'ich sehe die Katzen',
-    ... 'ich folge den Katzen',
-    ... 'wir sehen die Katzen',
-    ... 'wir folgen den Katzen'
-    ... ]
-    >>> germantest2['reject'] = [
-    ... 'ich kommen',
-    ... 'wir komme',
-    ... 'der Hunde kommt',
-    ... 'der Hunde kommen',
-    ... 'die Katzen kommt',
-    ... 'ich sehe der Hunde', 
-    ... 'ich folge den Hund',
-    ... 'ich sehen der Hunde', 
-    ... 'ich folgen den Hund',
-    ... 'wir sehe die Katzen',
-    ... 'wir folge den Katzen'
-    ... ]
-    >>> germantest3 = {}
-    >>> germantest3['doc'] = "Tests for case government and subcategorization"
-    >>> germantest3['accept'] = [
-    ... 'der Hund sieht mich', 
-    ... 'der Hund kommt',
-    ... 'ich sehe den Hund',
-    ... 'ich helfe dem Hund',
-    ... ]
-    >>> germantest3['reject'] = [
-    ... 'ich sehe',
-    ... 'ich helfe',
-    ... 'ich komme den Hund',
-    ... 'ich sehe den Hund die Katzen',
-    ... 'du hilfst mich',
-    ... 'du siehst mir',
-    ... 'du siehst ich',
-    ... 'der Hunde kommt mich',
-    ... 'die Hunde sehe die Hunde', 
-    ... 'der Hund sehe die Hunde', 
-    ... 'ich hilft den Hund',
-    ... 'ich hilft der Hund',
-    ... 'ich sehe dem Hund',
-    ... ]
-    >>> germantestsuites = [germantest1, germantest2, germantest3]
-    >>> tester = TestGrammar('grammars/book_grammars/german.fcfg', germantestsuites)
-    >>> tester.run()
-    Tests for person agreement: All tests passed!
-    Tests for number agreement: All tests passed!
-    Tests for case government and subcategorization: All tests passed!
diff --git a/nlp_resource_data/nltk/test/index.doctest b/nlp_resource_data/nltk/test/index.doctest

deleted file mode 100644 (file)

index 7ce8167..0000000
--- a/nlp_resource_data/nltk/test/index.doctest
+++ /dev/null
@@ -1,100 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-.. _align howto: align.html
-.. _ccg howto: ccg.html
-.. _chat80 howto: chat80.html
-.. _childes howto: childes.html
-.. _chunk howto: chunk.html
-.. _classify howto: classify.html
-.. _collocations howto: collocations.html
-.. _compat howto: compat.html
-.. _corpus howto: corpus.html
-.. _data howto: data.html
-.. _dependency howto: dependency.html
-.. _discourse howto: discourse.html
-.. _drt howto: drt.html
-.. _featgram howto: featgram.html
-.. _featstruct howto: featstruct.html
-.. _framenet howto: framenet.html
-.. _generate howto: generate.html
-.. _gluesemantics howto: gluesemantics.html
-.. _gluesemantics_malt howto: gluesemantics_malt.html
-.. _grammar howto: grammar.html
-.. _grammartestsuites howto: grammartestsuites.html
-.. _index howto: index.html
-.. _inference howto: inference.html
-.. _internals howto: internals.html
-.. _japanese howto: japanese.html
-.. _logic howto: logic.html
-.. _metrics howto: metrics.html
-.. _misc howto: misc.html
-.. _nonmonotonic howto: nonmonotonic.html
-.. _parse howto: parse.html
-.. _portuguese_en howto: portuguese_en.html
-.. _probability howto: probability.html
-.. _propbank howto: propbank.html
-.. _relextract howto: relextract.html
-.. _resolution howto: resolution.html
-.. _semantics howto: semantics.html
-.. _simple howto: simple.html
-.. _stem howto: stem.html
-.. _tag howto: tag.html
-.. _tokenize howto: tokenize.html
-.. _toolbox howto: toolbox.html
-.. _tree howto: tree.html
-.. _treetransforms howto: treetransforms.html
-.. _util howto: util.html
-.. _wordnet howto: wordnet.html
-.. _wordnet_lch howto: wordnet_lch.html
-
-===========
-NLTK HOWTOs
-===========
-
-* `align HOWTO`_
-* `ccg HOWTO`_
-* `chat80 HOWTO`_
-* `childes HOWTO`_
-* `chunk HOWTO`_
-* `classify HOWTO`_
-* `collocations HOWTO`_
-* `compat HOWTO`_
-* `corpus HOWTO`_
-* `data HOWTO`_
-* `dependency HOWTO`_
-* `discourse HOWTO`_
-* `drt HOWTO`_
-* `featgram HOWTO`_
-* `featstruct HOWTO`_
-* `framenet HOWTO`_
-* `generate HOWTO`_
-* `gluesemantics HOWTO`_
-* `gluesemantics_malt HOWTO`_
-* `grammar HOWTO`_
-* `grammartestsuites HOWTO`_
-* `index HOWTO`_
-* `inference HOWTO`_
-* `internals HOWTO`_
-* `japanese HOWTO`_
-* `logic HOWTO`_
-* `metrics HOWTO`_
-* `misc HOWTO`_
-* `nonmonotonic HOWTO`_
-* `parse HOWTO`_
-* `portuguese_en HOWTO`_
-* `probability HOWTO`_
-* `propbank HOWTO`_
-* `relextract HOWTO`_
-* `resolution HOWTO`_
-* `semantics HOWTO`_
-* `simple HOWTO`_
-* `stem HOWTO`_
-* `tag HOWTO`_
-* `tokenize HOWTO`_
-* `toolbox HOWTO`_
-* `tree HOWTO`_
-* `treetransforms HOWTO`_
-* `util HOWTO`_
-* `wordnet HOWTO`_
-* `wordnet_lch HOWTO`_
diff --git a/nlp_resource_data/nltk/test/inference.doctest b/nlp_resource_data/nltk/test/inference.doctest

deleted file mode 100644 (file)

index c2a41a3..0000000
--- a/nlp_resource_data/nltk/test/inference.doctest
+++ /dev/null
@@ -1,534 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-====================================
-Logical Inference and Model Building
-====================================
-
-    >>> from nltk import *
-    >>> from nltk.sem.drt import DrtParser
-    >>> from nltk.sem import logic
-    >>> logic._counter._value = 0
-
-------------
-Introduction
-------------
-
-Within the area of automated reasoning, first order theorem proving
-and model building (or model generation) have both received much
-attention, and have given rise to highly sophisticated techniques. We
-focus therefore on providing an NLTK interface to third party tools
-for these tasks.  In particular, the module ``nltk.inference`` can be
-used to access both theorem provers and model builders.
-
----------------------------------
-NLTK Interface to Theorem Provers
----------------------------------
-
-The main class used to interface with a theorem prover is the ``Prover``
-class, found in ``nltk.inference.api``.  The ``prove()`` method takes three optional
-arguments: a goal, a list of assumptions, and a ``verbose`` boolean to
-indicate whether the proof should be printed to the console.  The proof goal
-and any assumptions need to be instances of the ``Expression`` class
-specified by ``nltk.sem.logic``.  There are currently three theorem provers
-included with NLTK: ``Prover9``, ``TableauProver``, and
-``ResolutionProver``.  The first is an off-the-shelf prover, while the other
-two are written in Python and included in the ``nltk.inference`` package.
-
-    >>> from nltk.sem import Expression
-    >>> read_expr = Expression.fromstring
-    >>> p1 = read_expr('man(socrates)')
-    >>> p2 = read_expr('all x.(man(x) -> mortal(x))')
-    >>> c  = read_expr('mortal(socrates)')
-    >>> Prover9().prove(c, [p1,p2])
-    True
-    >>> TableauProver().prove(c, [p1,p2])
-    True
-    >>> ResolutionProver().prove(c, [p1,p2], verbose=True)
-    [1] {-mortal(socrates)}     A
-    [2] {man(socrates)}         A
-    [3] {-man(z2), mortal(z2)}  A
-    [4] {-man(socrates)}        (1, 3)
-    [5] {mortal(socrates)}      (2, 3)
-    [6] {}                      (1, 5)
-    <BLANKLINE>
-    True
-
----------------------
-The ``ProverCommand``
----------------------
-
-A ``ProverCommand`` is a stateful holder for a theorem
-prover.  The command stores a theorem prover instance (of type ``Prover``),
-a goal, a list of assumptions, the result of the proof, and a string version
-of the entire proof.  Corresponding to the three included ``Prover``
-implementations, there are three ``ProverCommand`` implementations:
-``Prover9Command``, ``TableauProverCommand``, and
-``ResolutionProverCommand``.
-
-The ``ProverCommand``'s constructor takes its goal and assumptions.  The
-``prove()`` command executes the ``Prover`` and ``proof()``
-returns a String form of the proof.
-If the ``prove()`` method has not been called,
-then the prover command will be unable to display a proof.
-
-    >>> prover = ResolutionProverCommand(c, [p1,p2])
-    >>> print(prover.proof()) # doctest: +ELLIPSIS
-    Traceback (most recent call last):
-      File "...", line 1212, in __run
-        compileflags, 1) in test.globs
-      File "<doctest nltk/test/inference.doctest[10]>", line 1, in <module>
-      File "...", line ..., in proof
-        raise LookupError("You have to call prove() first to get a proof!")
-    LookupError: You have to call prove() first to get a proof!
-    >>> prover.prove()
-    True
-    >>> print(prover.proof())
-    [1] {-mortal(socrates)}     A
-    [2] {man(socrates)}         A
-    [3] {-man(z4), mortal(z4)}  A
-    [4] {-man(socrates)}        (1, 3)
-    [5] {mortal(socrates)}      (2, 3)
-    [6] {}                      (1, 5)
-    <BLANKLINE>
-
-The prover command stores the result of proving so that if ``prove()`` is
-called again, then the command can return the result without executing the
-prover again.  This allows the user to access the result of the proof without
-wasting time re-computing what it already knows.
-
-    >>> prover.prove()
-    True
-    >>> prover.prove()
-    True
-
-The assumptions and goal may be accessed using the ``assumptions()`` and
-``goal()`` methods, respectively.
-
-    >>> prover.assumptions()
-    [<ApplicationExpression man(socrates)>, <AllExpression all x.(man(x) -> mortal(x))>]
-    >>> prover.goal()
-    <ApplicationExpression mortal(socrates)>
-
-The assumptions list may be modified using the ``add_assumptions()`` and
-``retract_assumptions()`` methods.  Both methods take a list of ``Expression``
-objects.  Since adding or removing assumptions may change the result of the
-proof, the stored result is cleared when either of these methods is called.
-That means that ``proof()`` will be unavailable until ``prove()`` is called and
-a call to ``prove()`` will execute the theorem prover.
-
-    >>> prover.retract_assumptions([read_expr('man(socrates)')])
-    >>> print(prover.proof()) # doctest: +ELLIPSIS
-    Traceback (most recent call last):
-      File "...", line 1212, in __run
-        compileflags, 1) in test.globs
-      File "<doctest nltk/test/inference.doctest[10]>", line 1, in <module>
-      File "...", line ..., in proof
-        raise LookupError("You have to call prove() first to get a proof!")
-    LookupError: You have to call prove() first to get a proof!
-    >>> prover.prove()
-    False
-    >>> print(prover.proof())
-    [1] {-mortal(socrates)}     A
-    [2] {-man(z6), mortal(z6)}  A
-    [3] {-man(socrates)}        (1, 2)
-    <BLANKLINE>
-    >>> prover.add_assumptions([read_expr('man(socrates)')])
-    >>> prover.prove()
-    True
-
--------
-Prover9
--------
-
-Prover9 Installation
-~~~~~~~~~~~~~~~~~~~~
-
-You can download Prover9 from http://www.cs.unm.edu/~mccune/prover9/.
-
-Extract the source code into a suitable directory and follow the
-instructions in the Prover9 ``README.make`` file to compile the executables.
-Install these into an appropriate location; the
-``prover9_search`` variable is currently configured to look in the
-following locations:
-
-    >>> p = Prover9()
-    >>> p.binary_locations() # doctest: +NORMALIZE_WHITESPACE
-    ['/usr/local/bin/prover9',
-     '/usr/local/bin/prover9/bin',
-     '/usr/local/bin',
-     '/usr/bin',
-     '/usr/local/prover9',
-     '/usr/local/share/prover9']
-
-Alternatively, the environment variable ``PROVER9HOME`` may be configured with
-the binary's location.
-
-The path to the correct directory can be set manually in the following
-manner:
-
-    >>> config_prover9(path='/usr/local/bin') # doctest: +SKIP
-    [Found prover9: /usr/local/bin/prover9]
-
-If the executables cannot be found, ``Prover9`` will raise a ``LookupError``:
-
-    >>> p.prove() # doctest: +SKIP
-    Traceback (most recent call last):
-      ...
-    LookupError:
-    ===========================================================================
-      NLTK was unable to find the prover9 executable!  Use config_prover9() or
-      set the PROVER9HOME environment variable.
-    <BLANKLINE>
-        >> config_prover9('/path/to/prover9')
-    <BLANKLINE>
-      For more information, on prover9, see:
-        <http://www.cs.unm.edu/~mccune/prover9/>
-    ===========================================================================
-
-
-Using Prover9
-~~~~~~~~~~~~~
-
-The general case in theorem proving is to determine whether ``S |- g``
-holds, where ``S`` is a possibly empty set of assumptions, and ``g``
-is a proof goal.
-
-As mentioned earlier, NLTK input to ``Prover9`` must be
-``Expression``\ s of ``nltk.sem.logic``. A ``Prover9`` instance is
-initialized with a proof goal and, possibly, some assumptions. The
-``prove()`` method attempts to find a proof of the goal, given the
-list of assumptions (in this case, none).
-
-    >>> goal = read_expr('(man(x) <-> --man(x))')
-    >>> prover = Prover9Command(goal)
-    >>> prover.prove()
-    True
-
-Given a ``ProverCommand`` instance ``prover``, the method
-``prover.proof()`` will return a String of the extensive proof information
-provided by Prover9, shown in abbreviated form here::
-
-    ============================== Prover9 ===============================
-    Prover9 (32) version ...
-    Process ... was started by ... on ...
-    ...
-    The command was ".../prover9 -f ...".
-    ============================== end of head ===========================
-
-    ============================== INPUT =================================
-
-    % Reading from file /var/...
-
-
-    formulas(goals).
-    (all x (man(x) -> man(x))).
-    end_of_list.
-
-    ...
-    ============================== end of search =========================
-
-    THEOREM PROVED
-
-    Exiting with 1 proof.
-
-    Process 6317 exit (max_proofs) Mon Jan 21 15:23:28 2008
-
-
-As mentioned earlier, we may want to list some assumptions for
-the proof, as shown here.
-
-    >>> g = read_expr('mortal(socrates)')
-    >>> a1 = read_expr('all x.(man(x) -> mortal(x))')
-    >>> prover = Prover9Command(g, assumptions=[a1])
-    >>> prover.print_assumptions()
-    all x.(man(x) -> mortal(x))
-
-However, the assumptions are not sufficient to derive the goal:
-
-    >>> print(prover.prove())
-    False
-
-So let's add another assumption:
-
-    >>> a2 = read_expr('man(socrates)')
-    >>> prover.add_assumptions([a2])
-    >>> prover.print_assumptions()
-    all x.(man(x) -> mortal(x))
-    man(socrates)
-    >>> print(prover.prove())
-    True
-
-We can also show the assumptions in ``Prover9`` format.
-
-    >>> prover.print_assumptions(output_format='Prover9')
-    all x (man(x) -> mortal(x))
-    man(socrates)
-
-    >>> prover.print_assumptions(output_format='Spass')
-    Traceback (most recent call last):
-      . . .
-    NameError: Unrecognized value for 'output_format': Spass
-
-Assumptions can be retracted from the list of assumptions.
-
-    >>> prover.retract_assumptions([a1])
-    >>> prover.print_assumptions()
-    man(socrates)
-    >>> prover.retract_assumptions([a1])
-
-Statements can be loaded from a file and parsed. We can then add these
-statements as new assumptions.
-
-    >>> g = read_expr('all x.(boxer(x) -> -boxerdog(x))')
-    >>> prover = Prover9Command(g)
-    >>> prover.prove()
-    False
-    >>> import nltk.data
-    >>> new = nltk.data.load('grammars/sample_grammars/background0.fol')
-    >>> for a in new:
-    ...     print(a)
-    all x.(boxerdog(x) -> dog(x))
-    all x.(boxer(x) -> person(x))
-    all x.-(dog(x) & person(x))
-    exists x.boxer(x)
-    exists x.boxerdog(x)
-    >>> prover.add_assumptions(new)
-    >>> print(prover.prove())
-    True
-    >>> print(prover.proof()) # doctest: +ELLIPSIS
-    ============================== prooftrans ============================
-    Prover9 (...) version ...
-    Process ... was started by ... on ...
-    ...
-    The command was ".../prover9".
-    ============================== end of head ===========================
-    <BLANKLINE>
-    ============================== end of input ==========================
-    <BLANKLINE>
-    ============================== PROOF =================================
-    <BLANKLINE>
-    % -------- Comments from original proof --------
-    % Proof 1 at ... seconds.
-    % Length of proof is 13.
-    % Level of proof is 4.
-    % Maximum clause weight is 0.000.
-    % Given clauses 0.
-    <BLANKLINE>
-    <BLANKLINE>
-    1 (all x (boxerdog(x) -> dog(x))).  [assumption].
-    2 (all x (boxer(x) -> person(x))).  [assumption].
-    3 (all x -(dog(x) & person(x))).  [assumption].
-    6 (all x (boxer(x) -> -boxerdog(x))).  [goal].
-    8 -boxerdog(x) | dog(x).  [clausify(1)].
-    9 boxerdog(c3).  [deny(6)].
-    11 -boxer(x) | person(x).  [clausify(2)].
-    12 boxer(c3).  [deny(6)].
-    14 -dog(x) | -person(x).  [clausify(3)].
-    15 dog(c3).  [resolve(9,a,8,a)].
-    18 person(c3).  [resolve(12,a,11,a)].
-    19 -person(c3).  [resolve(15,a,14,a)].
-    20 $F.  [resolve(19,a,18,a)].
-    <BLANKLINE>
-    ============================== end of proof ==========================
-
-----------------------
-The equiv() method
-----------------------
-
-One application of the theorem prover functionality is to check if
-two Expressions have the same meaning.
-The ``equiv()`` method calls a theorem prover to determine whether two
-Expressions are logically equivalent.
-
-    >>> a = read_expr(r'exists x.(man(x) & walks(x))')
-    >>> b = read_expr(r'exists x.(walks(x) & man(x))')
-    >>> print(a.equiv(b))
-    True
-
-The same method can be used on Discourse Representation Structures (DRSs).
-In this case, each DRS is converted to a first order logic form, and then
-passed to the theorem prover.
-
-    >>> dp = DrtParser()
-    >>> a = dp.parse(r'([x],[man(x), walks(x)])')
-    >>> b = dp.parse(r'([x],[walks(x), man(x)])')
-    >>> print(a.equiv(b))
-    True
-
-
---------------------------------
-NLTK Interface to Model Builders
---------------------------------
-
-The top-level interface to model builders is parallel to that for
-theorem-provers. The ``ModelBuilder`` interface is located
-in ``nltk.inference.api``.  It is currently only implemented by
-``Mace``, which interfaces with the Mace4 model builder.
-
-Typically we use a model builder to show that some set of formulas has
-a model, and is therefore consistent. One way of doing this is by
-treating our candidate set of sentences as assumptions, and leaving
-the goal unspecified.
-Thus, the following interaction shows how both ``{a, c1}`` and ``{a, c2}``
-are consistent sets, since Mace succeeds in building a
-model for each of them, while ``{c1, c2}`` is inconsistent.
-
-    >>> a3 = read_expr('exists x.(man(x) and walks(x))')
-    >>> c1 = read_expr('mortal(socrates)')
-    >>> c2 = read_expr('-mortal(socrates)')
-    >>> mace = Mace()
-    >>> print(mace.build_model(None, [a3, c1]))
-    True
-    >>> print(mace.build_model(None, [a3, c2]))
-    True
-
-We can also use the model builder as an adjunct to a theorem prover.
-Let's suppose we are trying to prove ``S |- g``, i.e. that ``g``
-is logically entailed by assumptions ``S = {s1, s2, ..., sn}``.
-We can give this same input to Mace4, and the model builder will try to
-find a counterexample, that is, to show that ``g`` does *not* follow
-from ``S``. So, given this input, Mace4 will try to find a model for
-the set ``S' = {s1, s2, ..., sn, (not g)}``. If ``g`` fails to follow
-from ``S``, then Mace4 may well return with a counterexample faster
-than Prover9 concludes that it cannot find the required proof.
-Conversely, if ``g`` *is* provable from ``S``, Mace4 may take a long
-time unsuccessfully trying to find a counter model, and will eventually give up.
-
-In the following example, we see that the model builder does succeed
-in building a model of the assumptions together with the negation of
-the goal. That is, it succeeds in finding a model
-where there is a woman that every man loves; Adam is a man; Eve is a
-woman; but Adam does not love Eve.
-
-    >>> a4 = read_expr('exists y. (woman(y) & all x. (man(x) -> love(x,y)))')
-    >>> a5 = read_expr('man(adam)')
-    >>> a6 = read_expr('woman(eve)')
-    >>> g = read_expr('love(adam,eve)')
-    >>> print(mace.build_model(g, [a4, a5, a6]))
-    True
-
-The Model Builder will fail to find a model if the assumptions do entail
-the goal.  Mace will continue to look for models of ever-increasing sizes
-until the end_size number is reached.  By default, end_size is 500,
-but it can be set manually for quicker response time.
-
-    >>> a7 = read_expr('all x.(man(x) -> mortal(x))')
-    >>> a8 = read_expr('man(socrates)')
-    >>> g2 = read_expr('mortal(socrates)')
-    >>> print(Mace(end_size=50).build_model(g2, [a7, a8]))
-    False
-
-There is also a ``ModelBuilderCommand`` class that, like ``ProverCommand``,
-stores a ``ModelBuilder``, a goal, assumptions, a result, and a model.  The
-only implementation in NLTK is ``MaceCommand``.
-
-
------
-Mace4
------
-
-Mace4 Installation
-~~~~~~~~~~~~~~~~~~
-
-Mace4 is packaged with Prover9, and can be downloaded from the same
-source, namely http://www.cs.unm.edu/~mccune/prover9/. It is installed
-in the same manner as Prover9.
-
-Using Mace4
-~~~~~~~~~~~
-
-Check whether Mace4 can find a model.
-
-    >>> a = read_expr('(see(mary,john) & -(mary = john))')
-    >>> mb = MaceCommand(assumptions=[a])
-    >>> mb.build_model()
-    True
-
-Show the model in 'tabular' format.
-
-    >>> print(mb.model(format='tabular'))
-    % number = 1
-    % seconds = 0
-    <BLANKLINE>
-    % Interpretation of size 2
-    <BLANKLINE>
-     john : 0
-    <BLANKLINE>
-     mary : 1
-    <BLANKLINE>
-     see :
-           | 0 1
-        ---+----
-         0 | 0 0
-         1 | 1 0
-    <BLANKLINE>
-
-Show the model in 'cooked' format.
-
-    >>> print(mb.model(format='cooked'))
-    % number = 1
-    % seconds = 0
-    <BLANKLINE>
-    % Interpretation of size 2
-    <BLANKLINE>
-    john = 0.
-    <BLANKLINE>
-    mary = 1.
-    <BLANKLINE>
-    - see(0,0).
-    - see(0,1).
-      see(1,0).
-    - see(1,1).
-    <BLANKLINE>
-
-The property ``valuation`` accesses the stored ``Valuation``.
-
-    >>> print(mb.valuation)
-    {'john': 'a', 'mary': 'b', 'see': {('b', 'a')}}
-
-We can return to our earlier example and inspect the model:
-
-    >>> mb = MaceCommand(g, assumptions=[a4, a5, a6])
-    >>> m = mb.build_model()
-    >>> print(mb.model(format='cooked'))
-    % number = 1
-    % seconds = 0
-    <BLANKLINE>
-    % Interpretation of size 2
-    <BLANKLINE>
-    adam = 0.
-    <BLANKLINE>
-    eve = 0.
-    <BLANKLINE>
-    c1 = 1.
-    <BLANKLINE>
-      man(0).
-    - man(1).
-    <BLANKLINE>
-      woman(0).
-      woman(1).
-    <BLANKLINE>
-    - love(0,0).
-      love(0,1).
-    - love(1,0).
-    - love(1,1).
-    <BLANKLINE>
-
-Here, we can see that ``adam`` and ``eve`` have been assigned the same
-individual, namely ``0`` as value; ``0`` is both a man and a woman; a second
-individual ``1`` is also a woman; and ``0`` loves ``1``. Thus, this is
-an interpretation in which there is a woman that every man loves but
-Adam doesn't love Eve.
-
-Mace can also be used with propositional logic.
-
-    >>> p = read_expr('P')
-    >>> q = read_expr('Q')
-    >>> mb = MaceCommand(q, [p, p>-q])
-    >>> mb.build_model()
-    True
-    >>> mb.valuation['P']
-    True
-    >>> mb.valuation['Q']
-    False
diff --git a/nlp_resource_data/nltk/test/inference_fixt.py b/nlp_resource_data/nltk/test/inference_fixt.py

deleted file mode 100644 (file)

index 3fe9d03..0000000
--- a/nlp_resource_data/nltk/test/inference_fixt.py
+++ /dev/null
@@ -1,15 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import
-
-
-def setup_module(module):
-    from nose import SkipTest
-    from nltk.inference.mace import Mace
-
-    try:
-        m = Mace()
-        m._find_binary('mace4')
-    except LookupError:
-        raise SkipTest(
-            "Mace4/Prover9 is not available so inference.doctest was skipped"
-        )
diff --git a/nlp_resource_data/nltk/test/internals.doctest b/nlp_resource_data/nltk/test/internals.doctest

deleted file mode 100644 (file)

index 74c2bd9..0000000
--- a/nlp_resource_data/nltk/test/internals.doctest
+++ /dev/null
@@ -1,140 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-==========================================
- Unit tests for the nltk.internals module
-==========================================
-
-overridden()
-~~~~~~~~~~~~
-    >>> from nltk.internals import overridden
-
-The typical use case is in defining methods for an interface or
-abstract base class, in such a way that subclasses don't have to
-implement all of the methods:
-
-    >>> class EaterI(object):
-    ...     '''Subclass must define eat() or batch_eat().'''
-    ...     def eat(self, food):
-    ...         if overridden(self.batch_eat):
-    ...             return self.batch_eat([food])[0]
-    ...         else:
-    ...             raise NotImplementedError()
-    ...     def batch_eat(self, foods):
-    ...         return [self.eat(food) for food in foods]
-
-As long as a subclass implements one method, it will be used to
-perform the other method:
-
-    >>> class GoodEater1(EaterI):
-    ...     def eat(self, food):
-    ...         return 'yum'
-    >>> GoodEater1().eat('steak')
-    'yum'
-    >>> GoodEater1().batch_eat(['steak', 'peas'])
-    ['yum', 'yum']
-
-    >>> class GoodEater2(EaterI):
-    ...     def batch_eat(self, foods):
-    ...         return ['yum' for food in foods]
-    >>> GoodEater2().eat('steak')
-    'yum'
-    >>> GoodEater2().batch_eat(['steak', 'peas'])
-    ['yum', 'yum']
-
-But if a subclass doesn't implement either one, then they'll get an
-error when they try to call them.  (nb this is better than infinite
-recursion):
-
-    >>> class BadEater1(EaterI):
-    ...     pass
-    >>> BadEater1().eat('steak')
-    Traceback (most recent call last):
-      . . .
-    NotImplementedError
-    >>> BadEater1().batch_eat(['steak', 'peas'])
-    Traceback (most recent call last):
-      . . .
-    NotImplementedError
-
-Trying to use the abstract base class itself will also result in an
-error:
-
-    >>> class EaterI(EaterI):
-    ...     pass
-    >>> EaterI().eat('steak')
-    Traceback (most recent call last):
-      . . .
-    NotImplementedError
-    >>> EaterI().batch_eat(['steak', 'peas'])
-    Traceback (most recent call last):
-      . . .
-    NotImplementedError
-
-It's ok to use intermediate abstract classes:
-
-    >>> class AbstractEater(EaterI):
-    ...     pass
-
-    >>> class GoodEater3(AbstractEater):
-    ...     def eat(self, food):
-    ...         return 'yum'
-    ...
-    >>> GoodEater3().eat('steak')
-    'yum'
-    >>> GoodEater3().batch_eat(['steak', 'peas'])
-    ['yum', 'yum']
-
-    >>> class GoodEater4(AbstractEater):
-    ...     def batch_eat(self, foods):
-    ...         return ['yum' for food in foods]
-    >>> GoodEater4().eat('steak')
-    'yum'
-    >>> GoodEater4().batch_eat(['steak', 'peas'])
-    ['yum', 'yum']
-
-    >>> class BadEater2(AbstractEater):
-    ...     pass
-    >>> BadEater2().eat('steak')
-    Traceback (most recent call last):
-      . . .
-    NotImplementedError
-    >>> BadEater2().batch_eat(['steak', 'peas'])
-    Traceback (most recent call last):
-      . . .
-    NotImplementedError
-
-Here's some extra tests:
-
-    >>> class A(object):
-    ...     def f(x): pass
-    >>> class B(A):
-    ...     def f(x): pass
-    >>> class C(A): pass
-    >>> class D(B): pass
-
-    >>> overridden(A().f)
-    False
-    >>> overridden(B().f)
-    True
-    >>> overridden(C().f)
-    False
-    >>> overridden(D().f)
-    True
-
-It works for classic classes, too:
-
-    >>> class A:
-    ...     def f(x): pass
-    >>> class B(A):
-    ...     def f(x): pass
-    >>> class C(A): pass
-    >>> class D(B): pass
-    >>> overridden(A().f)
-    False
-    >>> overridden(B().f)
-    True
-    >>> overridden(C().f)
-    False
-    >>> overridden(D().f)
-    True
diff --git a/nlp_resource_data/nltk/test/japanese.doctest b/nlp_resource_data/nltk/test/japanese.doctest

deleted file mode 100644 (file)

index 181b080..0000000
--- a/nlp_resource_data/nltk/test/japanese.doctest
+++ /dev/null
@@ -1,48 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-============================
-Japanese Language Processing
-============================
-
-    >>> from nltk import *
-
--------------
-Corpus Access
--------------
-
-KNB Corpus
-----------
-
-    >>> from nltk.corpus import knbc
-
-Access the words: this should produce a list of strings:
-
-    >>> type(knbc.words()[0]) is not bytes
-    True
-
-Access the sentences: this should produce a list of lists of strings:
-
-    >>> type(knbc.sents()[0][0]) is not bytes
-    True
-
-Access the tagged words: this should produce a list of word, tag pairs:
-
-    >>> type(knbc.tagged_words()[0])
-    <... 'tuple'>
-
-Access the tagged sentences: this should produce a list of lists of word, tag pairs:
-
-    >>> type(knbc.tagged_sents()[0][0])
-    <... 'tuple'>
-
-
-JEITA Corpus
-------------
-
-    >>> from nltk.corpus import jeita
-
-Access the tagged words: this should produce a list of word, tag pairs, where a tag is a string:
-
-    >>> type(jeita.tagged_words()[0][1]) is not bytes
-    True
diff --git a/nlp_resource_data/nltk/test/lm.doctest b/nlp_resource_data/nltk/test/lm.doctest

deleted file mode 100644 (file)

index f3bde33..0000000
--- a/nlp_resource_data/nltk/test/lm.doctest
+++ /dev/null
@@ -1,131 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-.. -*- coding: utf-8 -*-
-
-
-Regression Tests
-================
-
-
-Issue 167
----------
-https://github.com/nltk/nltk/issues/167
-
-    >>> from nltk.corpus import brown
-    >>> from nltk.lm.preprocessing import padded_everygram_pipeline
-    >>> ngram_order = 3
-    >>> train_data, vocab_data = padded_everygram_pipeline(
-    ...     ngram_order,
-    ...     brown.sents(categories="news")
-    ... )
-
-    >>> from nltk.lm import WittenBellInterpolated
-    >>> lm = WittenBellInterpolated(ngram_order)
-    >>> lm.fit(train_data, vocab_data)
-
-Sentence containing an unseen word should result in infinite entropy because
-Witten-Bell is based ultimately on MLE, which cannot handle unseen ngrams.
-Crucially, it shouldn't raise any exceptions for unseen words.
-
-    >>> from nltk.util import ngrams
-    >>> sent = ngrams("This is a sentence with the word aaddvark".split(), 3)
-    >>> lm.entropy(sent)
-    inf
-
-If we remove all unseen ngrams from the sentence, we'll get a non-infinite value
-for the entropy.
-
-    >>> sent = ngrams("This is a sentence".split(), 3)
-    >>> lm.entropy(sent)
-    17.41365588455936
-
-
-Issue 367
----------
-https://github.com/nltk/nltk/issues/367
-
-Reproducing Dan Blanchard's example:
-https://github.com/nltk/nltk/issues/367#issuecomment-14646110
-
-    >>> from nltk.lm import Lidstone, Vocabulary
-    >>> word_seq = list('aaaababaaccbacb')
-    >>> ngram_order = 2
-    >>> from nltk.util import everygrams
-    >>> train_data = [everygrams(word_seq, max_len=ngram_order)]
-    >>> V = Vocabulary(['a', 'b', 'c', ''])
-    >>> lm = Lidstone(0.2, ngram_order, vocabulary=V)
-    >>> lm.fit(train_data)
-
-For doctest to work we have to sort the vocabulary keys.
-
-    >>> V_keys = sorted(V)
-    >>> round(sum(lm.score(w, ("b",)) for w in V_keys), 6)
-    1.0
-    >>> round(sum(lm.score(w, ("a",)) for w in V_keys), 6)
-    1.0
-
-    >>> [lm.score(w, ("b",)) for w in V_keys]
-    [0.05, 0.05, 0.8, 0.05, 0.05]
-    >>> [round(lm.score(w, ("a",)), 4) for w in V_keys]
-    [0.0222, 0.0222, 0.4667, 0.2444, 0.2444]
-
-
-Here's reproducing @afourney's comment:
-https://github.com/nltk/nltk/issues/367#issuecomment-15686289
-
-    >>> sent = ['foo', 'foo', 'foo', 'foo', 'bar', 'baz']
-    >>> ngram_order = 3
-    >>> from nltk.lm.preprocessing import padded_everygram_pipeline
-    >>> train_data, vocab_data = padded_everygram_pipeline(ngram_order, [sent])
-    >>> from nltk.lm import Lidstone
-    >>> lm = Lidstone(0.2, ngram_order)
-    >>> lm.fit(train_data, vocab_data)
-
-The vocabulary includes the "UNK" symbol as well as two padding symbols.
-
-    >>> len(lm.vocab)
-    6
-    >>> word = "foo"
-    >>> context = ("bar", "baz")
-
-The raw counts.
-
-    >>> lm.context_counts(context)[word]
-    0
-    >>> lm.context_counts(context).N()
-    1
-
-Counts with Lidstone smoothing.
-
-    >>> lm.context_counts(context)[word] + lm.gamma
-    0.2
-    >>> lm.context_counts(context).N() + len(lm.vocab) * lm.gamma
-    2.2
-
-Without any backoff, just using Lidstone smoothing, P("foo" | "bar", "baz") should be:
-0.2 / 2.2 ~= 0.090909
-
-    >>> round(lm.score(word, context), 6)
-    0.090909
-
-
-Issue 380
----------
-https://github.com/nltk/nltk/issues/380
-
-Reproducing setup akin to this comment:
-https://github.com/nltk/nltk/issues/380#issue-12879030
-
-For speed take only the first 100 sentences of reuters. Shouldn't affect the test.
-    >>> from nltk.corpus import reuters
-    >>> sents = reuters.sents()[:100]
-    >>> ngram_order = 3
-    >>> from nltk.lm.preprocessing import padded_everygram_pipeline
-    >>> train_data, vocab_data = padded_everygram_pipeline(ngram_order, sents)
-
-    >>> from nltk.lm import Lidstone
-    >>> lm = Lidstone(0.2, ngram_order)
-    >>> lm.fit(train_data, vocab_data)
-    >>> lm.score("said", ("",)) < 1
-    True
diff --git a/nlp_resource_data/nltk/test/logic.doctest b/nlp_resource_data/nltk/test/logic.doctest

deleted file mode 100644 (file)

index ab27009..0000000
--- a/nlp_resource_data/nltk/test/logic.doctest
+++ /dev/null
@@ -1,1098 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-=======================
-Logic & Lambda Calculus
-=======================
-
-The `nltk.sem.logic` module allows expressions of First-Order Logic (FOL) to be
-parsed into ``Expression`` objects. In addition to FOL, the parser
-handles lambda-abstraction with variables of higher order.
-
---------
-Overview
---------
-
-    >>> from nltk.sem.logic import *
-
-The default inventory of logical constants is the following:
-
-    >>> boolean_ops() # doctest: +NORMALIZE_WHITESPACE
-    negation           -
-    conjunction        &
-    disjunction        |
-    implication        ->
-    equivalence        <->
-    >>> equality_preds() # doctest: +NORMALIZE_WHITESPACE
-    equality           =
-    inequality         !=
-    >>> binding_ops() # doctest: +NORMALIZE_WHITESPACE
-    existential        exists
-    universal          all
-    lambda             \
-
-----------------
-Regression Tests
-----------------
-
-
-Untyped Logic
-+++++++++++++
-
-Process logical expressions conveniently:
-
-    >>> read_expr = Expression.fromstring
-
-Test for equality under alpha-conversion
-========================================
-
-    >>> e1 = read_expr('exists x.P(x)')
-    >>> print(e1)
-    exists x.P(x)
-    >>> e2 = e1.alpha_convert(Variable('z'))
-    >>> print(e2)
-    exists z.P(z)
-    >>> e1 == e2
-    True
-
-
-    >>> l = read_expr(r'\X.\X.X(X)(1)').simplify()
-    >>> id = read_expr(r'\X.X(X)')
-    >>> l == id
-    True
-
-Test numerals
-=============
-
-    >>> zero = read_expr(r'\F x.x')
-    >>> one = read_expr(r'\F x.F(x)')
-    >>> two = read_expr(r'\F x.F(F(x))')
-    >>> three = read_expr(r'\F x.F(F(F(x)))')
-    >>> four = read_expr(r'\F x.F(F(F(F(x))))')
-    >>> succ = read_expr(r'\N F x.F(N(F,x))')
-    >>> plus = read_expr(r'\M N F x.M(F,N(F,x))')
-    >>> mult = read_expr(r'\M N F.M(N(F))')
-    >>> pred = read_expr(r'\N F x.(N(\G H.H(G(F)))(\u.x)(\u.u))')
-    >>> v1 = ApplicationExpression(succ, zero).simplify()
-    >>> v1 == one
-    True
-    >>> v2 = ApplicationExpression(succ, v1).simplify()
-    >>> v2 == two
-    True
-    >>> v3 = ApplicationExpression(ApplicationExpression(plus, v1), v2).simplify()
-    >>> v3 == three
-    True
-    >>> v4 = ApplicationExpression(ApplicationExpression(mult, v2), v2).simplify()
-    >>> v4 == four
-    True
-    >>> v5 = ApplicationExpression(pred, ApplicationExpression(pred, v4)).simplify()
-    >>> v5 == two
-    True
-
-Overloaded operators also exist, for convenience.
-
-    >>> print(succ(zero).simplify() == one)
-    True
-    >>> print(plus(one,two).simplify() == three)
-    True
-    >>> print(mult(two,two).simplify() == four)
-    True
-    >>> print(pred(pred(four)).simplify() == two)
-    True
-
-    >>> john = read_expr(r'john')
-    >>> man = read_expr(r'\x.man(x)')
-    >>> walk = read_expr(r'\x.walk(x)')
-    >>> man(john).simplify()
-    <ApplicationExpression man(john)>
-    >>> print(-walk(john).simplify())
-    -walk(john)
-    >>> print((man(john) & walk(john)).simplify())
-    (man(john) & walk(john))
-    >>> print((man(john) | walk(john)).simplify())
-    (man(john) | walk(john))
-    >>> print((man(john) > walk(john)).simplify())
-    (man(john) -> walk(john))
-    >>> print((man(john) < walk(john)).simplify())
-    (man(john) <-> walk(john))
-
-Python's built-in lambda operator can also be used with Expressions:
-
-    >>> john = VariableExpression(Variable('john'))
-    >>> run_var = VariableExpression(Variable('run'))
-    >>> run = lambda x: run_var(x)
-    >>> run(john)
-    <ApplicationExpression run(john)>
-
-
-``betaConversionTestSuite.pl``
-------------------------------
-
-Tests based on Blackburn & Bos' book, *Representation and Inference
-for Natural Language*.
-
-    >>> x1 = read_expr(r'\P.P(mia)(\x.walk(x))').simplify()
-    >>> x2 = read_expr(r'walk(mia)').simplify()
-    >>> x1 == x2
-    True
-
-    >>> x1 = read_expr(r'exists x.(man(x) & ((\P.exists x.(woman(x) & P(x)))(\y.love(x,y))))').simplify()
-    >>> x2 = read_expr(r'exists x.(man(x) & exists y.(woman(y) & love(x,y)))').simplify()
-    >>> x1 == x2
-    True
-    >>> x1 = read_expr(r'\a.sleep(a)(mia)').simplify()
-    >>> x2 = read_expr(r'sleep(mia)').simplify()
-    >>> x1 == x2
-    True
-    >>> x1 = read_expr(r'\a.\b.like(b,a)(mia)').simplify()
-    >>> x2 = read_expr(r'\b.like(b,mia)').simplify()
-    >>> x1 == x2
-    True
-    >>> x1 = read_expr(r'\a.(\b.like(b,a)(vincent))').simplify()
-    >>> x2 = read_expr(r'\a.like(vincent,a)').simplify()
-    >>> x1 == x2
-    True
-    >>> x1 = read_expr(r'\a.((\b.like(b,a)(vincent)) & sleep(a))').simplify()
-    >>> x2 = read_expr(r'\a.(like(vincent,a) & sleep(a))').simplify()
-    >>> x1 == x2
-    True
-
-    >>> x1 = read_expr(r'(\a.\b.like(b,a)(mia)(vincent))').simplify()
-    >>> x2 = read_expr(r'like(vincent,mia)').simplify()
-    >>> x1 == x2
-    True
-
-    >>> x1 = read_expr(r'P((\a.sleep(a)(vincent)))').simplify()
-    >>> x2 = read_expr(r'P(sleep(vincent))').simplify()
-    >>> x1 == x2
-    True
-
-    >>> x1 = read_expr(r'\A.A((\b.sleep(b)(vincent)))').simplify()
-    >>> x2 = read_expr(r'\A.A(sleep(vincent))').simplify()
-    >>> x1 == x2
-    True
-
-    >>> x1 = read_expr(r'\A.A(sleep(vincent))').simplify()
-    >>> x2 = read_expr(r'\A.A(sleep(vincent))').simplify()
-    >>> x1 == x2
-    True
-
-    >>> x1 = read_expr(r'(\A.A(vincent)(\b.sleep(b)))').simplify()
-    >>> x2 = read_expr(r'sleep(vincent)').simplify()
-    >>> x1 == x2
-    True
-
-    >>> x1 = read_expr(r'\A.believe(mia,A(vincent))(\b.sleep(b))').simplify()
-    >>> x2 = read_expr(r'believe(mia,sleep(vincent))').simplify()
-    >>> x1 == x2
-    True
-
-    >>> x1 = read_expr(r'(\A.(A(vincent) & A(mia)))(\b.sleep(b))').simplify()
-    >>> x2 = read_expr(r'(sleep(vincent) & sleep(mia))').simplify()
-    >>> x1 == x2
-    True
-
-    >>> x1 = read_expr(r'\A.\B.(\C.C(A(vincent))(\d.probably(d)) & (\C.C(B(mia))(\d.improbably(d))))(\f.walk(f))(\f.talk(f))').simplify()
-    >>> x2 = read_expr(r'(probably(walk(vincent)) & improbably(talk(mia)))').simplify()
-    >>> x1 == x2
-    True
-
-    >>> x1 = read_expr(r'(\a.\b.(\C.C(a,b)(\d.\f.love(d,f))))(jules)(mia)').simplify()
-    >>> x2 = read_expr(r'love(jules,mia)').simplify()
-    >>> x1 == x2
-    True
-
-    >>> x1 = read_expr(r'(\A.\B.exists c.(A(c) & B(c)))(\d.boxer(d),\d.sleep(d))').simplify()
-    >>> x2 = read_expr(r'exists c.(boxer(c) & sleep(c))').simplify()
-    >>> x1 == x2
-    True
-
-    >>> x1 = read_expr(r'\A.Z(A)(\c.\a.like(a,c))').simplify()
-    >>> x2 = read_expr(r'Z(\c.\a.like(a,c))').simplify()
-    >>> x1 == x2
-    True
-
-    >>> x1 = read_expr(r'\A.\b.A(b)(\c.\b.like(b,c))').simplify()
-    >>> x2 = read_expr(r'\b.(\c.\b.like(b,c)(b))').simplify()
-    >>> x1 == x2
-    True
-
-    >>> x1 = read_expr(r'(\a.\b.(\C.C(a,b)(\b.\a.loves(b,a))))(jules)(mia)').simplify()
-    >>> x2 = read_expr(r'loves(jules,mia)').simplify()
-    >>> x1 == x2
-    True
-
-    >>> x1 = read_expr(r'(\A.\b.(exists b.A(b) & A(b)))(\c.boxer(c))(vincent)').simplify()
-    >>> x2 = read_expr(r'((exists b.boxer(b)) & boxer(vincent))').simplify()
-    >>> x1 == x2
-    True
-
-Test Parser
-===========
-
-    >>> print(read_expr(r'john'))
-    john
-    >>> print(read_expr(r'x'))
-    x
-    >>> print(read_expr(r'-man(x)'))
-    -man(x)
-    >>> print(read_expr(r'--man(x)'))
-    --man(x)
-    >>> print(read_expr(r'(man(x))'))
-    man(x)
-    >>> print(read_expr(r'((man(x)))'))
-    man(x)
-    >>> print(read_expr(r'man(x) <-> tall(x)'))
-    (man(x) <-> tall(x))
-    >>> print(read_expr(r'(man(x) <-> tall(x))'))
-    (man(x) <-> tall(x))
-    >>> print(read_expr(r'(man(x) & tall(x) & walks(x))'))
-    (man(x) & tall(x) & walks(x))
-    >>> print(read_expr(r'(man(x) & tall(x) & walks(x))').first)
-    (man(x) & tall(x))
-    >>> print(read_expr(r'man(x) | tall(x) & walks(x)'))
-    (man(x) | (tall(x) & walks(x)))
-    >>> print(read_expr(r'((man(x) & tall(x)) | walks(x))'))
-    ((man(x) & tall(x)) | walks(x))
-    >>> print(read_expr(r'man(x) & (tall(x) | walks(x))'))
-    (man(x) & (tall(x) | walks(x)))
-    >>> print(read_expr(r'(man(x) & (tall(x) | walks(x)))'))
-    (man(x) & (tall(x) | walks(x)))
-    >>> print(read_expr(r'P(x) -> Q(x) <-> R(x) | S(x) & T(x)'))
-    ((P(x) -> Q(x)) <-> (R(x) | (S(x) & T(x))))
-    >>> print(read_expr(r'exists x.man(x)'))
-    exists x.man(x)
-    >>> print(read_expr(r'exists x.(man(x) & tall(x))'))
-    exists x.(man(x) & tall(x))
-    >>> print(read_expr(r'exists x.(man(x) & tall(x) & walks(x))'))
-    exists x.(man(x) & tall(x) & walks(x))
-    >>> print(read_expr(r'-P(x) & Q(x)'))
-    (-P(x) & Q(x))
-    >>> read_expr(r'-P(x) & Q(x)') == read_expr(r'(-P(x)) & Q(x)')
-    True
-    >>> print(read_expr(r'\x.man(x)'))
-    \x.man(x)
-    >>> print(read_expr(r'\x.man(x)(john)'))
-    \x.man(x)(john)
-    >>> print(read_expr(r'\x.man(x)(john) & tall(x)'))
-    (\x.man(x)(john) & tall(x))
-    >>> print(read_expr(r'\x.\y.sees(x,y)'))
-    \x y.sees(x,y)
-    >>> print(read_expr(r'\x  y.sees(x,y)'))
-    \x y.sees(x,y)
-    >>> print(read_expr(r'\x.\y.sees(x,y)(a)'))
-    (\x y.sees(x,y))(a)
-    >>> print(read_expr(r'\x  y.sees(x,y)(a)'))
-    (\x y.sees(x,y))(a)
-    >>> print(read_expr(r'\x.\y.sees(x,y)(a)(b)'))
-    ((\x y.sees(x,y))(a))(b)
-    >>> print(read_expr(r'\x  y.sees(x,y)(a)(b)'))
-    ((\x y.sees(x,y))(a))(b)
-    >>> print(read_expr(r'\x.\y.sees(x,y)(a,b)'))
-    ((\x y.sees(x,y))(a))(b)
-    >>> print(read_expr(r'\x  y.sees(x,y)(a,b)'))
-    ((\x y.sees(x,y))(a))(b)
-    >>> print(read_expr(r'((\x.\y.sees(x,y))(a))(b)'))
-    ((\x y.sees(x,y))(a))(b)
-    >>> print(read_expr(r'P(x)(y)(z)'))
-    P(x,y,z)
-    >>> print(read_expr(r'P(Q)'))
-    P(Q)
-    >>> print(read_expr(r'P(Q(x))'))
-    P(Q(x))
-    >>> print(read_expr(r'(\x.exists y.walks(x,y))(x)'))
-    (\x.exists y.walks(x,y))(x)
-    >>> print(read_expr(r'exists x.(x = john)'))
-    exists x.(x = john)
-    >>> print(read_expr(r'((\P.\Q.exists x.(P(x) & Q(x)))(\x.dog(x)))(\x.bark(x))'))
-    ((\P Q.exists x.(P(x) & Q(x)))(\x.dog(x)))(\x.bark(x))
-    >>> a = read_expr(r'exists c.exists b.A(b,c) & A(b,c)')
-    >>> b = read_expr(r'(exists c.(exists b.A(b,c))) & A(b,c)')
-    >>> print(a == b)
-    True
-    >>> a = read_expr(r'exists c.(exists b.A(b,c) & A(b,c))')
-    >>> b = read_expr(r'exists c.((exists b.A(b,c)) & A(b,c))')
-    >>> print(a == b)
-    True
-    >>> print(read_expr(r'exists x.x = y'))
-    exists x.(x = y)
-    >>> print(read_expr('A(B)(C)'))
-    A(B,C)
-    >>> print(read_expr('(A(B))(C)'))
-    A(B,C)
-    >>> print(read_expr('A((B)(C))'))
-    A(B(C))
-    >>> print(read_expr('A(B(C))'))
-    A(B(C))
-    >>> print(read_expr('(A)(B(C))'))
-    A(B(C))
-    >>> print(read_expr('(((A)))(((B))(((C))))'))
-    A(B(C))
-    >>> print(read_expr(r'A != B'))
-    -(A = B)
-    >>> print(read_expr('P(x) & x=y & P(y)'))
-    (P(x) & (x = y) & P(y))
-    >>> try: print(read_expr(r'\walk.walk(x)'))
-    ... except LogicalExpressionException as e: print(e)
-    'walk' is an illegal variable name.  Constants may not be abstracted.
-    \walk.walk(x)
-     ^
-    >>> try: print(read_expr(r'all walk.walk(john)'))
-    ... except LogicalExpressionException as e: print(e)
-    'walk' is an illegal variable name.  Constants may not be quantified.
-    all walk.walk(john)
-        ^
-    >>> try: print(read_expr(r'x(john)'))
-    ... except LogicalExpressionException as e: print(e)
-    'x' is an illegal predicate name.  Individual variables may not be used as predicates.
-    x(john)
-    ^
-
-    >>> from nltk.sem.logic import LogicParser # hack to give access to custom quote chars
-    >>> lpq = LogicParser()
-    >>> lpq.quote_chars = [("'", "'", "\\", False)]
-    >>> print(lpq.parse(r"(man(x) & 'tall\'s,' (x) & walks (x) )"))
-    (man(x) & tall's,(x) & walks(x))
-    >>> lpq.quote_chars = [("'", "'", "\\", True)]
-    >>> print(lpq.parse(r"'tall\'s,'"))
-    'tall\'s,'
-    >>> print(lpq.parse(r"'spaced name(x)'"))
-    'spaced name(x)'
-    >>> print(lpq.parse(r"-'tall\'s,'(x)"))
-    -'tall\'s,'(x)
-    >>> print(lpq.parse(r"(man(x) & 'tall\'s,' (x) & walks (x) )"))
-    (man(x) & 'tall\'s,'(x) & walks(x))
-
-
-Simplify
-========
-
-    >>> print(read_expr(r'\x.man(x)(john)').simplify())
-    man(john)
-    >>> print(read_expr(r'\x.((man(x)))(john)').simplify())
-    man(john)
-    >>> print(read_expr(r'\x.\y.sees(x,y)(john, mary)').simplify())
-    sees(john,mary)
-    >>> print(read_expr(r'\x  y.sees(x,y)(john, mary)').simplify())
-    sees(john,mary)
-    >>> print(read_expr(r'\x.\y.sees(x,y)(john)(mary)').simplify())
-    sees(john,mary)
-    >>> print(read_expr(r'\x  y.sees(x,y)(john)(mary)').simplify())
-    sees(john,mary)
-    >>> print(read_expr(r'\x.\y.sees(x,y)(john)').simplify())
-    \y.sees(john,y)
-    >>> print(read_expr(r'\x  y.sees(x,y)(john)').simplify())
-    \y.sees(john,y)
-    >>> print(read_expr(r'(\x.\y.sees(x,y)(john))(mary)').simplify())
-    sees(john,mary)
-    >>> print(read_expr(r'(\x  y.sees(x,y)(john))(mary)').simplify())
-    sees(john,mary)
-    >>> print(read_expr(r'exists x.(man(x) & (\x.exists y.walks(x,y))(x))').simplify())
-    exists x.(man(x) & exists y.walks(x,y))
-    >>> e1 = read_expr(r'exists x.(man(x) & (\x.exists y.walks(x,y))(y))').simplify()
-    >>> e2 = read_expr(r'exists x.(man(x) & exists z1.walks(y,z1))')
-    >>> e1 == e2
-    True
-    >>> print(read_expr(r'(\P Q.exists x.(P(x) & Q(x)))(\x.dog(x))').simplify())
-    \Q.exists x.(dog(x) & Q(x))
-    >>> print(read_expr(r'((\P.\Q.exists x.(P(x) & Q(x)))(\x.dog(x)))(\x.bark(x))').simplify())
-    exists x.(dog(x) & bark(x))
-    >>> print(read_expr(r'\P.(P(x)(y))(\a b.Q(a,b))').simplify())
-    Q(x,y)
-
-Replace
-=======
-
-    >>> a = read_expr(r'a')
-    >>> x = read_expr(r'x')
-    >>> y = read_expr(r'y')
-    >>> z = read_expr(r'z')
-
-    >>> print(read_expr(r'man(x)').replace(x.variable, a, False))
-    man(a)
-    >>> print(read_expr(r'(man(x) & tall(x))').replace(x.variable, a, False))
-    (man(a) & tall(a))
-    >>> print(read_expr(r'exists x.man(x)').replace(x.variable, a, False))
-    exists x.man(x)
-    >>> print(read_expr(r'exists x.man(x)').replace(x.variable, a, True))
-    exists a.man(a)
-    >>> print(read_expr(r'exists x.give(x,y,z)').replace(y.variable, a, False))
-    exists x.give(x,a,z)
-    >>> print(read_expr(r'exists x.give(x,y,z)').replace(y.variable, a, True))
-    exists x.give(x,a,z)
-    >>> e1 = read_expr(r'exists x.give(x,y,z)').replace(y.variable, x, False)
-    >>> e2 = read_expr(r'exists z1.give(z1,x,z)')
-    >>> e1 == e2
-    True
-    >>> e1 = read_expr(r'exists x.give(x,y,z)').replace(y.variable, x, True)
-    >>> e2 = read_expr(r'exists z1.give(z1,x,z)')
-    >>> e1 == e2
-    True
-    >>> print(read_expr(r'\x y z.give(x,y,z)').replace(y.variable, a, False))
-    \x y z.give(x,y,z)
-    >>> print(read_expr(r'\x y z.give(x,y,z)').replace(y.variable, a, True))
-    \x a z.give(x,a,z)
-    >>> print(read_expr(r'\x.\y.give(x,y,z)').replace(z.variable, a, False))
-    \x y.give(x,y,a)
-    >>> print(read_expr(r'\x.\y.give(x,y,z)').replace(z.variable, a, True))
-    \x y.give(x,y,a)
-    >>> e1 = read_expr(r'\x.\y.give(x,y,z)').replace(z.variable, x, False)
-    >>> e2 = read_expr(r'\z1.\y.give(z1,y,x)')
-    >>> e1 == e2
-    True
-    >>> e1 = read_expr(r'\x.\y.give(x,y,z)').replace(z.variable, x, True)
-    >>> e2 = read_expr(r'\z1.\y.give(z1,y,x)')
-    >>> e1 == e2
-    True
-    >>> print(read_expr(r'\x.give(x,y,z)').replace(z.variable, y, False))
-    \x.give(x,y,y)
-    >>> print(read_expr(r'\x.give(x,y,z)').replace(z.variable, y, True))
-    \x.give(x,y,y)
-
-    >>> from nltk.sem import logic
-    >>> logic._counter._value = 0
-    >>> e1 = read_expr('e1')
-    >>> e2 = read_expr('e2')
-    >>> print(read_expr('exists e1 e2.(walk(e1) & talk(e2))').replace(e1.variable, e2, True))
-    exists e2 e01.(walk(e2) & talk(e01))
-
-
-Variables / Free
-================
-
-    >>> examples = [r'walk(john)',
-    ...             r'walk(x)',
-    ...             r'?vp(?np)',
-    ...             r'see(john,mary)',
-    ...             r'exists x.walk(x)',
-    ...             r'\x.see(john,x)',
-    ...             r'\x.see(john,x)(mary)',
-    ...             r'P(x)',
-    ...             r'\P.P(x)',
-    ...             r'aa(x,bb(y),cc(z),P(w),u)',
-    ...             r'bo(?det(?n),@x)']
-    >>> examples = [read_expr(e) for e in examples]
-
-    >>> for e in examples:
-    ...     print('%-25s' % e, sorted(e.free()))
-    walk(john)                []
-    walk(x)                   [Variable('x')]
-    ?vp(?np)                  []
-    see(john,mary)            []
-    exists x.walk(x)          []
-    \x.see(john,x)            []
-    (\x.see(john,x))(mary)    []
-    P(x)                      [Variable('P'), Variable('x')]
-    \P.P(x)                   [Variable('x')]
-    aa(x,bb(y),cc(z),P(w),u)  [Variable('P'), Variable('u'), Variable('w'), Variable('x'), Variable('y'), Variable('z')]
-    bo(?det(?n),@x)           []
-
-    >>> for e in examples:
-    ...     print('%-25s' % e, sorted(e.constants()))
-    walk(john)                [Variable('john')]
-    walk(x)                   []
-    ?vp(?np)                  [Variable('?np')]
-    see(john,mary)            [Variable('john'), Variable('mary')]
-    exists x.walk(x)          []
-    \x.see(john,x)            [Variable('john')]
-    (\x.see(john,x))(mary)    [Variable('john'), Variable('mary')]
-    P(x)                      []
-    \P.P(x)                   []
-    aa(x,bb(y),cc(z),P(w),u)  []
-    bo(?det(?n),@x)           [Variable('?n'), Variable('@x')]
-
-    >>> for e in examples:
-    ...     print('%-25s' % e, sorted(e.predicates()))
-    walk(john)                [Variable('walk')]
-    walk(x)                   [Variable('walk')]
-    ?vp(?np)                  [Variable('?vp')]
-    see(john,mary)            [Variable('see')]
-    exists x.walk(x)          [Variable('walk')]
-    \x.see(john,x)            [Variable('see')]
-    (\x.see(john,x))(mary)    [Variable('see')]
-    P(x)                      []
-    \P.P(x)                   []
-    aa(x,bb(y),cc(z),P(w),u)  [Variable('aa'), Variable('bb'), Variable('cc')]
-    bo(?det(?n),@x)           [Variable('?det'), Variable('bo')]
-
-    >>> for e in examples:
-    ...     print('%-25s' % e, sorted(e.variables()))
-    walk(john)                []
-    walk(x)                   [Variable('x')]
-    ?vp(?np)                  [Variable('?np'), Variable('?vp')]
-    see(john,mary)            []
-    exists x.walk(x)          []
-    \x.see(john,x)            []
-    (\x.see(john,x))(mary)    []
-    P(x)                      [Variable('P'), Variable('x')]
-    \P.P(x)                   [Variable('x')]
-    aa(x,bb(y),cc(z),P(w),u)  [Variable('P'), Variable('u'), Variable('w'), Variable('x'), Variable('y'), Variable('z')]
-    bo(?det(?n),@x)           [Variable('?det'), Variable('?n'), Variable('@x')]
-
-
-
-`normalize`
-    >>> print(read_expr(r'\e083.(walk(e083, z472) & talk(e092, z938))').normalize())
-    \e01.(walk(e01,z3) & talk(e02,z4))
-
-Typed Logic
-+++++++++++
-
-    >>> from nltk.sem.logic import LogicParser
-    >>> tlp = LogicParser(True)
-    >>> print(tlp.parse(r'man(x)').type)
-    ?
-    >>> print(tlp.parse(r'walk(angus)').type)
-    ?
-    >>> print(tlp.parse(r'-man(x)').type)
-    t
-    >>> print(tlp.parse(r'(man(x) <-> tall(x))').type)
-    t
-    >>> print(tlp.parse(r'exists x.(man(x) & tall(x))').type)
-    t
-    >>> print(tlp.parse(r'\x.man(x)').type)
-    <e,?>
-    >>> print(tlp.parse(r'john').type)
-    e
-    >>> print(tlp.parse(r'\x y.sees(x,y)').type)
-    <e,<e,?>>
-    >>> print(tlp.parse(r'\x.man(x)(john)').type)
-    ?
-    >>> print(tlp.parse(r'\x.\y.sees(x,y)(john)').type)
-    <e,?>
-    >>> print(tlp.parse(r'\x.\y.sees(x,y)(john)(mary)').type)
-    ?
-    >>> print(tlp.parse(r'\P.\Q.exists x.(P(x) & Q(x))').type)
-    <<e,t>,<<e,t>,t>>
-    >>> print(tlp.parse(r'\x.y').type)
-    <?,e>
-    >>> print(tlp.parse(r'\P.P(x)').type)
-    <<e,?>,?>
-
-    >>> parsed = tlp.parse('see(john,mary)')
-    >>> print(parsed.type)
-    ?
-    >>> print(parsed.function)
-    see(john)
-    >>> print(parsed.function.type)
-    <e,?>
-    >>> print(parsed.function.function)
-    see
-    >>> print(parsed.function.function.type)
-    <e,<e,?>>
-
-    >>> parsed = tlp.parse('P(x,y)')
-    >>> print(parsed)
-    P(x,y)
-    >>> print(parsed.type)
-    ?
-    >>> print(parsed.function)
-    P(x)
-    >>> print(parsed.function.type)
-    <e,?>
-    >>> print(parsed.function.function)
-    P
-    >>> print(parsed.function.function.type)
-    <e,<e,?>>
-
-    >>> print(tlp.parse(r'P').type)
-    ?
-
-    >>> print(tlp.parse(r'P', {'P': 't'}).type)
-    t
-
-    >>> a = tlp.parse(r'P(x)')
-    >>> print(a.type)
-    ?
-    >>> print(a.function.type)
-    <e,?>
-    >>> print(a.argument.type)
-    e
-
-    >>> a = tlp.parse(r'-P(x)')
-    >>> print(a.type)
-    t
-    >>> print(a.term.type)
-    t
-    >>> print(a.term.function.type)
-    <e,t>
-    >>> print(a.term.argument.type)
-    e
-
-    >>> a = tlp.parse(r'P & Q')
-    >>> print(a.type)
-    t
-    >>> print(a.first.type)
-    t
-    >>> print(a.second.type)
-    t
-
-    >>> a = tlp.parse(r'(P(x) & Q(x))')
-    >>> print(a.type)
-    t
-    >>> print(a.first.type)
-    t
-    >>> print(a.first.function.type)
-    <e,t>
-    >>> print(a.first.argument.type)
-    e
-    >>> print(a.second.type)
-    t
-    >>> print(a.second.function.type)
-    <e,t>
-    >>> print(a.second.argument.type)
-    e
-
-    >>> a = tlp.parse(r'\x.P(x)')
-    >>> print(a.type)
-    <e,?>
-    >>> print(a.term.function.type)
-    <e,?>
-    >>> print(a.term.argument.type)
-    e
-
-    >>> a = tlp.parse(r'\P.P(x)')
-    >>> print(a.type)
-    <<e,?>,?>
-    >>> print(a.term.function.type)
-    <e,?>
-    >>> print(a.term.argument.type)
-    e
-
-    >>> a = tlp.parse(r'(\x.P(x)(john)) & Q(x)')
-    >>> print(a.type)
-    t
-    >>> print(a.first.type)
-    t
-    >>> print(a.first.function.type)
-    <e,t>
-    >>> print(a.first.function.term.function.type)
-    <e,t>
-    >>> print(a.first.function.term.argument.type)
-    e
-    >>> print(a.first.argument.type)
-    e
-
-    >>> a = tlp.parse(r'\x y.P(x,y)(john)(mary) & Q(x)')
-    >>> print(a.type)
-    t
-    >>> print(a.first.type)
-    t
-    >>> print(a.first.function.type)
-    <e,t>
-    >>> print(a.first.function.function.type)
-    <e,<e,t>>
-
-    >>> a = tlp.parse(r'--P')
-    >>> print(a.type)
-    t
-    >>> print(a.term.type)
-    t
-    >>> print(a.term.term.type)
-    t
-
-    >>> tlp.parse(r'\x y.P(x,y)').type
-    <e,<e,?>>
-    >>> tlp.parse(r'\x y.P(x,y)', {'P': '<e,<e,t>>'}).type
-    <e,<e,t>>
-
-    >>> a = tlp.parse(r'\P y.P(john,y)(\x y.see(x,y))')
-    >>> a.type
-    <e,?>
-    >>> a.function.type
-    <<e,<e,?>>,<e,?>>
-    >>> a.function.term.term.function.function.type
-    <e,<e,?>>
-    >>> a.argument.type
-    <e,<e,?>>
-
-    >>> a = tlp.parse(r'exists c f.(father(c) = f)')
-    >>> a.type
-    t
-    >>> a.term.term.type
-    t
-    >>> a.term.term.first.type
-    e
-    >>> a.term.term.first.function.type
-    <e,e>
-    >>> a.term.term.second.type
-    e
-
-typecheck()
-
-    >>> a = tlp.parse('P(x)')
-    >>> b = tlp.parse('Q(x)')
-    >>> a.type
-    ?
-    >>> c = a & b
-    >>> c.first.type
-    ?
-    >>> c.typecheck() # doctest: +ELLIPSIS
-    {...}
-    >>> c.first.type
-    t
-
-    >>> a = tlp.parse('P(x)')
-    >>> b = tlp.parse('P(x) & Q(x)')
-    >>> a.type
-    ?
-    >>> typecheck([a,b]) # doctest: +ELLIPSIS
-    {...}
-    >>> a.type
-    t
-
-    >>> e = tlp.parse(r'man(x)')
-    >>> print(dict((k,str(v)) for k,v in e.typecheck().items()) == {'x': 'e', 'man': '<e,?>'})
-    True
-    >>> sig = {'man': '<e, t>'}
-    >>> e = tlp.parse(r'man(x)', sig)
-    >>> print(e.function.type)
-    <e,t>
-    >>> print(dict((k,str(v)) for k,v in e.typecheck().items()) == {'x': 'e', 'man': '<e,t>'})
-    True
-    >>> print(e.function.type)
-    <e,t>
-    >>> print(dict((k,str(v)) for k,v in e.typecheck(sig).items()) == {'x': 'e', 'man': '<e,t>'})
-    True
-
-findtype()
-
-    >>> print(tlp.parse(r'man(x)').findtype(Variable('man')))
-    <e,?>
-    >>> print(tlp.parse(r'see(x,y)').findtype(Variable('see')))
-    <e,<e,?>>
-    >>> print(tlp.parse(r'P(Q(R(x)))').findtype(Variable('Q')))
-    ?
-
-reading types from strings
-
-    >>> Type.fromstring('e')
-    e
-    >>> Type.fromstring('<e,t>')
-    <e,t>
-    >>> Type.fromstring('<<e,t>,<e,t>>')
-    <<e,t>,<e,t>>
-    >>> Type.fromstring('<<e,?>,?>')
-    <<e,?>,?>
-
-alternative type format
-
-    >>> Type.fromstring('e').str()
-    'IND'
-    >>> Type.fromstring('<e,?>').str()
-    '(IND -> ANY)'
-    >>> Type.fromstring('<<e,t>,t>').str()
-    '((IND -> BOOL) -> BOOL)'
-
-Type.__eq__()
-
-    >>> from nltk.sem.logic import *
-
-    >>> e = ENTITY_TYPE
-    >>> t = TRUTH_TYPE
-    >>> a = ANY_TYPE
-    >>> et = ComplexType(e,t)
-    >>> eet = ComplexType(e,ComplexType(e,t))
-    >>> at = ComplexType(a,t)
-    >>> ea = ComplexType(e,a)
-    >>> aa = ComplexType(a,a)
-
-    >>> e == e
-    True
-    >>> t == t
-    True
-    >>> e == t
-    False
-    >>> a == t
-    False
-    >>> t == a
-    False
-    >>> a == a
-    True
-    >>> et == et
-    True
-    >>> a == et
-    False
-    >>> et == a
-    False
-    >>> a == ComplexType(a,aa)
-    True
-    >>> ComplexType(a,aa) == a
-    True
-
-matches()
-
-    >>> e.matches(t)
-    False
-    >>> a.matches(t)
-    True
-    >>> t.matches(a)
-    True
-    >>> a.matches(et)
-    True
-    >>> et.matches(a)
-    True
-    >>> ea.matches(eet)
-    True
-    >>> eet.matches(ea)
-    True
-    >>> aa.matches(et)
-    True
-    >>> aa.matches(t)
-    True
-
-Type error during parsing
-=========================
-
-    >>> try: print(tlp.parse(r'exists x y.(P(x) & P(x,y))'))
-    ... except InconsistentTypeHierarchyException as e: print(e)
-    The variable 'P' was found in multiple places with different types.
-    >>> try: tlp.parse(r'\x y.see(x,y)(\x.man(x))')
-    ... except TypeException as e: print(e)
-    The function '\x y.see(x,y)' is of type '<e,<e,?>>' and cannot be applied to '\x.man(x)' of type '<e,?>'.  Its argument must match type 'e'.
-    >>> try: tlp.parse(r'\P x y.-P(x,y)(\x.-man(x))')
-    ... except TypeException as e: print(e)
-    The function '\P x y.-P(x,y)' is of type '<<e,<e,t>>,<e,<e,t>>>' and cannot be applied to '\x.-man(x)' of type '<e,t>'.  Its argument must match type '<e,<e,t>>'.
-
-    >>> a = tlp.parse(r'-talk(x)')
-    >>> signature = a.typecheck()
-    >>> try: print(tlp.parse(r'-talk(x,y)', signature))
-    ... except InconsistentTypeHierarchyException as e: print(e)
-    The variable 'talk' was found in multiple places with different types.
-
-    >>> a = tlp.parse(r'-P(x)')
-    >>> b = tlp.parse(r'-P(x,y)')
-    >>> a.typecheck() # doctest: +ELLIPSIS
-    {...}
-    >>> b.typecheck() # doctest: +ELLIPSIS
-    {...}
-    >>> try: typecheck([a,b])
-    ... except InconsistentTypeHierarchyException as e: print(e)
-    The variable 'P' was found in multiple places with different types.
-
-    >>> a = tlp.parse(r'P(x)')
-    >>> b = tlp.parse(r'P(x,y)')
-    >>> signature = {'P': '<e,t>'}
-    >>> a.typecheck(signature) # doctest: +ELLIPSIS
-    {...}
-    >>> try: typecheck([a,b], signature)
-    ... except InconsistentTypeHierarchyException as e: print(e)
-    The variable 'P' was found in multiple places with different types.
-
-Parse errors
-============
-
-    >>> try: read_expr(r'')
-    ... except LogicalExpressionException as e: print(e)
-    End of input found.  Expression expected.
-    <BLANKLINE>
-    ^
-    >>> try: read_expr(r'(')
-    ... except LogicalExpressionException as e: print(e)
-    End of input found.  Expression expected.
-    (
-     ^
-    >>> try: read_expr(r')')
-    ... except LogicalExpressionException as e: print(e)
-    Unexpected token: ')'.  Expression expected.
-    )
-    ^
-    >>> try: read_expr(r'()')
-    ... except LogicalExpressionException as e: print(e)
-    Unexpected token: ')'.  Expression expected.
-    ()
-     ^
-    >>> try: read_expr(r'(P(x) & Q(x)')
-    ... except LogicalExpressionException as e: print(e)
-    End of input found.  Expected token ')'.
-    (P(x) & Q(x)
-                ^
-    >>> try: read_expr(r'(P(x) &')
-    ... except LogicalExpressionException as e: print(e)
-    End of input found.  Expression expected.
-    (P(x) &
-           ^
-    >>> try: read_expr(r'(P(x) | )')
-    ... except LogicalExpressionException as e: print(e)
-    Unexpected token: ')'.  Expression expected.
-    (P(x) | )
-            ^
-    >>> try: read_expr(r'P(x) ->')
-    ... except LogicalExpressionException as e: print(e)
-    End of input found.  Expression expected.
-    P(x) ->
-           ^
-    >>> try: read_expr(r'P(x')
-    ... except LogicalExpressionException as e: print(e)
-    End of input found.  Expected token ')'.
-    P(x
-       ^
-    >>> try: read_expr(r'P(x,')
-    ... except LogicalExpressionException as e: print(e)
-    End of input found.  Expression expected.
-    P(x,
-        ^
-    >>> try: read_expr(r'P(x,)')
-    ... except LogicalExpressionException as e: print(e)
-    Unexpected token: ')'.  Expression expected.
-    P(x,)
-        ^
-    >>> try: read_expr(r'exists')
-    ... except LogicalExpressionException as e: print(e)
-    End of input found.  Variable and Expression expected following quantifier 'exists'.
-    exists
-           ^
-    >>> try: read_expr(r'exists x')
-    ... except LogicalExpressionException as e: print(e)
-    End of input found.  Expression expected.
-    exists x
-             ^
-    >>> try: read_expr(r'exists x.')
-    ... except LogicalExpressionException as e: print(e)
-    End of input found.  Expression expected.
-    exists x.
-             ^
-    >>> try: read_expr(r'\  ')
-    ... except LogicalExpressionException as e: print(e)
-    End of input found.  Variable and Expression expected following lambda operator.
-    \
-      ^
-    >>> try: read_expr(r'\ x')
-    ... except LogicalExpressionException as e: print(e)
-    End of input found.  Expression expected.
-    \ x
-        ^
-    >>> try: read_expr(r'\ x y')
-    ... except LogicalExpressionException as e: print(e)
-    End of input found.  Expression expected.
-    \ x y
-          ^
-    >>> try: read_expr(r'\ x.')
-    ... except LogicalExpressionException as e: print(e)
-    End of input found.  Expression expected.
-    \ x.
-        ^
-    >>> try: read_expr(r'P(x)Q(x)')
-    ... except LogicalExpressionException as e: print(e)
-    Unexpected token: 'Q'.
-    P(x)Q(x)
-        ^
-    >>> try: read_expr(r'(P(x)Q(x)')
-    ... except LogicalExpressionException as e: print(e)
-    Unexpected token: 'Q'.  Expected token ')'.
-    (P(x)Q(x)
-         ^
-    >>> try: read_expr(r'exists x y')
-    ... except LogicalExpressionException as e: print(e)
-    End of input found.  Expression expected.
-    exists x y
-               ^
-    >>> try: read_expr(r'exists x y.')
-    ... except LogicalExpressionException as e: print(e)
-    End of input found.  Expression expected.
-    exists x y.
-               ^
-    >>> try: read_expr(r'exists x -> y')
-    ... except LogicalExpressionException as e: print(e)
-    Unexpected token: '->'.  Expression expected.
-    exists x -> y
-             ^
-
-
-    >>> try: read_expr(r'A -> ((P(x) & Q(x)) -> Z')
-    ... except LogicalExpressionException as e: print(e)
-    End of input found.  Expected token ')'.
-    A -> ((P(x) & Q(x)) -> Z
-                            ^
-    >>> try: read_expr(r'A -> ((P(x) &) -> Z')
-    ... except LogicalExpressionException as e: print(e)
-    Unexpected token: ')'.  Expression expected.
-    A -> ((P(x) &) -> Z
-                 ^
-    >>> try: read_expr(r'A -> ((P(x) | )) -> Z')
-    ... except LogicalExpressionException as e: print(e)
-    Unexpected token: ')'.  Expression expected.
-    A -> ((P(x) | )) -> Z
-                  ^
-    >>> try: read_expr(r'A -> (P(x) ->) -> Z')
-    ... except LogicalExpressionException as e: print(e)
-    Unexpected token: ')'.  Expression expected.
-    A -> (P(x) ->) -> Z
-                 ^
-    >>> try: read_expr(r'A -> (P(x) -> Z')
-    ... except LogicalExpressionException as e: print(e)
-    End of input found.  Expected token ')'.
-    A -> (P(x) -> Z
-                   ^
-    >>> try: read_expr(r'A -> (P(x,) -> Z')
-    ... except LogicalExpressionException as e: print(e)
-    Unexpected token: ')'.  Expression expected.
-    A -> (P(x,) -> Z
-              ^
-    >>> try: read_expr(r'A -> (P(x,)) -> Z')
-    ... except LogicalExpressionException as e: print(e)
-    Unexpected token: ')'.  Expression expected.
-    A -> (P(x,)) -> Z
-              ^
-    >>> try: read_expr(r'A -> (exists) -> Z')
-    ... except LogicalExpressionException as e: print(e)
-    ')' is an illegal variable name.  Constants may not be quantified.
-    A -> (exists) -> Z
-                ^
-    >>> try: read_expr(r'A -> (exists x) -> Z')
-    ... except LogicalExpressionException as e: print(e)
-    Unexpected token: ')'.  Expression expected.
-    A -> (exists x) -> Z
-                  ^
-    >>> try: read_expr(r'A -> (exists x.) -> Z')
-    ... except LogicalExpressionException as e: print(e)
-    Unexpected token: ')'.  Expression expected.
-    A -> (exists x.) -> Z
-                   ^
-    >>> try: read_expr(r'A -> (\  ) -> Z')
-    ... except LogicalExpressionException as e: print(e)
-    ')' is an illegal variable name.  Constants may not be abstracted.
-    A -> (\  ) -> Z
-             ^
-    >>> try: read_expr(r'A -> (\ x) -> Z')
-    ... except LogicalExpressionException as e: print(e)
-    Unexpected token: ')'.  Expression expected.
-    A -> (\ x) -> Z
-             ^
-    >>> try: read_expr(r'A -> (\ x y) -> Z')
-    ... except LogicalExpressionException as e: print(e)
-    Unexpected token: ')'.  Expression expected.
-    A -> (\ x y) -> Z
-               ^
-    >>> try: read_expr(r'A -> (\ x.) -> Z')
-    ... except LogicalExpressionException as e: print(e)
-    Unexpected token: ')'.  Expression expected.
-    A -> (\ x.) -> Z
-              ^
-    >>> try: read_expr(r'A -> (P(x)Q(x)) -> Z')
-    ... except LogicalExpressionException as e: print(e)
-    Unexpected token: 'Q'.  Expected token ')'.
-    A -> (P(x)Q(x)) -> Z
-              ^
-    >>> try: read_expr(r'A -> ((P(x)Q(x)) -> Z')
-    ... except LogicalExpressionException as e: print(e)
-    Unexpected token: 'Q'.  Expected token ')'.
-    A -> ((P(x)Q(x)) -> Z
-               ^
-    >>> try: read_expr(r'A -> (all x y) -> Z')
-    ... except LogicalExpressionException as e: print(e)
-    Unexpected token: ')'.  Expression expected.
-    A -> (all x y) -> Z
-                 ^
-    >>> try: read_expr(r'A -> (exists x y.) -> Z')
-    ... except LogicalExpressionException as e: print(e)
-    Unexpected token: ')'.  Expression expected.
-    A -> (exists x y.) -> Z
-                     ^
-    >>> try: read_expr(r'A -> (exists x -> y) -> Z')
-    ... except LogicalExpressionException as e: print(e)
-    Unexpected token: '->'.  Expression expected.
-    A -> (exists x -> y) -> Z
-                   ^
-
-
diff --git a/nlp_resource_data/nltk/test/metrics.doctest b/nlp_resource_data/nltk/test/metrics.doctest

deleted file mode 100644 (file)

index 139a888..0000000
--- a/nlp_resource_data/nltk/test/metrics.doctest
+++ /dev/null
@@ -1,270 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-=======
-Metrics
-=======
-
-The `nltk.metrics` package provides a variety of *evaluation measures*
-which can be used for a wide variety of NLP tasks.
-
-   >>> from __future__ import print_function
-   >>> from nltk.metrics import *
-
-------------------
-Standard IR Scores
-------------------
-
-We can use standard scores from information retrieval to test the
-performance of taggers, chunkers, etc.
-
-    >>> reference = 'DET NN VB DET JJ NN NN IN DET NN'.split()
-    >>> test    = 'DET VB VB DET NN NN NN IN DET NN'.split()
-    >>> print(accuracy(reference, test))
-    0.8
-
-
-The following measures apply to sets:
-
-    >>> reference_set = set(reference)
-    >>> test_set = set(test)
-    >>> precision(reference_set, test_set)
-    1.0
-    >>> print(recall(reference_set, test_set))
-    0.8
-    >>> print(f_measure(reference_set, test_set))
-    0.88888888888...
-
-Measuring the likelihood of the data, given probability distributions:
-
-    >>> from nltk import FreqDist, MLEProbDist
-    >>> pdist1 = MLEProbDist(FreqDist("aldjfalskfjaldsf"))
-    >>> pdist2 = MLEProbDist(FreqDist("aldjfalssjjlldss"))
-    >>> print(log_likelihood(['a', 'd'], [pdist1, pdist2]))
-    -2.7075187496...
-
-
-----------------
-Distance Metrics
-----------------
-
-String edit distance (Levenshtein):
-
-    >>> edit_distance("rain", "shine")
-    3
-
-Other distance measures:
-
-    >>> s1 = set([1,2,3,4])
-    >>> s2 = set([3,4,5])
-    >>> binary_distance(s1, s2)
-    1.0
-    >>> print(jaccard_distance(s1, s2))
-    0.6
-    >>> print(masi_distance(s1, s2))
-    0.868
-
-----------------------
-Miscellaneous Measures
-----------------------
-
-Rank Correlation works with two dictionaries mapping keys to ranks.
-The dictionaries should have the same set of keys.
-
-    >>> spearman_correlation({'e':1, 't':2, 'a':3}, {'e':1, 'a':2, 't':3})
-    0.5
-
-Windowdiff uses a sliding window in comparing two segmentations of the same input (e.g. tokenizations, chunkings).
-Segmentations are represented using strings of zeros and ones.
-
-    >>> s1 = "000100000010"
-    >>> s2 = "000010000100"
-    >>> s3 = "100000010000"
-    >>> s4 = "000000000000"
-    >>> s5 = "111111111111"
-    >>> windowdiff(s1, s1, 3)
-    0.0
-    >>> abs(windowdiff(s1, s2, 3) - 0.3)  < 1e-6  # windowdiff(s1, s2, 3) == 0.3
-    True
-    >>> abs(windowdiff(s2, s3, 3) - 0.8)  < 1e-6  # windowdiff(s2, s3, 3) == 0.8
-    True
-    >>> windowdiff(s1, s4, 3)
-    0.5
-    >>> windowdiff(s1, s5, 3)
-    1.0
-
-----------------
-Confusion Matrix
-----------------
-
-    >>> reference = 'This is the reference data.  Testing 123.  aoaeoeoe'
-    >>> test =      'Thos iz_the rifirenci data.  Testeng 123.  aoaeoeoe'
-    >>> print(ConfusionMatrix(reference, test))
-      |   . 1 2 3 T _ a c d e f g h i n o r s t z |
-    --+-------------------------------------------+
-      |<8>. . . . . 1 . . . . . . . . . . . . . . |
-    . | .<2>. . . . . . . . . . . . . . . . . . . |
-    1 | . .<1>. . . . . . . . . . . . . . . . . . |
-    2 | . . .<1>. . . . . . . . . . . . . . . . . |
-    3 | . . . .<1>. . . . . . . . . . . . . . . . |
-    T | . . . . .<2>. . . . . . . . . . . . . . . |
-    _ | . . . . . .<.>. . . . . . . . . . . . . . |
-    a | . . . . . . .<4>. . . . . . . . . . . . . |
-    c | . . . . . . . .<1>. . . . . . . . . . . . |
-    d | . . . . . . . . .<1>. . . . . . . . . . . |
-    e | . . . . . . . . . .<6>. . . 3 . . . . . . |
-    f | . . . . . . . . . . .<1>. . . . . . . . . |
-    g | . . . . . . . . . . . .<1>. . . . . . . . |
-    h | . . . . . . . . . . . . .<2>. . . . . . . |
-    i | . . . . . . . . . . 1 . . .<1>. 1 . . . . |
-    n | . . . . . . . . . . . . . . .<2>. . . . . |
-    o | . . . . . . . . . . . . . . . .<3>. . . . |
-    r | . . . . . . . . . . . . . . . . .<2>. . . |
-    s | . . . . . . . . . . . . . . . . . .<2>. 1 |
-    t | . . . . . . . . . . . . . . . . . . .<3>. |
-    z | . . . . . . . . . . . . . . . . . . . .<.>|
-    --+-------------------------------------------+
-    (row = reference; col = test)
-    <BLANKLINE>
-
-    >>> cm = ConfusionMatrix(reference, test)
-    >>> print(cm.pretty_format(sort_by_count=True))
-      |   e a i o s t . T h n r 1 2 3 c d f g _ z |
-    --+-------------------------------------------+
-      |<8>. . . . . . . . . . . . . . . . . . 1 . |
-    e | .<6>. 3 . . . . . . . . . . . . . . . . . |
-    a | . .<4>. . . . . . . . . . . . . . . . . . |
-    i | . 1 .<1>1 . . . . . . . . . . . . . . . . |
-    o | . . . .<3>. . . . . . . . . . . . . . . . |
-    s | . . . . .<2>. . . . . . . . . . . . . . 1 |
-    t | . . . . . .<3>. . . . . . . . . . . . . . |
-    . | . . . . . . .<2>. . . . . . . . . . . . . |
-    T | . . . . . . . .<2>. . . . . . . . . . . . |
-    h | . . . . . . . . .<2>. . . . . . . . . . . |
-    n | . . . . . . . . . .<2>. . . . . . . . . . |
-    r | . . . . . . . . . . .<2>. . . . . . . . . |
-    1 | . . . . . . . . . . . .<1>. . . . . . . . |
-    2 | . . . . . . . . . . . . .<1>. . . . . . . |
-    3 | . . . . . . . . . . . . . .<1>. . . . . . |
-    c | . . . . . . . . . . . . . . .<1>. . . . . |
-    d | . . . . . . . . . . . . . . . .<1>. . . . |
-    f | . . . . . . . . . . . . . . . . .<1>. . . |
-    g | . . . . . . . . . . . . . . . . . .<1>. . |
-    _ | . . . . . . . . . . . . . . . . . . .<.>. |
-    z | . . . . . . . . . . . . . . . . . . . .<.>|
-    --+-------------------------------------------+
-    (row = reference; col = test)
-    <BLANKLINE>
-
-    >>> print(cm.pretty_format(sort_by_count=True, truncate=10))
-      |   e a i o s t . T h |
-    --+---------------------+
-      |<8>. . . . . . . . . |
-    e | .<6>. 3 . . . . . . |
-    a | . .<4>. . . . . . . |
-    i | . 1 .<1>1 . . . . . |
-    o | . . . .<3>. . . . . |
-    s | . . . . .<2>. . . . |
-    t | . . . . . .<3>. . . |
-    . | . . . . . . .<2>. . |
-    T | . . . . . . . .<2>. |
-    h | . . . . . . . . .<2>|
-    --+---------------------+
-    (row = reference; col = test)
-    <BLANKLINE>
-
-    >>> print(cm.pretty_format(sort_by_count=True, truncate=10, values_in_chart=False))
-       |                   1 |
-       | 1 2 3 4 5 6 7 8 9 0 |
-    ---+---------------------+
-     1 |<8>. . . . . . . . . |
-     2 | .<6>. 3 . . . . . . |
-     3 | . .<4>. . . . . . . |
-     4 | . 1 .<1>1 . . . . . |
-     5 | . . . .<3>. . . . . |
-     6 | . . . . .<2>. . . . |
-     7 | . . . . . .<3>. . . |
-     8 | . . . . . . .<2>. . |
-     9 | . . . . . . . .<2>. |
-    10 | . . . . . . . . .<2>|
-    ---+---------------------+
-    (row = reference; col = test)
-    Value key:
-         1:
-         2: e
-         3: a
-         4: i
-         5: o
-         6: s
-         7: t
-         8: .
-         9: T
-        10: h
-    <BLANKLINE>
-
-
---------------------
-Association measures
---------------------
-
-These measures are useful to determine whether the co-occurrence of two random
-events is meaningful. They are used, for instance, to distinguish collocations
-from other pairs of adjacent words.
-
-We bring some examples of bigram association calculations from Manning and
-Schutze's SNLP, 2nd Ed. chapter 5.
-
-    >>> n_new_companies, n_new, n_companies, N = 8, 15828, 4675, 14307668
-    >>> bam = BigramAssocMeasures
-    >>> bam.raw_freq(20, (42, 20), N) == 20. / N
-    True
-    >>> bam.student_t(n_new_companies, (n_new, n_companies), N)
-    0.999...
-    >>> bam.chi_sq(n_new_companies, (n_new, n_companies), N)
-    1.54...
-    >>> bam.likelihood_ratio(150, (12593, 932), N)
-    1291...
-
-For other associations, we ensure the ordering of the measures:
-
-    >>> bam.mi_like(20, (42, 20), N) > bam.mi_like(20, (41, 27), N)
-    True
-    >>> bam.pmi(20, (42, 20), N) > bam.pmi(20, (41, 27), N)
-    True
-    >>> bam.phi_sq(20, (42, 20), N) > bam.phi_sq(20, (41, 27), N)
-    True
-    >>> bam.poisson_stirling(20, (42, 20), N) > bam.poisson_stirling(20, (41, 27), N)
-    True
-    >>> bam.jaccard(20, (42, 20), N) > bam.jaccard(20, (41, 27), N)
-    True
-    >>> bam.dice(20, (42, 20), N) > bam.dice(20, (41, 27), N)
-    True
-    >>> bam.fisher(20, (42, 20), N) > bam.fisher(20, (41, 27), N) # doctest: +SKIP
-    False
-
-For trigrams, we have to provide more count information:
-
-    >>> n_w1_w2_w3 = 20
-    >>> n_w1_w2, n_w1_w3, n_w2_w3 = 35, 60, 40
-    >>> pair_counts = (n_w1_w2, n_w1_w3, n_w2_w3)
-    >>> n_w1, n_w2, n_w3 = 100, 200, 300
-    >>> uni_counts = (n_w1, n_w2, n_w3)
-    >>> N = 14307668
-    >>> tam = TrigramAssocMeasures
-    >>> tam.raw_freq(n_w1_w2_w3, pair_counts, uni_counts, N) == 1. * n_w1_w2_w3 / N
-    True
-    >>> uni_counts2 = (n_w1, n_w2, 100)
-    >>> tam.student_t(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.student_t(n_w1_w2_w3, pair_counts, uni_counts, N)
-    True
-    >>> tam.chi_sq(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.chi_sq(n_w1_w2_w3, pair_counts, uni_counts, N)
-    True
-    >>> tam.mi_like(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.mi_like(n_w1_w2_w3, pair_counts, uni_counts, N)
-    True
-    >>> tam.pmi(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.pmi(n_w1_w2_w3, pair_counts, uni_counts, N)
-    True
-    >>> tam.likelihood_ratio(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.likelihood_ratio(n_w1_w2_w3, pair_counts, uni_counts, N)
-    True
-    >>> tam.poisson_stirling(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.poisson_stirling(n_w1_w2_w3, pair_counts, uni_counts, N)
-    True
-    >>> tam.jaccard(n_w1_w2_w3, pair_counts, uni_counts2, N) > tam.jaccard(n_w1_w2_w3, pair_counts, uni_counts, N)
-    True
diff --git a/nlp_resource_data/nltk/test/misc.doctest b/nlp_resource_data/nltk/test/misc.doctest

deleted file mode 100644 (file)

index 71343b3..0000000
--- a/nlp_resource_data/nltk/test/misc.doctest
+++ /dev/null
@@ -1,118 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
---------------------------------------------------------------------------------
-Unit tests for the miscellaneous sort functions.
---------------------------------------------------------------------------------
-
-    >>> from copy import deepcopy
-    >>> from nltk.misc.sort import *
-
-A (very) small list of unsorted integers.
-
-    >>> test_data = [12, 67, 7, 28, 92, 56, 53, 720, 91, 57, 20, 20]
-
-Test each sorting method - each method returns the number of operations
-required to sort the data, and sorts in-place (destructively - hence the need
-for multiple copies).
-
-    >>> sorted_data = deepcopy(test_data)
-    >>> selection(sorted_data)
-    66
-
-    >>> sorted_data
-    [7, 12, 20, 20, 28, 53, 56, 57, 67, 91, 92, 720]
-
-    >>> sorted_data = deepcopy(test_data)
-    >>> bubble(sorted_data)
-    30
-
-    >>> sorted_data
-    [7, 12, 20, 20, 28, 53, 56, 57, 67, 91, 92, 720]
-
-    >>> sorted_data = deepcopy(test_data)
-    >>> merge(sorted_data)
-    30
-
-    >>> sorted_data
-    [7, 12, 20, 20, 28, 53, 56, 57, 67, 91, 92, 720]
-
-    >>> sorted_data = deepcopy(test_data)
-    >>> quick(sorted_data)
-    13
-
-    >>> sorted_data
-    [7, 12, 20, 20, 28, 53, 56, 57, 67, 91, 92, 720]
-
---------------------------------------------------------------------------------
-Unit tests for Wordfinder class
---------------------------------------------------------------------------------
-
-    >>> import random
-
-    >>> # The following is not enough for reproducibility under Python 2/3
-    >>> # (see http://bugs.python.org/issue9025) so this test is skipped.
-    >>> random.seed(12345)
-
-    >>> from nltk.misc import wordfinder
-    >>> wordfinder.word_finder() # doctest: +SKIP
-    Word Finder
-    <BLANKLINE>
-    J V L A I R O T A T I S I V O D E R E T
-    H U U B E A R O E P O C S O R E T N E P
-    A D A U Z E E S R A P P A L L M E N T R
-    C X A D Q S Z T P E O R S N G P J A D E
-    I G Y K K T I A A R G F I D T E L C N S
-    R E C N B H T R L T N N B W N T A O A I
-    A Y I L O E I A M E I A A Y U R P L L D
-    G L T V S T S F E A D I P H D O O H N I
-    R L S E C I N I L R N N M E C G R U E A
-    A A Y G I C E N L L E O I G Q R T A E L
-    M R C E T I S T A E T L L E U A E N R L
-    O U O T A S E E C S O O N H Y P A T G Y
-    E M H O M M D R E S F P U L T H C F N V
-    L A C A I M A M A N L B R U T E D O M I
-    O R I L N E E E E E U A R S C R Y L I P
-    H T R K E S N N M S I L A S R E V I N U
-    T X T A A O U T K S E T A R R E S I B J
-    A E D L E L J I F O O R P E L K N I R W
-    K H A I D E Q O P R I C K T I M B E R P
-    Z K D O O H G N I H T U R V E Y D R O P
-    <BLANKLINE>
-    1: INTERCHANGER
-    2: TEARLESSNESS
-    3: UNIVERSALISM
-    4: DESENSITIZER
-    5: INTERMENTION
-    6: TRICHOCYSTIC
-    7: EXTRAMURALLY
-    8: VEGETOALKALI
-    9: PALMELLACEAE
-    10: AESTHETICISM
-    11: PETROGRAPHER
-    12: VISITATORIAL
-    13: OLEOMARGARIC
-    14: WRINKLEPROOF
-    15: PRICKTIMBER
-    16: PRESIDIALLY
-    17: SCITAMINEAE
-    18: ENTEROSCOPE
-    19: APPALLMENT
-    20: TURVEYDROP
-    21: THINGHOOD
-    22: BISERRATE
-    23: GREENLAND
-    24: BRUTEDOM
-    25: POLONIAN
-    26: ACOLHUAN
-    27: LAPORTEA
-    28: TENDING
-    29: TEREDO
-    30: MESOLE
-    31: UNLIMP
-    32: OSTARA
-    33: PILY
-    34: DUNT
-    35: ONYX
-    36: KATH
-    37: JUNE
diff --git a/nlp_resource_data/nltk/test/nonmonotonic.doctest b/nlp_resource_data/nltk/test/nonmonotonic.doctest

deleted file mode 100644 (file)

index be761b3..0000000
--- a/nlp_resource_data/nltk/test/nonmonotonic.doctest
+++ /dev/null
@@ -1,286 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-======================
-Nonmonotonic Reasoning
-======================
-
-    >>> from nltk import *
-    >>> from nltk.inference.nonmonotonic import *
-    >>> from nltk.sem import logic
-    >>> logic._counter._value = 0
-    >>> read_expr = logic.Expression.fromstring
-
-------------------------
-Closed Domain Assumption
-------------------------
-
-The only entities in the domain are those found in the assumptions or goal.
-If the domain only contains "A" and "B", then the expression "exists x.P(x)" can
-be replaced with "P(A) | P(B)" and an expression "all x.P(x)" can be replaced
-with "P(A) & P(B)".
-
-    >>> p1 = read_expr(r'all x.(man(x) -> mortal(x))')
-    >>> p2 = read_expr(r'man(Socrates)')
-    >>> c = read_expr(r'mortal(Socrates)')
-    >>> prover = Prover9Command(c, [p1,p2])
-    >>> prover.prove()
-    True
-    >>> cdp = ClosedDomainProver(prover)
-    >>> for a in cdp.assumptions(): print(a) # doctest: +SKIP
-    (man(Socrates) -> mortal(Socrates))
-    man(Socrates)
-    >>> cdp.prove()
-    True
-
-    >>> p1 = read_expr(r'exists x.walk(x)')
-    >>> p2 = read_expr(r'man(Socrates)')
-    >>> c = read_expr(r'walk(Socrates)')
-    >>> prover = Prover9Command(c, [p1,p2])
-    >>> prover.prove()
-    False
-    >>> cdp = ClosedDomainProver(prover)
-    >>> for a in cdp.assumptions(): print(a) # doctest: +SKIP
-    walk(Socrates)
-    man(Socrates)
-    >>> cdp.prove()
-    True
-
-    >>> p1 = read_expr(r'exists x.walk(x)')
-    >>> p2 = read_expr(r'man(Socrates)')
-    >>> p3 = read_expr(r'-walk(Bill)')
-    >>> c = read_expr(r'walk(Socrates)')
-    >>> prover = Prover9Command(c, [p1,p2,p3])
-    >>> prover.prove()
-    False
-    >>> cdp = ClosedDomainProver(prover)
-    >>> for a in cdp.assumptions(): print(a) # doctest: +SKIP
-    (walk(Socrates) | walk(Bill))
-    man(Socrates)
-    -walk(Bill)
-    >>> cdp.prove()
-    True
-
-    >>> p1 = read_expr(r'walk(Socrates)')
-    >>> p2 = read_expr(r'walk(Bill)')
-    >>> c = read_expr(r'all x.walk(x)')
-    >>> prover = Prover9Command(c, [p1,p2])
-    >>> prover.prove()
-    False
-    >>> cdp = ClosedDomainProver(prover)
-    >>> for a in cdp.assumptions(): print(a) # doctest: +SKIP
-    walk(Socrates)
-    walk(Bill)
-    >>> print(cdp.goal()) # doctest: +SKIP
-    (walk(Socrates) & walk(Bill))
-    >>> cdp.prove()
-    True
-
-    >>> p1 = read_expr(r'girl(mary)')
-    >>> p2 = read_expr(r'dog(rover)')
-    >>> p3 = read_expr(r'all x.(girl(x) -> -dog(x))')
-    >>> p4 = read_expr(r'all x.(dog(x) -> -girl(x))')
-    >>> p5 = read_expr(r'chase(mary, rover)')
-    >>> c = read_expr(r'exists y.(dog(y) & all x.(girl(x) -> chase(x,y)))')
-    >>> prover = Prover9Command(c, [p1,p2,p3,p4,p5])
-    >>> print(prover.prove())
-    False
-    >>> cdp = ClosedDomainProver(prover)
-    >>> for a in cdp.assumptions(): print(a) # doctest: +SKIP
-    girl(mary)
-    dog(rover)
-    ((girl(rover) -> -dog(rover)) & (girl(mary) -> -dog(mary)))
-    ((dog(rover) -> -girl(rover)) & (dog(mary) -> -girl(mary)))
-    chase(mary,rover)
-    >>> print(cdp.goal()) # doctest: +SKIP
-    ((dog(rover) & (girl(rover) -> chase(rover,rover)) & (girl(mary) -> chase(mary,rover))) | (dog(mary) & (girl(rover) -> chase(rover,mary)) & (girl(mary) -> chase(mary,mary))))
-    >>> print(cdp.prove())
-    True
-
------------------------
-Unique Names Assumption
------------------------
-
-No two entities in the domain represent the same entity unless it can be
-explicitly proven that they do.  Therefore, if the domain contains "A" and "B",
-then add the assumption "-(A = B)" if it is not the case that
-"<assumptions> \|- (A = B)".
-
-    >>> p1 = read_expr(r'man(Socrates)')
-    >>> p2 = read_expr(r'man(Bill)')
-    >>> c = read_expr(r'exists x.exists y.-(x = y)')
-    >>> prover = Prover9Command(c, [p1,p2])
-    >>> prover.prove()
-    False
-    >>> unp = UniqueNamesProver(prover)
-    >>> for a in unp.assumptions(): print(a) # doctest: +SKIP
-    man(Socrates)
-    man(Bill)
-    -(Socrates = Bill)
-    >>> unp.prove()
-    True
-
-    >>> p1 = read_expr(r'all x.(walk(x) -> (x = Socrates))')
-    >>> p2 = read_expr(r'Bill = William')
-    >>> p3 = read_expr(r'Bill = Billy')
-    >>> c = read_expr(r'-walk(William)')
-    >>> prover = Prover9Command(c, [p1,p2,p3])
-    >>> prover.prove()
-    False
-    >>> unp = UniqueNamesProver(prover)
-    >>> for a in unp.assumptions(): print(a) # doctest: +SKIP
-    all x.(walk(x) -> (x = Socrates))
-    (Bill = William)
-    (Bill = Billy)
-    -(William = Socrates)
-    -(Billy = Socrates)
-    -(Socrates = Bill)
-    >>> unp.prove()
-    True
-
------------------------
-Closed World Assumption
------------------------
-
-The only entities that have certain properties are those that are stated to
-have the properties.  We accomplish this assumption by "completing" predicates.
-
-If the assumptions contain "P(A)", then "all x.(P(x) -> (x=A))" is the completion
-of "P".  If the assumptions contain "all x.(ostrich(x) -> bird(x))", then
-"all x.(bird(x) -> ostrich(x))" is the completion of "bird".  If the
-assumptions don't contain anything that is "P", then "all x.-P(x)" is the
-completion of "P".
-
-    >>> p1 = read_expr(r'walk(Socrates)')
-    >>> p2 = read_expr(r'-(Socrates = Bill)')
-    >>> c = read_expr(r'-walk(Bill)')
-    >>> prover = Prover9Command(c, [p1,p2])
-    >>> prover.prove()
-    False
-    >>> cwp = ClosedWorldProver(prover)
-    >>> for a in cwp.assumptions(): print(a) # doctest: +SKIP
-    walk(Socrates)
-    -(Socrates = Bill)
-    all z1.(walk(z1) -> (z1 = Socrates))
-    >>> cwp.prove()
-    True
-
-    >>> p1 = read_expr(r'see(Socrates, John)')
-    >>> p2 = read_expr(r'see(John, Mary)')
-    >>> p3 = read_expr(r'-(Socrates = John)')
-    >>> p4 = read_expr(r'-(John = Mary)')
-    >>> c = read_expr(r'-see(Socrates, Mary)')
-    >>> prover = Prover9Command(c, [p1,p2,p3,p4])
-    >>> prover.prove()
-    False
-    >>> cwp = ClosedWorldProver(prover)
-    >>> for a in cwp.assumptions(): print(a) # doctest: +SKIP
-    see(Socrates,John)
-    see(John,Mary)
-    -(Socrates = John)
-    -(John = Mary)
-    all z3 z4.(see(z3,z4) -> (((z3 = Socrates) & (z4 = John)) | ((z3 = John) & (z4 = Mary))))
-    >>> cwp.prove()
-    True
-
-    >>> p1 = read_expr(r'all x.(ostrich(x) -> bird(x))')
-    >>> p2 = read_expr(r'bird(Tweety)')
-    >>> p3 = read_expr(r'-ostrich(Sam)')
-    >>> p4 = read_expr(r'Sam != Tweety')
-    >>> c = read_expr(r'-bird(Sam)')
-    >>> prover = Prover9Command(c, [p1,p2,p3,p4])
-    >>> prover.prove()
-    False
-    >>> cwp = ClosedWorldProver(prover)
-    >>> for a in cwp.assumptions(): print(a) # doctest: +SKIP
-    all x.(ostrich(x) -> bird(x))
-    bird(Tweety)
-    -ostrich(Sam)
-    -(Sam = Tweety)
-    all z7.-ostrich(z7)
-    all z8.(bird(z8) -> ((z8 = Tweety) | ostrich(z8)))
-    >>> print(cwp.prove())
-    True
-
------------------------
-Multi-Decorator Example
------------------------
-
-Decorators can be nested to utilize multiple assumptions.
-
-    >>> p1 = read_expr(r'see(Socrates, John)')
-    >>> p2 = read_expr(r'see(John, Mary)')
-    >>> c = read_expr(r'-see(Socrates, Mary)')
-    >>> prover = Prover9Command(c, [p1,p2])
-    >>> print(prover.prove())
-    False
-    >>> cmd = ClosedDomainProver(UniqueNamesProver(ClosedWorldProver(prover)))
-    >>> print(cmd.prove())
-    True
-
------------------
-Default Reasoning
------------------
-    >>> logic._counter._value = 0
-    >>> premises = []
-
-define the taxonomy
-    >>> premises.append(read_expr(r'all x.(elephant(x)        -> animal(x))'))
-    >>> premises.append(read_expr(r'all x.(bird(x)            -> animal(x))'))
-    >>> premises.append(read_expr(r'all x.(dove(x)            -> bird(x))'))
-    >>> premises.append(read_expr(r'all x.(ostrich(x)         -> bird(x))'))
-    >>> premises.append(read_expr(r'all x.(flying_ostrich(x)  -> ostrich(x))'))
-
-default the properties using abnormalities
-    >>> premises.append(read_expr(r'all x.((animal(x)  & -Ab1(x)) -> -fly(x))')) #normal animals don't fly
-    >>> premises.append(read_expr(r'all x.((bird(x)    & -Ab2(x)) -> fly(x))'))  #normal birds fly
-    >>> premises.append(read_expr(r'all x.((ostrich(x) & -Ab3(x)) -> -fly(x))')) #normal ostriches don't fly
-
-specify abnormal entities
-    >>> premises.append(read_expr(r'all x.(bird(x)           -> Ab1(x))')) #flight
-    >>> premises.append(read_expr(r'all x.(ostrich(x)        -> Ab2(x))')) #non-flying bird
-    >>> premises.append(read_expr(r'all x.(flying_ostrich(x) -> Ab3(x))')) #flying ostrich
-
-define entities
-    >>> premises.append(read_expr(r'elephant(el)'))
-    >>> premises.append(read_expr(r'dove(do)'))
-    >>> premises.append(read_expr(r'ostrich(os)'))
-
-print the augmented assumptions list
-    >>> prover = Prover9Command(None, premises)
-    >>> command = UniqueNamesProver(ClosedWorldProver(prover))
-    >>> for a in command.assumptions(): print(a) # doctest: +SKIP
-    all x.(elephant(x) -> animal(x))
-    all x.(bird(x) -> animal(x))
-    all x.(dove(x) -> bird(x))
-    all x.(ostrich(x) -> bird(x))
-    all x.(flying_ostrich(x) -> ostrich(x))
-    all x.((animal(x) & -Ab1(x)) -> -fly(x))
-    all x.((bird(x) & -Ab2(x)) -> fly(x))
-    all x.((ostrich(x) & -Ab3(x)) -> -fly(x))
-    all x.(bird(x) -> Ab1(x))
-    all x.(ostrich(x) -> Ab2(x))
-    all x.(flying_ostrich(x) -> Ab3(x))
-    elephant(el)
-    dove(do)
-    ostrich(os)
-    all z1.(animal(z1) -> (elephant(z1) | bird(z1)))
-    all z2.(Ab1(z2) -> bird(z2))
-    all z3.(bird(z3) -> (dove(z3) | ostrich(z3)))
-    all z4.(dove(z4) -> (z4 = do))
-    all z5.(Ab2(z5) -> ostrich(z5))
-    all z6.(Ab3(z6) -> flying_ostrich(z6))
-    all z7.(ostrich(z7) -> ((z7 = os) | flying_ostrich(z7)))
-    all z8.-flying_ostrich(z8)
-    all z9.(elephant(z9) -> (z9 = el))
-    -(el = os)
-    -(el = do)
-    -(os = do)
-
-    >>> UniqueNamesProver(ClosedWorldProver(Prover9Command(read_expr('-fly(el)'), premises))).prove()
-    True
-    >>> UniqueNamesProver(ClosedWorldProver(Prover9Command(read_expr('fly(do)'), premises))).prove()
-    True
-    >>> UniqueNamesProver(ClosedWorldProver(Prover9Command(read_expr('-fly(os)'), premises))).prove()
-    True
-
diff --git a/nlp_resource_data/nltk/test/nonmonotonic_fixt.py b/nlp_resource_data/nltk/test/nonmonotonic_fixt.py

deleted file mode 100644 (file)

index 0c38381..0000000
--- a/nlp_resource_data/nltk/test/nonmonotonic_fixt.py
+++ /dev/null
@@ -1,15 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import
-
-
-def setup_module(module):
-    from nose import SkipTest
-    from nltk.inference.mace import Mace
-
-    try:
-        m = Mace()
-        m._find_binary('mace4')
-    except LookupError:
-        raise SkipTest(
-            "Mace4/Prover9 is not available so nonmonotonic.doctest was skipped"
-        )
diff --git a/nlp_resource_data/nltk/test/paice.doctest b/nlp_resource_data/nltk/test/paice.doctest

deleted file mode 100644 (file)

index 1e3a65c..0000000
--- a/nlp_resource_data/nltk/test/paice.doctest
+++ /dev/null
@@ -1,35 +0,0 @@
-
-=====================================================
-PAICE's evaluation statistics for stemming algorithms
-=====================================================
-
-Given a list of words with their real lemmas and their stems according to the stemming algorithm under evaluation,
-computes the Understemming Index (UI), Overstemming Index (OI), Stemming Weight (SW) and Error-rate relative to truncation (ERRT).
-
-   >>> from nltk.metrics import Paice
-
-
--------------------------------------
-Understemming and Overstemming values
--------------------------------------
-
-    >>> lemmas = {'kneel': ['kneel', 'knelt'],
-    ...           'range': ['range', 'ranged'],
-    ...           'ring': ['ring', 'rang', 'rung']}
-    >>> stems = {'kneel': ['kneel'],
-    ...          'knelt': ['knelt'],
-    ...          'rang': ['rang', 'range', 'ranged'],
-    ...          'ring': ['ring'],
-    ...          'rung': ['rung']}
-    >>> p = Paice(lemmas, stems)
-    >>> p.gumt, p.gdmt, p.gwmt, p.gdnt
-    (4.0, 5.0, 2.0, 16.0)
-
-    >>> p.ui, p.oi, p.sw
-    (0.8..., 0.125..., 0.15625...)
-
-    >>> p.errt
-    1.0
-
-    >>> [('{0:.3f}'.format(a), '{0:.3f}'.format(b)) for a, b in p.coords]
-    [('0.000', '1.000'), ('0.000', '0.375'), ('0.600', '0.125'), ('0.800', '0.125')]
diff --git a/nlp_resource_data/nltk/test/parse.doctest b/nlp_resource_data/nltk/test/parse.doctest

deleted file mode 100644 (file)

index b7c0ee1..0000000
--- a/nlp_resource_data/nltk/test/parse.doctest
+++ /dev/null
@@ -1,884 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-=========
- Parsing
-=========
-
-Unit tests for the Context Free Grammar class
----------------------------------------------
-
-    >>> from nltk import Nonterminal, nonterminals, Production, CFG
-
-    >>> nt1 = Nonterminal('NP')
-    >>> nt2 = Nonterminal('VP')
-
-    >>> nt1.symbol()
-    'NP'
-
-    >>> nt1 == Nonterminal('NP')
-    True
-
-    >>> nt1 == nt2
-    False
-
-    >>> S, NP, VP, PP = nonterminals('S, NP, VP, PP')
-    >>> N, V, P, DT = nonterminals('N, V, P, DT')
-
-    >>> prod1 = Production(S, [NP, VP])
-    >>> prod2 = Production(NP, [DT, NP])
-
-    >>> prod1.lhs()
-    S
-
-    >>> prod1.rhs()
-    (NP, VP)
-
-    >>> prod1 == Production(S, [NP, VP])
-    True
-
-    >>> prod1 == prod2
-    False
-
-    >>> grammar = CFG.fromstring("""
-    ... S -> NP VP
-    ... PP -> P NP
-    ... NP -> 'the' N | N PP | 'the' N PP
-    ... VP -> V NP | V PP | V NP PP
-    ... N -> 'cat'
-    ... N -> 'dog'
-    ... N -> 'rug'
-    ... V -> 'chased'
-    ... V -> 'sat'
-    ... P -> 'in'
-    ... P -> 'on'
-    ... """)
-
-Unit tests for the rd (Recursive Descent Parser) class
-------------------------------------------------------
-
-Create and run a recursive descent parser over both a syntactically ambiguous
-and unambiguous sentence.
-
-    >>> from nltk.parse import RecursiveDescentParser
-    >>> rd = RecursiveDescentParser(grammar)
-
-    >>> sentence1 = 'the cat chased the dog'.split()
-    >>> sentence2 = 'the cat chased the dog on the rug'.split()
-
-    >>> for t in rd.parse(sentence1):
-    ...     print(t)
-    (S (NP the (N cat)) (VP (V chased) (NP the (N dog))))
-
-    >>> for t in rd.parse(sentence2):
-    ...     print(t)
-    (S
-      (NP the (N cat))
-      (VP (V chased) (NP the (N dog) (PP (P on) (NP the (N rug))))))
-    (S
-      (NP the (N cat))
-      (VP (V chased) (NP the (N dog)) (PP (P on) (NP the (N rug)))))
-
-
-(dolist (expr doctest-font-lock-keywords)
-  (add-to-list 'font-lock-keywords expr))
-
-  font-lock-keywords
-(add-to-list 'font-lock-keywords
-  (car doctest-font-lock-keywords))
-
-
-Unit tests for the sr (Shift Reduce Parser) class
--------------------------------------------------
-
-Create and run a shift reduce parser over both a syntactically ambiguous
-and unambiguous sentence. Note that unlike the recursive descent parser, one
-and only one parse is ever returned.
-
-    >>> from nltk.parse import ShiftReduceParser
-    >>> sr = ShiftReduceParser(grammar)
-
-    >>> sentence1 = 'the cat chased the dog'.split()
-    >>> sentence2 = 'the cat chased the dog on the rug'.split()
-
-    >>> for t in sr.parse(sentence1):
-    ...     print(t)
-    (S (NP the (N cat)) (VP (V chased) (NP the (N dog))))
-
-
-The shift reduce parser uses heuristics to decide what to do when there are
-multiple possible shift or reduce operations available - for the supplied
-grammar clearly the wrong operation is selected.
-
-    >>> for t in sr.parse(sentence2):
-    ...     print(t)
-
-
-Unit tests for the Chart Parser class
--------------------------------------
-
-We use the demo() function for testing.
-We must turn off showing of times.
-
-    >>> import nltk
-
-First we test tracing with a short sentence
-
-    >>> nltk.parse.chart.demo(2, print_times=False, trace=1,
-    ...                       sent='I saw a dog', numparses=1)
-    * Sentence:
-    I saw a dog
-    ['I', 'saw', 'a', 'dog']
-    <BLANKLINE>
-    * Strategy: Bottom-up
-    <BLANKLINE>
-    |.    I    .   saw   .    a    .   dog   .|
-    |[---------]         .         .         .| [0:1] 'I'
-    |.         [---------]         .         .| [1:2] 'saw'
-    |.         .         [---------]         .| [2:3] 'a'
-    |.         .         .         [---------]| [3:4] 'dog'
-    |>         .         .         .         .| [0:0] NP -> * 'I'
-    |[---------]         .         .         .| [0:1] NP -> 'I' *
-    |>         .         .         .         .| [0:0] S  -> * NP VP
-    |>         .         .         .         .| [0:0] NP -> * NP PP
-    |[--------->         .         .         .| [0:1] S  -> NP * VP
-    |[--------->         .         .         .| [0:1] NP -> NP * PP
-    |.         >         .         .         .| [1:1] Verb -> * 'saw'
-    |.         [---------]         .         .| [1:2] Verb -> 'saw' *
-    |.         >         .         .         .| [1:1] VP -> * Verb NP
-    |.         >         .         .         .| [1:1] VP -> * Verb
-    |.         [--------->         .         .| [1:2] VP -> Verb * NP
-    |.         [---------]         .         .| [1:2] VP -> Verb *
-    |.         >         .         .         .| [1:1] VP -> * VP PP
-    |[-------------------]         .         .| [0:2] S  -> NP VP *
-    |.         [--------->         .         .| [1:2] VP -> VP * PP
-    |.         .         >         .         .| [2:2] Det -> * 'a'
-    |.         .         [---------]         .| [2:3] Det -> 'a' *
-    |.         .         >         .         .| [2:2] NP -> * Det Noun
-    |.         .         [--------->         .| [2:3] NP -> Det * Noun
-    |.         .         .         >         .| [3:3] Noun -> * 'dog'
-    |.         .         .         [---------]| [3:4] Noun -> 'dog' *
-    |.         .         [-------------------]| [2:4] NP -> Det Noun *
-    |.         .         >         .         .| [2:2] S  -> * NP VP
-    |.         .         >         .         .| [2:2] NP -> * NP PP
-    |.         [-----------------------------]| [1:4] VP -> Verb NP *
-    |.         .         [------------------->| [2:4] S  -> NP * VP
-    |.         .         [------------------->| [2:4] NP -> NP * PP
-    |[=======================================]| [0:4] S  -> NP VP *
-    |.         [----------------------------->| [1:4] VP -> VP * PP
-    Nr edges in chart: 33
-    (S (NP I) (VP (Verb saw) (NP (Det a) (Noun dog))))
-    <BLANKLINE>
-
-Then we test the different parsing Strategies.
-Note that the number of edges differ between the strategies.
-
-Top-down
-
-    >>> nltk.parse.chart.demo(1, print_times=False, trace=0,
-    ...                       sent='I saw John with a dog', numparses=2)
-    * Sentence:
-    I saw John with a dog
-    ['I', 'saw', 'John', 'with', 'a', 'dog']
-    <BLANKLINE>
-    * Strategy: Top-down
-    <BLANKLINE>
-    Nr edges in chart: 48
-    (S
-      (NP I)
-      (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
-    (S
-      (NP I)
-      (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
-    <BLANKLINE>
-
-Bottom-up
-
-    >>> nltk.parse.chart.demo(2, print_times=False, trace=0,
-    ...                       sent='I saw John with a dog', numparses=2)
-    * Sentence:
-    I saw John with a dog
-    ['I', 'saw', 'John', 'with', 'a', 'dog']
-    <BLANKLINE>
-    * Strategy: Bottom-up
-    <BLANKLINE>
-    Nr edges in chart: 53
-    (S
-      (NP I)
-      (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
-    (S
-      (NP I)
-      (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
-    <BLANKLINE>
-
-Bottom-up Left-Corner
-
-    >>> nltk.parse.chart.demo(3, print_times=False, trace=0,
-    ...                       sent='I saw John with a dog', numparses=2)
-    * Sentence:
-    I saw John with a dog
-    ['I', 'saw', 'John', 'with', 'a', 'dog']
-    <BLANKLINE>
-    * Strategy: Bottom-up left-corner
-    <BLANKLINE>
-    Nr edges in chart: 36
-    (S
-      (NP I)
-      (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
-    (S
-      (NP I)
-      (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
-    <BLANKLINE>
-
-Left-Corner with Bottom-Up Filter
-
-    >>> nltk.parse.chart.demo(4, print_times=False, trace=0,
-    ...                       sent='I saw John with a dog', numparses=2)
-    * Sentence:
-    I saw John with a dog
-    ['I', 'saw', 'John', 'with', 'a', 'dog']
-    <BLANKLINE>
-    * Strategy: Filtered left-corner
-    <BLANKLINE>
-    Nr edges in chart: 28
-    (S
-      (NP I)
-      (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
-    (S
-      (NP I)
-      (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
-    <BLANKLINE>
-
-The stepping chart parser
-
-    >>> nltk.parse.chart.demo(5, print_times=False, trace=1,
-    ...                       sent='I saw John with a dog', numparses=2)
-    * Sentence:
-    I saw John with a dog
-    ['I', 'saw', 'John', 'with', 'a', 'dog']
-    <BLANKLINE>
-    * Strategy: Stepping (top-down vs bottom-up)
-    <BLANKLINE>
-    *** SWITCH TO TOP DOWN
-    |[------]      .      .      .      .      .| [0:1] 'I'
-    |.      [------]      .      .      .      .| [1:2] 'saw'
-    |.      .      [------]      .      .      .| [2:3] 'John'
-    |.      .      .      [------]      .      .| [3:4] 'with'
-    |.      .      .      .      [------]      .| [4:5] 'a'
-    |.      .      .      .      .      [------]| [5:6] 'dog'
-    |>      .      .      .      .      .      .| [0:0] S  -> * NP VP
-    |>      .      .      .      .      .      .| [0:0] NP -> * NP PP
-    |>      .      .      .      .      .      .| [0:0] NP -> * Det Noun
-    |>      .      .      .      .      .      .| [0:0] NP -> * 'I'
-    |[------]      .      .      .      .      .| [0:1] NP -> 'I' *
-    |[------>      .      .      .      .      .| [0:1] S  -> NP * VP
-    |[------>      .      .      .      .      .| [0:1] NP -> NP * PP
-    |.      >      .      .      .      .      .| [1:1] VP -> * VP PP
-    |.      >      .      .      .      .      .| [1:1] VP -> * Verb NP
-    |.      >      .      .      .      .      .| [1:1] VP -> * Verb
-    |.      >      .      .      .      .      .| [1:1] Verb -> * 'saw'
-    |.      [------]      .      .      .      .| [1:2] Verb -> 'saw' *
-    |.      [------>      .      .      .      .| [1:2] VP -> Verb * NP
-    |.      [------]      .      .      .      .| [1:2] VP -> Verb *
-    |[-------------]      .      .      .      .| [0:2] S  -> NP VP *
-    |.      [------>      .      .      .      .| [1:2] VP -> VP * PP
-    *** SWITCH TO BOTTOM UP
-    |.      .      >      .      .      .      .| [2:2] NP -> * 'John'
-    |.      .      .      >      .      .      .| [3:3] PP -> * 'with' NP
-    |.      .      .      >      .      .      .| [3:3] Prep -> * 'with'
-    |.      .      .      .      >      .      .| [4:4] Det -> * 'a'
-    |.      .      .      .      .      >      .| [5:5] Noun -> * 'dog'
-    |.      .      [------]      .      .      .| [2:3] NP -> 'John' *
-    |.      .      .      [------>      .      .| [3:4] PP -> 'with' * NP
-    |.      .      .      [------]      .      .| [3:4] Prep -> 'with' *
-    |.      .      .      .      [------]      .| [4:5] Det -> 'a' *
-    |.      .      .      .      .      [------]| [5:6] Noun -> 'dog' *
-    |.      [-------------]      .      .      .| [1:3] VP -> Verb NP *
-    |[--------------------]      .      .      .| [0:3] S  -> NP VP *
-    |.      [------------->      .      .      .| [1:3] VP -> VP * PP
-    |.      .      >      .      .      .      .| [2:2] S  -> * NP VP
-    |.      .      >      .      .      .      .| [2:2] NP -> * NP PP
-    |.      .      .      .      >      .      .| [4:4] NP -> * Det Noun
-    |.      .      [------>      .      .      .| [2:3] S  -> NP * VP
-    |.      .      [------>      .      .      .| [2:3] NP -> NP * PP
-    |.      .      .      .      [------>      .| [4:5] NP -> Det * Noun
-    |.      .      .      .      [-------------]| [4:6] NP -> Det Noun *
-    |.      .      .      [--------------------]| [3:6] PP -> 'with' NP *
-    |.      [----------------------------------]| [1:6] VP -> VP PP *
-    *** SWITCH TO TOP DOWN
-    |.      .      >      .      .      .      .| [2:2] NP -> * Det Noun
-    |.      .      .      .      >      .      .| [4:4] NP -> * NP PP
-    |.      .      .      >      .      .      .| [3:3] VP -> * VP PP
-    |.      .      .      >      .      .      .| [3:3] VP -> * Verb NP
-    |.      .      .      >      .      .      .| [3:3] VP -> * Verb
-    |[=========================================]| [0:6] S  -> NP VP *
-    |.      [---------------------------------->| [1:6] VP -> VP * PP
-    |.      .      [---------------------------]| [2:6] NP -> NP PP *
-    |.      .      .      .      [------------->| [4:6] NP -> NP * PP
-    |.      [----------------------------------]| [1:6] VP -> Verb NP *
-    |.      .      [--------------------------->| [2:6] S  -> NP * VP
-    |.      .      [--------------------------->| [2:6] NP -> NP * PP
-    |[=========================================]| [0:6] S  -> NP VP *
-    |.      [---------------------------------->| [1:6] VP -> VP * PP
-    |.      .      .      .      .      .      >| [6:6] VP -> * VP PP
-    |.      .      .      .      .      .      >| [6:6] VP -> * Verb NP
-    |.      .      .      .      .      .      >| [6:6] VP -> * Verb
-    *** SWITCH TO BOTTOM UP
-    |.      .      .      .      >      .      .| [4:4] S  -> * NP VP
-    |.      .      .      .      [------------->| [4:6] S  -> NP * VP
-    *** SWITCH TO TOP DOWN
-    *** SWITCH TO BOTTOM UP
-    *** SWITCH TO TOP DOWN
-    *** SWITCH TO BOTTOM UP
-    *** SWITCH TO TOP DOWN
-    *** SWITCH TO BOTTOM UP
-    Nr edges in chart: 61
-    (S
-      (NP I)
-      (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
-    (S
-      (NP I)
-      (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
-    <BLANKLINE>
-
-
-Unit tests for the Incremental Chart Parser class
--------------------------------------------------
-
-The incremental chart parsers are defined in earleychart.py.
-We use the demo() function for testing. We must turn off showing of times.
-
-    >>> import nltk
-
-Earley Chart Parser
-
-    >>> nltk.parse.earleychart.demo(print_times=False, trace=1,
-    ...                             sent='I saw John with a dog', numparses=2)
-    * Sentence:
-    I saw John with a dog
-    ['I', 'saw', 'John', 'with', 'a', 'dog']
-    <BLANKLINE>
-    |.  I   . saw  . John . with .  a   . dog  .|
-    |[------]      .      .      .      .      .| [0:1] 'I'
-    |.      [------]      .      .      .      .| [1:2] 'saw'
-    |.      .      [------]      .      .      .| [2:3] 'John'
-    |.      .      .      [------]      .      .| [3:4] 'with'
-    |.      .      .      .      [------]      .| [4:5] 'a'
-    |.      .      .      .      .      [------]| [5:6] 'dog'
-    |>      .      .      .      .      .      .| [0:0] S  -> * NP VP
-    |>      .      .      .      .      .      .| [0:0] NP -> * NP PP
-    |>      .      .      .      .      .      .| [0:0] NP -> * Det Noun
-    |>      .      .      .      .      .      .| [0:0] NP -> * 'I'
-    |[------]      .      .      .      .      .| [0:1] NP -> 'I' *
-    |[------>      .      .      .      .      .| [0:1] S  -> NP * VP
-    |[------>      .      .      .      .      .| [0:1] NP -> NP * PP
-    |.      >      .      .      .      .      .| [1:1] VP -> * VP PP
-    |.      >      .      .      .      .      .| [1:1] VP -> * Verb NP
-    |.      >      .      .      .      .      .| [1:1] VP -> * Verb
-    |.      >      .      .      .      .      .| [1:1] Verb -> * 'saw'
-    |.      [------]      .      .      .      .| [1:2] Verb -> 'saw' *
-    |.      [------>      .      .      .      .| [1:2] VP -> Verb * NP
-    |.      [------]      .      .      .      .| [1:2] VP -> Verb *
-    |[-------------]      .      .      .      .| [0:2] S  -> NP VP *
-    |.      [------>      .      .      .      .| [1:2] VP -> VP * PP
-    |.      .      >      .      .      .      .| [2:2] NP -> * NP PP
-    |.      .      >      .      .      .      .| [2:2] NP -> * Det Noun
-    |.      .      >      .      .      .      .| [2:2] NP -> * 'John'
-    |.      .      [------]      .      .      .| [2:3] NP -> 'John' *
-    |.      [-------------]      .      .      .| [1:3] VP -> Verb NP *
-    |.      .      [------>      .      .      .| [2:3] NP -> NP * PP
-    |.      .      .      >      .      .      .| [3:3] PP -> * 'with' NP
-    |[--------------------]      .      .      .| [0:3] S  -> NP VP *
-    |.      [------------->      .      .      .| [1:3] VP -> VP * PP
-    |.      .      .      [------>      .      .| [3:4] PP -> 'with' * NP
-    |.      .      .      .      >      .      .| [4:4] NP -> * NP PP
-    |.      .      .      .      >      .      .| [4:4] NP -> * Det Noun
-    |.      .      .      .      >      .      .| [4:4] Det -> * 'a'
-    |.      .      .      .      [------]      .| [4:5] Det -> 'a' *
-    |.      .      .      .      [------>      .| [4:5] NP -> Det * Noun
-    |.      .      .      .      .      >      .| [5:5] Noun -> * 'dog'
-    |.      .      .      .      .      [------]| [5:6] Noun -> 'dog' *
-    |.      .      .      .      [-------------]| [4:6] NP -> Det Noun *
-    |.      .      .      [--------------------]| [3:6] PP -> 'with' NP *
-    |.      .      .      .      [------------->| [4:6] NP -> NP * PP
-    |.      .      [---------------------------]| [2:6] NP -> NP PP *
-    |.      [----------------------------------]| [1:6] VP -> VP PP *
-    |[=========================================]| [0:6] S  -> NP VP *
-    |.      [---------------------------------->| [1:6] VP -> VP * PP
-    |.      [----------------------------------]| [1:6] VP -> Verb NP *
-    |.      .      [--------------------------->| [2:6] NP -> NP * PP
-    |[=========================================]| [0:6] S  -> NP VP *
-    |.      [---------------------------------->| [1:6] VP -> VP * PP
-    (S
-      (NP I)
-      (VP (VP (Verb saw) (NP John)) (PP with (NP (Det a) (Noun dog)))))
-    (S
-      (NP I)
-      (VP (Verb saw) (NP (NP John) (PP with (NP (Det a) (Noun dog))))))
-
-
-Unit tests for LARGE context-free grammars
-------------------------------------------
-
-Reading the ATIS grammar.
-
-    >>> grammar = nltk.data.load('grammars/large_grammars/atis.cfg')
-    >>> grammar
-    <Grammar with 5517 productions>
-
-Reading the test sentences.
-
-    >>> sentences = nltk.data.load('grammars/large_grammars/atis_sentences.txt')
-    >>> sentences = nltk.parse.util.extract_test_sentences(sentences)
-    >>> len(sentences)
-    98
-    >>> testsentence = sentences[22]
-    >>> testsentence[0]
-    ['show', 'me', 'northwest', 'flights', 'to', 'detroit', '.']
-    >>> testsentence[1]
-    17
-    >>> sentence = testsentence[0]
-
-Now we test all different parsing strategies.
-Note that the number of edges differ between the strategies.
-
-Bottom-up parsing.
-
-    >>> parser = nltk.parse.BottomUpChartParser(grammar)
-    >>> chart = parser.chart_parse(sentence)
-    >>> print((chart.num_edges()))
-    7661
-    >>> print((len(list(chart.parses(grammar.start())))))
-    17
-
-Bottom-up Left-corner parsing.
-
-    >>> parser = nltk.parse.BottomUpLeftCornerChartParser(grammar)
-    >>> chart = parser.chart_parse(sentence)
-    >>> print((chart.num_edges()))
-    4986
-    >>> print((len(list(chart.parses(grammar.start())))))
-    17
-
-Left-corner parsing with bottom-up filter.
-
-    >>> parser = nltk.parse.LeftCornerChartParser(grammar)
-    >>> chart = parser.chart_parse(sentence)
-    >>> print((chart.num_edges()))
-    1342
-    >>> print((len(list(chart.parses(grammar.start())))))
-    17
-
-Top-down parsing.
-
-    >>> parser = nltk.parse.TopDownChartParser(grammar)
-    >>> chart = parser.chart_parse(sentence)
-    >>> print((chart.num_edges()))
-    28352
-    >>> print((len(list(chart.parses(grammar.start())))))
-    17
-
-Incremental Bottom-up parsing.
-
-    >>> parser = nltk.parse.IncrementalBottomUpChartParser(grammar)
-    >>> chart = parser.chart_parse(sentence)
-    >>> print((chart.num_edges()))
-    7661
-    >>> print((len(list(chart.parses(grammar.start())))))
-    17
-
-Incremental Bottom-up Left-corner parsing.
-
-    >>> parser = nltk.parse.IncrementalBottomUpLeftCornerChartParser(grammar)
-    >>> chart = parser.chart_parse(sentence)
-    >>> print((chart.num_edges()))
-    4986
-    >>> print((len(list(chart.parses(grammar.start())))))
-    17
-
-Incremental Left-corner parsing with bottom-up filter.
-
-    >>> parser = nltk.parse.IncrementalLeftCornerChartParser(grammar)
-    >>> chart = parser.chart_parse(sentence)
-    >>> print((chart.num_edges()))
-    1342
-    >>> print((len(list(chart.parses(grammar.start())))))
-    17
-
-Incremental Top-down parsing.
-
-    >>> parser = nltk.parse.IncrementalTopDownChartParser(grammar)
-    >>> chart = parser.chart_parse(sentence)
-    >>> print((chart.num_edges()))
-    28352
-    >>> print((len(list(chart.parses(grammar.start())))))
-    17
-
-Earley parsing. This is similar to the incremental top-down algorithm.
-
-    >>> parser = nltk.parse.EarleyChartParser(grammar)
-    >>> chart = parser.chart_parse(sentence)
-    >>> print((chart.num_edges()))
-    28352
-    >>> print((len(list(chart.parses(grammar.start())))))
-    17
-
-
-Unit tests for the Probabilistic CFG class
-------------------------------------------
-
-    >>> from nltk.corpus import treebank
-    >>> from itertools import islice
-    >>> from nltk.grammar import PCFG, induce_pcfg, toy_pcfg1, toy_pcfg2
-
-Create a set of PCFG productions.
-
-    >>> grammar = PCFG.fromstring("""
-    ... A -> B B [.3] | C B C [.7]
-    ... B -> B D [.5] | C [.5]
-    ... C -> 'a' [.1] | 'b' [0.9]
-    ... D -> 'b' [1.0]
-    ... """)
-    >>> prod = grammar.productions()[0]
-    >>> prod
-    A -> B B [0.3]
-
-    >>> prod.lhs()
-    A
-
-    >>> prod.rhs()
-    (B, B)
-
-    >>> print((prod.prob()))
-    0.3
-
-    >>> grammar.start()
-    A
-
-    >>> grammar.productions()
-    [A -> B B [0.3], A -> C B C [0.7], B -> B D [0.5], B -> C [0.5], C -> 'a' [0.1], C -> 'b' [0.9], D -> 'b' [1.0]]
-
-Induce some productions using parsed Treebank data.
-
-    >>> productions = []
-    >>> for fileid in treebank.fileids()[:2]:
-    ...     for t in treebank.parsed_sents(fileid):
-    ...         productions += t.productions()
-
-    >>> grammar = induce_pcfg(S, productions)
-    >>> grammar
-    <Grammar with 71 productions>
-
-    >>> sorted(grammar.productions(lhs=Nonterminal('PP')))[:2]
-    [PP -> IN NP [1.0]]
-    >>> sorted(grammar.productions(lhs=Nonterminal('NNP')))[:2]
-    [NNP -> 'Agnew' [0.0714286], NNP -> 'Consolidated' [0.0714286]]
-    >>> sorted(grammar.productions(lhs=Nonterminal('JJ')))[:2]
-    [JJ -> 'British' [0.142857], JJ -> 'former' [0.142857]]
-    >>> sorted(grammar.productions(lhs=Nonterminal('NP')))[:2]
-    [NP -> CD NNS [0.133333], NP -> DT JJ JJ NN [0.0666667]]
-
-Unit tests for the Probabilistic Chart Parse classes
-----------------------------------------------------
-
-    >>> tokens = "Jack saw Bob with my cookie".split()
-    >>> grammar = toy_pcfg2
-    >>> print(grammar)
-    Grammar with 23 productions (start state = S)
-        S -> NP VP [1.0]
-        VP -> V NP [0.59]
-        VP -> V [0.4]
-        VP -> VP PP [0.01]
-        NP -> Det N [0.41]
-        NP -> Name [0.28]
-        NP -> NP PP [0.31]
-        PP -> P NP [1.0]
-        V -> 'saw' [0.21]
-        V -> 'ate' [0.51]
-        V -> 'ran' [0.28]
-        N -> 'boy' [0.11]
-        N -> 'cookie' [0.12]
-        N -> 'table' [0.13]
-        N -> 'telescope' [0.14]
-        N -> 'hill' [0.5]
-        Name -> 'Jack' [0.52]
-        Name -> 'Bob' [0.48]
-        P -> 'with' [0.61]
-        P -> 'under' [0.39]
-        Det -> 'the' [0.41]
-        Det -> 'a' [0.31]
-        Det -> 'my' [0.28]
-
-Create several parsers using different queuing strategies and show the
-resulting parses.
-
-    >>> from nltk.parse import pchart
-
-    >>> parser = pchart.InsideChartParser(grammar)
-    >>> for t in parser.parse(tokens):
-    ...     print(t)
-    (S
-      (NP (Name Jack))
-      (VP
-        (V saw)
-        (NP
-          (NP (Name Bob))
-          (PP (P with) (NP (Det my) (N cookie)))))) (p=6.31607e-06)
-    (S
-      (NP (Name Jack))
-      (VP
-        (VP (V saw) (NP (Name Bob)))
-        (PP (P with) (NP (Det my) (N cookie))))) (p=2.03744e-07)
-
-    >>> parser = pchart.RandomChartParser(grammar)
-    >>> for t in parser.parse(tokens):
-    ...     print(t)
-    (S
-      (NP (Name Jack))
-      (VP
-        (V saw)
-        (NP
-          (NP (Name Bob))
-          (PP (P with) (NP (Det my) (N cookie)))))) (p=6.31607e-06)
-    (S
-      (NP (Name Jack))
-      (VP
-        (VP (V saw) (NP (Name Bob)))
-        (PP (P with) (NP (Det my) (N cookie))))) (p=2.03744e-07)
-
-    >>> parser = pchart.UnsortedChartParser(grammar)
-    >>> for t in parser.parse(tokens):
-    ...     print(t)
-    (S
-      (NP (Name Jack))
-      (VP
-        (V saw)
-        (NP
-          (NP (Name Bob))
-          (PP (P with) (NP (Det my) (N cookie)))))) (p=6.31607e-06)
-    (S
-      (NP (Name Jack))
-      (VP
-        (VP (V saw) (NP (Name Bob)))
-        (PP (P with) (NP (Det my) (N cookie))))) (p=2.03744e-07)
-
-    >>> parser = pchart.LongestChartParser(grammar)
-    >>> for t in parser.parse(tokens):
-    ...     print(t)
-    (S
-      (NP (Name Jack))
-      (VP
-        (V saw)
-        (NP
-          (NP (Name Bob))
-          (PP (P with) (NP (Det my) (N cookie)))))) (p=6.31607e-06)
-    (S
-      (NP (Name Jack))
-      (VP
-        (VP (V saw) (NP (Name Bob)))
-        (PP (P with) (NP (Det my) (N cookie))))) (p=2.03744e-07)
-
-    >>> parser = pchart.InsideChartParser(grammar, beam_size = len(tokens)+1)
-    >>> for t in parser.parse(tokens):
-    ...     print(t)
-
-
-Unit tests for the Viterbi Parse classes
-----------------------------------------
-
-    >>> from nltk.parse import ViterbiParser
-    >>> tokens = "Jack saw Bob with my cookie".split()
-    >>> grammar = toy_pcfg2
-
-Parse the tokenized sentence.
-
-    >>> parser = ViterbiParser(grammar)
-    >>> for t in parser.parse(tokens):
-    ...     print(t)
-    (S
-      (NP (Name Jack))
-      (VP
-        (V saw)
-        (NP
-          (NP (Name Bob))
-          (PP (P with) (NP (Det my) (N cookie)))))) (p=6.31607e-06)
-
-
-Unit tests for the FeatStructNonterminal class
-----------------------------------------------
-
-    >>> from nltk.grammar import FeatStructNonterminal
-    >>> FeatStructNonterminal(
-    ...     pos='n', agr=FeatStructNonterminal(number='pl', gender='f'))
-    [agr=[gender='f', number='pl'], pos='n']
-
-    >>> FeatStructNonterminal('VP[+fin]/NP[+pl]')
-    VP[+fin]/NP[+pl]
-
-
-Tracing the Feature Chart Parser
---------------------------------
-
-We use the featurechart.demo() function for tracing the Feature Chart Parser.
-
-    >>> nltk.parse.featurechart.demo(print_times=False,
-    ...                              print_grammar=True,
-    ...                              parser=nltk.parse.featurechart.FeatureChartParser,
-    ...                              sent='I saw John with a dog')
-    <BLANKLINE>
-    Grammar with 18 productions (start state = S[])
-        S[] -> NP[] VP[]
-        PP[] -> Prep[] NP[]
-        NP[] -> NP[] PP[]
-        VP[] -> VP[] PP[]
-        VP[] -> Verb[] NP[]
-        VP[] -> Verb[]
-        NP[] -> Det[pl=?x] Noun[pl=?x]
-        NP[] -> 'John'
-        NP[] -> 'I'
-        Det[] -> 'the'
-        Det[] -> 'my'
-        Det[-pl] -> 'a'
-        Noun[-pl] -> 'dog'
-        Noun[-pl] -> 'cookie'
-        Verb[] -> 'ate'
-        Verb[] -> 'saw'
-        Prep[] -> 'with'
-        Prep[] -> 'under'
-    <BLANKLINE>
-    * FeatureChartParser
-    Sentence: I saw John with a dog
-    |.I.s.J.w.a.d.|
-    |[-] . . . . .| [0:1] 'I'
-    |. [-] . . . .| [1:2] 'saw'
-    |. . [-] . . .| [2:3] 'John'
-    |. . . [-] . .| [3:4] 'with'
-    |. . . . [-] .| [4:5] 'a'
-    |. . . . . [-]| [5:6] 'dog'
-    |[-] . . . . .| [0:1] NP[] -> 'I' *
-    |[-> . . . . .| [0:1] S[] -> NP[] * VP[] {}
-    |[-> . . . . .| [0:1] NP[] -> NP[] * PP[] {}
-    |. [-] . . . .| [1:2] Verb[] -> 'saw' *
-    |. [-> . . . .| [1:2] VP[] -> Verb[] * NP[] {}
-    |. [-] . . . .| [1:2] VP[] -> Verb[] *
-    |. [-> . . . .| [1:2] VP[] -> VP[] * PP[] {}
-    |[---] . . . .| [0:2] S[] -> NP[] VP[] *
-    |. . [-] . . .| [2:3] NP[] -> 'John' *
-    |. . [-> . . .| [2:3] S[] -> NP[] * VP[] {}
-    |. . [-> . . .| [2:3] NP[] -> NP[] * PP[] {}
-    |. [---] . . .| [1:3] VP[] -> Verb[] NP[] *
-    |. [---> . . .| [1:3] VP[] -> VP[] * PP[] {}
-    |[-----] . . .| [0:3] S[] -> NP[] VP[] *
-    |. . . [-] . .| [3:4] Prep[] -> 'with' *
-    |. . . [-> . .| [3:4] PP[] -> Prep[] * NP[] {}
-    |. . . . [-] .| [4:5] Det[-pl] -> 'a' *
-    |. . . . [-> .| [4:5] NP[] -> Det[pl=?x] * Noun[pl=?x] {?x: False}
-    |. . . . . [-]| [5:6] Noun[-pl] -> 'dog' *
-    |. . . . [---]| [4:6] NP[] -> Det[-pl] Noun[-pl] *
-    |. . . . [--->| [4:6] S[] -> NP[] * VP[] {}
-    |. . . . [--->| [4:6] NP[] -> NP[] * PP[] {}
-    |. . . [-----]| [3:6] PP[] -> Prep[] NP[] *
-    |. . [-------]| [2:6] NP[] -> NP[] PP[] *
-    |. [---------]| [1:6] VP[] -> VP[] PP[] *
-    |. [--------->| [1:6] VP[] -> VP[] * PP[] {}
-    |[===========]| [0:6] S[] -> NP[] VP[] *
-    |. . [------->| [2:6] S[] -> NP[] * VP[] {}
-    |. . [------->| [2:6] NP[] -> NP[] * PP[] {}
-    |. [---------]| [1:6] VP[] -> Verb[] NP[] *
-    |. [--------->| [1:6] VP[] -> VP[] * PP[] {}
-    |[===========]| [0:6] S[] -> NP[] VP[] *
-    (S[]
-      (NP[] I)
-      (VP[]
-        (VP[] (Verb[] saw) (NP[] John))
-        (PP[] (Prep[] with) (NP[] (Det[-pl] a) (Noun[-pl] dog)))))
-    (S[]
-      (NP[] I)
-      (VP[]
-        (Verb[] saw)
-        (NP[]
-          (NP[] John)
-          (PP[] (Prep[] with) (NP[] (Det[-pl] a) (Noun[-pl] dog))))))
-
-
-Unit tests for the Feature Chart Parser classes
------------------------------------------------
-
-The list of parsers we want to test.
-
-    >>> parsers = [nltk.parse.featurechart.FeatureChartParser,
-    ...            nltk.parse.featurechart.FeatureTopDownChartParser,
-    ...            nltk.parse.featurechart.FeatureBottomUpChartParser,
-    ...            nltk.parse.featurechart.FeatureBottomUpLeftCornerChartParser,
-    ...            nltk.parse.earleychart.FeatureIncrementalChartParser,
-    ...            nltk.parse.earleychart.FeatureEarleyChartParser,
-    ...            nltk.parse.earleychart.FeatureIncrementalTopDownChartParser,
-    ...            nltk.parse.earleychart.FeatureIncrementalBottomUpChartParser,
-    ...            nltk.parse.earleychart.FeatureIncrementalBottomUpLeftCornerChartParser,
-    ...            ]
-
-A helper function that tests each parser on the given grammar and sentence.
-We check that the number of trees are correct, and that all parsers
-return the same trees. Otherwise an error is printed.
-
-    >>> def unittest(grammar, sentence, nr_trees):
-    ...     sentence = sentence.split()
-    ...     trees = None
-    ...     for P in parsers:
-    ...         result = P(grammar).parse(sentence)
-    ...         result = set(tree.freeze() for tree in result)
-    ...         if len(result) != nr_trees:
-    ...             print("Wrong nr of trees:", len(result))
-    ...         elif trees is None:
-    ...             trees = result
-    ...         elif result != trees:
-    ...             print("Trees differ for parser:", P.__name__)
-
-The demo grammar from before, with an ambiguous sentence.
-
-    >>> isawjohn = nltk.parse.featurechart.demo_grammar()
-    >>> unittest(isawjohn, "I saw John with a dog with my cookie", 5)
-
-This grammar tests that variables in different grammar rules are renamed
-before unification. (The problematic variable is in this case ?X).
-
-    >>> whatwasthat = nltk.grammar.FeatureGrammar.fromstring('''
-    ... S[] -> NP[num=?N] VP[num=?N, slash=?X]
-    ... NP[num=?X] -> "what"
-    ... NP[num=?X] -> "that"
-    ... VP[num=?P, slash=none] -> V[num=?P] NP[]
-    ... V[num=sg] -> "was"
-    ... ''')
-    >>> unittest(whatwasthat, "what was that", 1)
-
-This grammar tests that the same rule can be used in different places
-in another rule, and that the variables are properly renamed.
-
-    >>> thislovesthat = nltk.grammar.FeatureGrammar.fromstring('''
-    ... S[] -> NP[case=nom] V[] NP[case=acc]
-    ... NP[case=?X] -> Pron[case=?X]
-    ... Pron[] -> "this"
-    ... Pron[] -> "that"
-    ... V[] -> "loves"
-    ... ''')
-    >>> unittest(thislovesthat, "this loves that", 1)
-
-
-Tests for loading feature grammar files
----------------------------------------
-
-Alternative 1: first load the grammar, then create the parser.
-
-    >>> fcfg = nltk.data.load('grammars/book_grammars/feat0.fcfg')
-    >>> fcp1 = nltk.parse.FeatureChartParser(fcfg)
-    >>> print((type(fcp1)))
-    <class 'nltk.parse.featurechart.FeatureChartParser'>
-
-Alternative 2: directly load the parser.
-
-    >>> fcp2 = nltk.parse.load_parser('grammars/book_grammars/feat0.fcfg')
-    >>> print((type(fcp2)))
-    <class 'nltk.parse.featurechart.FeatureChartParser'>
-
-
-
diff --git a/nlp_resource_data/nltk/test/portuguese_en.doctest b/nlp_resource_data/nltk/test/portuguese_en.doctest

deleted file mode 100644 (file)

index 87051c9..0000000
--- a/nlp_resource_data/nltk/test/portuguese_en.doctest
+++ /dev/null
@@ -1,565 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-==================================
-Examples for Portuguese Processing
-==================================
-
-This HOWTO contains a variety of examples relating to the Portuguese language.
-It is intended to be read in conjunction with the NLTK book
-(``http://nltk.org/book``).  For instructions on running the Python
-interpreter, please see the section *Getting Started with Python*, in Chapter 1.
-
---------------------------------------------
-Python Programming, with Portuguese Examples
---------------------------------------------
-
-Chapter 1 of the NLTK book contains many elementary programming examples, all
-with English texts.  In this section, we'll see some corresponding examples
-using Portuguese.  Please refer to the chapter for full discussion.  *Vamos!*
-
-    >>> from nltk.examples.pt import *
-    *** Introductory Examples for the NLTK Book ***
-    Loading ptext1, ... and psent1, ...
-    Type the name of the text or sentence to view it.
-    Type: 'texts()' or 'sents()' to list the materials.
-    ptext1: Memórias Póstumas de Brás Cubas (1881)
-    ptext2: Dom Casmurro (1899)
-    ptext3: Gênesis
-    ptext4: Folha de Sao Paulo (1994)
-
-
-Any time we want to find out about these texts, we just have
-to enter their names at the Python prompt:
-
-    >>> ptext2
-    <Text: Dom Casmurro (1899)>
-
-Searching Text
---------------
-
-A concordance permits us to see words in context.
-
-    >>> ptext1.concordance('olhos')
-    Building index...
-    Displaying 25 of 138 matches:
-    De pé , à cabeceira da cama , com os olhos estúpidos , a boca entreaberta , a t
-    orelhas . Pela minha parte fechei os olhos e deixei - me ir à ventura . Já agor
-    xões de cérebro enfermo . Como ia de olhos fechados , não via o caminho ; lembr
-    gelos eternos . Com efeito , abri os olhos e vi que o meu animal galopava numa
-    me apareceu então , fitando - me uns olhos rutilantes como o sol . Tudo nessa f
-     mim mesmo . Então , encarei - a com olhos súplices , e pedi mais alguns anos .
-    ...
-
-For a given word, we can find words with a similar text distribution:
-
-    >>> ptext1.similar('chegar')
-    Building word-context index...
-    acabada acudir aludir avistar bramanismo casamento cheguei com contar
-    contrário corpo dali deixei desferirem dizer fazer filhos já leitor lhe
-    >>> ptext3.similar('chegar')
-    Building word-context index...
-    achar alumiar arrombar destruir governar guardar ir lavrar passar que
-    toda tomar ver vir
-
-We can search for the statistically significant collocations in a text:
-
-    >>> ptext1.collocations()
-    Building collocations list
-    Quincas Borba; Lobo Neves; alguma coisa; Brás Cubas; meu pai; dia
-    seguinte; não sei; Meu pai; alguns instantes; outra vez; outra coisa;
-    por exemplo; mim mesmo; coisa nenhuma; mesma coisa; não era; dias
-    depois; Passeio Público; olhar para; das coisas
-
-We can search for words in context, with the help of *regular expressions*, e.g.:
-
-    >>> ptext1.findall("<olhos> (<.*>)")
-    estúpidos; e; fechados; rutilantes; súplices; a; do; babavam;
-    na; moles; se; da; umas; espraiavam; chamejantes; espetados;
-    ...
-
-We can automatically generate random text based on a given text, e.g.:
-
-    >>> ptext3.generate() # doctest: +SKIP
-    No princípio , criou Deus os abençoou , dizendo : Onde { estão } e até
-    à ave dos céus , { que } será . Disse mais Abrão : Dá - me a mulher
-    que tomaste ; porque daquele poço Eseque , { tinha .} E disse : Não
-    poderemos descer ; mas , do campo ainda não estava na casa do teu
-    pescoço . E viveu Serugue , depois Simeão e Levi { são } estes ? E o
-    varão , porque habitava na terra de Node , da mão de Esaú : Jeús ,
-    Jalão e Corá
-
-Texts as List of Words
-----------------------
-
-A few sentences have been defined for you.
-
-    >>> psent1
-    ['o', 'amor', 'da', 'gl\xf3ria', 'era', 'a', 'coisa', 'mais',
-    'verdadeiramente', 'humana', 'que', 'h\xe1', 'no', 'homem', ',',
-    'e', ',', 'conseq\xfcentemente', ',', 'a', 'sua', 'mais',
-    'genu\xedna', 'fei\xe7\xe3o', '.']
-    >>>
-
-Notice that the sentence has been *tokenized*.  Each token is
-represented as a string, represented using quotes, e.g. ``'coisa'``.
-Some strings contain special characters, e.g. ``\xf3``,
-the internal representation for ó.
-The tokens are combined in the form of a *list*.  How long is this list?
-
-    >>> len(psent1)
-    25
-    >>>
-
-What is the vocabulary of this sentence?
-
-    >>> sorted(set(psent1))
-    [',', '.', 'a', 'amor', 'coisa', 'conseqüentemente', 'da', 'e', 'era',
-     'feição', 'genuína', 'glória', 'homem', 'humana', 'há', 'mais', 'no',
-     'o', 'que', 'sua', 'verdadeiramente']
-    >>>
-
-Let's iterate over each item in ``psent2``, and print information for each:
-
-    >>> for w in psent2:
-    ...     print(w, len(w), w[-1])
-    ...
-    Não 3 o
-    consultes 9 s
-    dicionários 11 s
-    . 1 .
-
-Observe how ``print()`` displays each string in human-readable form.
-Also notice that we accessed the last character of a string ``w`` using ``w[-1]``.
-
-We just saw a ``for`` loop above.  Another useful control structure is a
-*list comprehension*.
-
-    >>> [w.upper() for w in psent2]
-    ['N\xc3O', 'CONSULTES', 'DICION\xc1RIOS', '.']
-    >>> [w for w in psent1 if w.endswith('a')]
-    ['da', 'gl\xf3ria', 'era', 'a', 'coisa', 'humana', 'a', 'sua', 'genu\xedna']
-    >>> [w for w in ptext4 if len(w) > 15]
-    [u'norte-irlandeses', u'pan-nacionalismo', u'predominatemente', u'primeiro-ministro',
-    u'primeiro-ministro', u'irlandesa-americana', u'responsabilidades', u'significativamente']
-
-We can examine the relative frequency of words in a text, using ``FreqDist``:
-
-    >>> fd1 = FreqDist(ptext1)
-    >>> fd1
-    <FreqDist with 10848 samples and 77098 outcomes>
-    >>> fd1['olhos']
-    137
-    >>> fd1.max()
-    u','
-    >>> fd1.samples()[:100]
-    [u',', u'.', u'a', u'que', u'de', u'e', u'-', u'o', u';', u'me', u'um', u'n\xe3o',
-    u'\x97', u'se', u'do', u'da', u'uma', u'com', u'os', u'\xe9', u'era', u'as', u'eu',
-    u'lhe', u'ao', u'em', u'para', u'mas', u'...', u'!', u'\xe0', u'na', u'mais', u'?',
-    u'no', u'como', u'por', u'N\xe3o', u'dos', u'ou', u'ele', u':', u'Virg\xedlia',
-    u'meu', u'disse', u'minha', u'das', u'O', u'/', u'A', u'CAP\xcdTULO', u'muito',
-    u'depois', u'coisa', u'foi', u'sem', u'olhos', u'ela', u'nos', u'tinha', u'nem',
-    u'E', u'outro', u'vida', u'nada', u'tempo', u'menos', u'outra', u'casa', u'homem',
-    u'porque', u'quando', u'mim', u'mesmo', u'ser', u'pouco', u'estava', u'dia',
-    u't\xe3o', u'tudo', u'Mas', u'at\xe9', u'D', u'ainda', u's\xf3', u'alguma',
-    u'la', u'vez', u'anos', u'h\xe1', u'Era', u'pai', u'esse', u'lo', u'dizer', u'assim',
-    u'ent\xe3o', u'dizia', u'aos', u'Borba']
-
----------------
-Reading Corpora
----------------
-
-Accessing the Machado Text Corpus
----------------------------------
-
-NLTK includes the complete works of Machado de Assis.
-
-    >>> from nltk.corpus import machado
-    >>> machado.fileids()
-    ['contos/macn001.txt', 'contos/macn002.txt', 'contos/macn003.txt', ...]
-
-Each file corresponds to one of the works of Machado de Assis.  To see a complete
-list of works, you can look at the corpus README file: ``print(machado.readme())``.
-Let's access the text of the *Posthumous Memories of Brás Cubas*.
-
-We can access the text as a list of characters, and access 200 characters starting
-from position 10,000.
-
-    >>> raw_text = machado.raw('romance/marm05.txt')
-    >>> raw_text[10000:10200]
-    u', primou no\nEstado, e foi um dos amigos particulares do vice-rei Conde
-    da Cunha.\n\nComo este apelido de Cubas lhe\ncheirasse excessivamente a
-    tanoaria, alegava meu pai, bisneto de Dami\xe3o, que o\ndito ape'
-
-However, this is not a very useful way to work with a text.  We generally think
-of a text as a sequence of words and punctuation, not characters:
-
-    >>> text1 = machado.words('romance/marm05.txt')
-    >>> text1
-    ['Romance', ',', 'Mem\xf3rias', 'P\xf3stumas', 'de', ...]
-    >>> len(text1)
-    77098
-    >>> len(set(text1))
-    10848
-
-Here's a program that finds the most common ngrams that contain a
-particular target word.
-
-    >>> from nltk import ngrams, FreqDist
-    >>> target_word = 'olhos'
-    >>> fd = FreqDist(ng
-    ...               for ng in ngrams(text1, 5)
-    ...               if target_word in ng)
-    >>> for hit in fd.samples():
-    ...     print(' '.join(hit))
-    ...
-    , com os olhos no
-    com os olhos no ar
-    com os olhos no chão
-    e todos com os olhos
-    me estar com os olhos
-    os olhos estúpidos , a
-    os olhos na costura ,
-    os olhos no ar ,
-    , com os olhos espetados
-    , com os olhos estúpidos
-    , com os olhos fitos
-    , com os olhos naquele
-    , com os olhos para
-
-
-Accessing the MacMorpho Tagged Corpus
--------------------------------------
-
-NLTK includes the MAC-MORPHO Brazilian Portuguese POS-tagged news text,
-with over a million words of
-journalistic texts extracted from ten sections of
-the daily newspaper *Folha de Sao Paulo*, 1994.
-
-We can access this corpus as a sequence of words or tagged words as follows:
-    >>> import nltk.corpus
-    >>> nltk.corpus.mac_morpho.words()
-    ['Jersei', 'atinge', 'm\xe9dia', 'de', 'Cr$', '1,4', ...]
-    >>> nltk.corpus.mac_morpho.sents() # doctest: +NORMALIZE_WHITESPACE
-    [['Jersei', 'atinge', 'm\xe9dia', 'de', 'Cr$', '1,4', 'milh\xe3o',
-    'em', 'a', 'venda', 'de', 'a', 'Pinhal', 'em', 'S\xe3o', 'Paulo'],
-    ['Programe', 'sua', 'viagem', 'a', 'a', 'Exposi\xe7\xe3o', 'Nacional',
-    'do', 'Zebu', ',', 'que', 'come\xe7a', 'dia', '25'], ...]
-    >>> nltk.corpus.mac_morpho.tagged_words()
-    [('Jersei', 'N'), ('atinge', 'V'), ('m\xe9dia', 'N'), ...]
-
-We can also access it in sentence chunks.
-
-    >>> nltk.corpus.mac_morpho.tagged_sents() # doctest: +NORMALIZE_WHITESPACE
-    [[('Jersei', 'N'), ('atinge', 'V'), ('m\xe9dia', 'N'), ('de', 'PREP'),
-      ('Cr$', 'CUR'), ('1,4', 'NUM'), ('milh\xe3o', 'N'), ('em', 'PREP|+'),
-      ('a', 'ART'), ('venda', 'N'), ('de', 'PREP|+'), ('a', 'ART'),
-      ('Pinhal', 'NPROP'), ('em', 'PREP'), ('S\xe3o', 'NPROP'),
-      ('Paulo', 'NPROP')],
-     [('Programe', 'V'), ('sua', 'PROADJ'), ('viagem', 'N'), ('a', 'PREP|+'),
-      ('a', 'ART'), ('Exposi\xe7\xe3o', 'NPROP'), ('Nacional', 'NPROP'),
-      ('do', 'NPROP'), ('Zebu', 'NPROP'), (',', ','), ('que', 'PRO-KS-REL'),
-      ('come\xe7a', 'V'), ('dia', 'N'), ('25', 'N|AP')], ...]
-
-This data can be used to train taggers (examples below for the Floresta treebank).
-
-Accessing the Floresta Portuguese Treebank
-------------------------------------------
-
-The NLTK data distribution includes the
-"Floresta Sinta(c)tica Corpus" version 7.4, available from
-``http://www.linguateca.pt/Floresta/``.
-
-We can access this corpus as a sequence of words or tagged words as follows:
-
-    >>> from nltk.corpus import floresta
-    >>> floresta.words()
-    ['Um', 'revivalismo', 'refrescante', 'O', '7_e_Meio', ...]
-    >>> floresta.tagged_words()
-    [('Um', '>N+art'), ('revivalismo', 'H+n'), ...]
-
-The tags consist of some syntactic information, followed by a plus sign,
-followed by a conventional part-of-speech tag.  Let's strip off the material before
-the plus sign:
-
-    >>> def simplify_tag(t):
-    ...     if "+" in t:
-    ...         return t[t.index("+")+1:]
-    ...     else:
-    ...         return t
-    >>> twords = floresta.tagged_words()
-    >>> twords = [(w.lower(), simplify_tag(t)) for (w,t) in twords]
-    >>> twords[:10]
-    [('um', 'art'), ('revivalismo', 'n'), ('refrescante', 'adj'), ('o', 'art'), ('7_e_meio', 'prop'),
-    ('\xe9', 'v-fin'), ('um', 'art'), ('ex-libris', 'n'), ('de', 'prp'), ('a', 'art')]
-
-Pretty printing the tagged words:
-
-    >>> print(' '.join(word + '/' + tag for (word, tag) in twords[:10]))
-    um/art revivalismo/n refrescante/adj o/art 7_e_meio/prop é/v-fin um/art ex-libris/n de/prp a/art
-
-Count the word tokens and types, and determine the most common word:
-
-    >>> words = floresta.words()
-    >>> len(words)
-    211852
-    >>> fd = nltk.FreqDist(words)
-    >>> len(fd)
-    29421
-    >>> fd.max()
-    'de'
-
-List the 20 most frequent tags, in order of decreasing frequency:
-
-    >>> tags = [simplify_tag(tag) for (word,tag) in floresta.tagged_words()]
-    >>> fd = nltk.FreqDist(tags)
-    >>> fd.keys()[:20] # doctest: +NORMALIZE_WHITESPACE
-    ['n', 'prp', 'art', 'v-fin', ',', 'prop', 'adj', 'adv', '.',
-     'conj-c', 'v-inf', 'pron-det', 'v-pcp', 'num', 'pron-indp',
-     'pron-pers', '\xab', '\xbb', 'conj-s', '}']
-
-We can also access the corpus grouped by sentence:
-
-    >>> floresta.sents() # doctest: +NORMALIZE_WHITESPACE
-    [['Um', 'revivalismo', 'refrescante'],
-     ['O', '7_e_Meio', '\xe9', 'um', 'ex-libris', 'de', 'a', 'noite',
-      'algarvia', '.'], ...]
-    >>> floresta.tagged_sents() # doctest: +NORMALIZE_WHITESPACE
-    [[('Um', '>N+art'), ('revivalismo', 'H+n'), ('refrescante', 'N<+adj')],
-     [('O', '>N+art'), ('7_e_Meio', 'H+prop'), ('\xe9', 'P+v-fin'),
-      ('um', '>N+art'), ('ex-libris', 'H+n'), ('de', 'H+prp'),
-      ('a', '>N+art'), ('noite', 'H+n'), ('algarvia', 'N<+adj'), ('.', '.')],
-     ...]
-    >>> floresta.parsed_sents() # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
-    [Tree('UTT+np', [Tree('>N+art', ['Um']), Tree('H+n', ['revivalismo']),
-                     Tree('N<+adj', ['refrescante'])]),
-     Tree('STA+fcl',
-         [Tree('SUBJ+np', [Tree('>N+art', ['O']),
-                           Tree('H+prop', ['7_e_Meio'])]),
-          Tree('P+v-fin', ['\xe9']),
-          Tree('SC+np',
-             [Tree('>N+art', ['um']),
-              Tree('H+n', ['ex-libris']),
-              Tree('N<+pp', [Tree('H+prp', ['de']),
-                             Tree('P<+np', [Tree('>N+art', ['a']),
-                                            Tree('H+n', ['noite']),
-                                            Tree('N<+adj', ['algarvia'])])])]),
-          Tree('.', ['.'])]), ...]
-
-To view a parse tree, use the ``draw()`` method, e.g.:
-
-    >>> psents = floresta.parsed_sents()
-    >>> psents[5].draw() # doctest: +SKIP
-
-Character Encodings
--------------------
-
-Python understands the common character encoding used for Portuguese, ISO 8859-1 (ISO Latin 1).
-
-    >>> import os, nltk.test
-    >>> testdir = os.path.split(nltk.test.__file__)[0]
-    >>> text = open(os.path.join(testdir, 'floresta.txt'), 'rb').read().decode('ISO 8859-1')
-    >>> text[:60]
-    'O 7 e Meio \xe9 um ex-libris da noite algarvia.\n\xc9 uma das mais '
-    >>> print(text[:60])
-    O 7 e Meio é um ex-libris da noite algarvia.
-    É uma das mais
-
-For more information about character encodings and Python, please see section 3.3 of the book.
-
-----------------
-Processing Tasks
-----------------
-
-
-Simple Concordancing
---------------------
-
-Here's a function that takes a word and a specified amount of context (measured
-in characters), and generates a concordance for that word.
-
-    >>> def concordance(word, context=30):
-    ...     for sent in floresta.sents():
-    ...         if word in sent:
-    ...             pos = sent.index(word)
-    ...             left = ' '.join(sent[:pos])
-    ...             right = ' '.join(sent[pos+1:])
-    ...             print('%*s %s %-*s' %
-    ...                 (context, left[-context:], word, context, right[:context]))
-
-    >>> concordance("dar") # doctest: +SKIP
-    anduru , foi o suficiente para dar a volta a o resultado .
-                 1. O P?BLICO veio dar a a imprensa di?ria portuguesa
-      A fartura de pensamento pode dar maus resultados e n?s n?o quer
-                          Come?a a dar resultados a pol?tica de a Uni
-    ial come?ar a incorporar- lo e dar forma a um ' site ' que tem se
-    r com Constantino para ele lhe dar tamb?m os pap?is assinados .
-    va a brincar , pois n?o lhe ia dar procura??o nenhuma enquanto n?
-    ?rica como o ant?doto capaz de dar sentido a o seu enorme poder .
-    . . .
-    >>> concordance("vender") # doctest: +SKIP
-    er recebido uma encomenda para vender 4000 blindados a o Iraque .
-    m?rico_Amorim caso conseguisse vender o lote de ac??es de o empres?r
-    mpre ter jovens simp?ticos a ? vender ? chega ! }
-           Disse que o governo vai vender ? desde autom?vel at? particip
-    ndiciou ontem duas pessoas por vender carro com ?gio .
-            A inten??o de Fleury ? vender as a??es para equilibrar as fi
-
-Part-of-Speech Tagging
-----------------------
-
-Let's begin by getting the tagged sentence data, and simplifying the tags
-as described earlier.
-
-    >>> from nltk.corpus import floresta
-    >>> tsents = floresta.tagged_sents()
-    >>> tsents = [[(w.lower(),simplify_tag(t)) for (w,t) in sent] for sent in tsents if sent]
-    >>> train = tsents[100:]
-    >>> test = tsents[:100]
-
-We already know that ``n`` is the most common tag, so we can set up a
-default tagger that tags every word as a noun, and see how well it does:
-
-    >>> tagger0 = nltk.DefaultTagger('n')
-    >>> nltk.tag.accuracy(tagger0, test)
-    0.17697228144989338
-
-Evidently, about one in every six words is a noun.  Let's improve on this by
-training a unigram tagger:
-
-    >>> tagger1 = nltk.UnigramTagger(train, backoff=tagger0)
-    >>> nltk.tag.accuracy(tagger1, test)
-    0.87029140014214645
-
-Next a bigram tagger:
-
-    >>> tagger2 = nltk.BigramTagger(train, backoff=tagger1)
-    >>> nltk.tag.accuracy(tagger2, test)
-    0.89019189765458417
-
-
-Sentence Segmentation
----------------------
-
-Punkt is a language-neutral sentence segmentation tool.  We load a trained Portuguese model and apply it to the raw text:
-
-    >>> sent_tokenizer=nltk.data.load('tokenizers/punkt/portuguese.pickle')
-    >>> raw_text = machado.raw('romance/marm05.txt')
-    >>> sentences = sent_tokenizer.tokenize(raw_text)
-    >>> for sent in sentences[1000:1005]:
-    ...     print("<<", sent, ">>")
-    ...
-    << Em verdade, parecia ainda mais mulher do que era;
-    seria criança nos seus folgares de moça; mas assim quieta, impassível, tinha a
-    compostura da mulher casada. >>
-    << Talvez essa circunstância lhe diminuía um pouco da
-    graça virginal. >>
-    << Depressa nos familiarizamos; a mãe fazia-lhe grandes elogios, eu
-    escutava-os de boa sombra, e ela sorria com os olhos fúlgidos, como se lá dentro
-    do cérebro lhe estivesse a voar uma borboletinha de asas de ouro e olhos de
-    diamante... >>
-    << Digo lá dentro, porque cá fora o
-    que esvoaçou foi uma borboleta preta, que subitamente penetrou na varanda, e
-    começou a bater as asas em derredor de D. Eusébia. >>
-    << D. Eusébia deu um grito,
-    levantou-se, praguejou umas palavras soltas: - T'esconjuro!... >>
-
-The sentence tokenizer can be trained and evaluated on other text.
-The source text (from the Floresta Portuguese Treebank) contains one sentence per line.
-We read the text, split it into its lines, and then join these lines together using
-spaces.  Now the information about sentence breaks has been discarded.  We split this
-material into training and testing data:
-
-    >>> import os, nltk.test
-    >>> testdir = os.path.split(nltk.test.__file__)[0]
-    >>> text = open(os.path.join(testdir, 'floresta.txt'), 'rb').read().decode('ISO-8859-1')
-    >>> lines = text.split('\n')
-    >>> train = ' '.join(lines[10:])
-    >>> test = ' '.join(lines[:10])
-
-Now we train the sentence segmenter (or sentence tokenizer) and use it on our test sentences:
-
-    >>> stok = nltk.PunktSentenceTokenizer(train)
-    >>> print(stok.tokenize(test))
-    ['O 7 e Meio \xe9 um ex-libris da noite algarvia.',
-    '\xc9 uma das mais antigas discotecas do Algarve, situada em Albufeira,
-    que continua a manter os tra\xe7os decorativos e as clientelas de sempre.',
-    '\xc9 um pouco a vers\xe3o de uma esp\xe9cie de \xaboutro lado\xbb da noite,
-    a meio caminho entre os devaneios de uma fauna perif\xe9rica, seja de Lisboa,
-    Londres, Dublin ou Faro e Portim\xe3o, e a postura circunspecta dos fi\xe9is da casa,
-    que dela esperam a m\xfasica \xabgeracionista\xbb dos 60 ou dos 70.',
-    'N\xe3o deixa de ser, nos tempos que correm, um certo \xabvery typical\xbb algarvio,
-    cabe\xe7a de cartaz para os que querem fugir a algumas movimenta\xe7\xf5es nocturnas
-    j\xe1 a caminho da ritualiza\xe7\xe3o de massas, do g\xe9nero \xabvamos todos ao
-    Calypso e encontramo-nos na Locomia\xbb.',
-    'E assim, aos 2,5 milh\xf5es que o Minist\xe9rio do Planeamento e Administra\xe7\xe3o
-    do Territ\xf3rio j\xe1 gasta no pagamento do pessoal afecto a estes organismos,
-    v\xeam juntar-se os montantes das obras propriamente ditas, que os munic\xedpios,
-    j\xe1 com projectos na m\xe3o, v\xeam reivindicar junto do Executivo, como salienta
-    aquele membro do Governo.',
-    'E o dinheiro \xabn\xe3o falta s\xf3 \xe0s c\xe2maras\xbb, lembra o secret\xe1rio de Estado,
-    que considera que a solu\xe7\xe3o para as autarquias \xe9 \xabespecializarem-se em
-    fundos comunit\xe1rios\xbb.',
-    'Mas como, se muitas n\xe3o disp\xf5em, nos seus quadros, dos t\xe9cnicos necess\xe1rios?',
-    '\xabEncomendem-nos a projectistas de fora\xbb porque, se as obras vierem a ser financiadas,
-    eles at\xe9 saem de gra\xe7a, j\xe1 que, nesse caso, \xabos fundos comunit\xe1rios pagam
-    os projectos, o mesmo n\xe3o acontecendo quando eles s\xe3o feitos pelos GAT\xbb,
-    dado serem organismos do Estado.',
-    'Essa poder\xe1 vir a ser uma hip\xf3tese, at\xe9 porque, no terreno, a capacidade dos GAT
-    est\xe1 cada vez mais enfraquecida.',
-    'Alguns at\xe9 j\xe1 desapareceram, como o de Castro Verde, e outros t\xeam vindo a perder quadros.']
-
-NLTK's data collection includes a trained model for Portuguese sentence
-segmentation, which can be loaded as follows.  It is faster to load a trained model than
-to retrain it.
-
-    >>> stok = nltk.data.load('tokenizers/punkt/portuguese.pickle')
-
-Stemming
---------
-
-NLTK includes the RSLP Portuguese stemmer.  Here we use it to stem some Portuguese text:
-
-    >>> stemmer = nltk.stem.RSLPStemmer()
-    >>> stemmer.stem("copiar")
-    'copi'
-    >>> stemmer.stem("paisagem")
-    'pais'
-
-
-Stopwords
----------
-
-NLTK includes Portuguese stopwords:
-
-    >>> stopwords = nltk.corpus.stopwords.words('portuguese')
-    >>> stopwords[:10]
-    ['a', 'ao', 'aos', 'aquela', 'aquelas', 'aquele', 'aqueles', 'aquilo', 'as', 'at\xe9']
-
-Now we can use these to filter text.  Let's find the most frequent words (other than stopwords)
-and print them in descending order of frequency:
-
-    >>> fd = nltk.FreqDist(w.lower() for w in floresta.words() if w not in stopwords)
-    >>> for word in list(fd.keys())[:20]:
-    ...     print(word, fd[word])
-    , 13444
-    . 7725
-    « 2369
-    » 2310
-    é 1305
-    o 1086
-    } 1047
-    { 1044
-    a 897
-    ; 633
-    em 516
-    ser 466
-    sobre 349
-    os 313
-    anos 301
-    ontem 292
-    ainda 279
-    segundo 256
-    ter 249
-    dois 231
-
diff --git a/nlp_resource_data/nltk/test/portuguese_en_fixt.py b/nlp_resource_data/nltk/test/portuguese_en_fixt.py

deleted file mode 100644 (file)

index afbd59e..0000000
--- a/nlp_resource_data/nltk/test/portuguese_en_fixt.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import
-from nltk.compat import PY3
-
-from nltk.corpus import teardown_module
-
-
-def setup_module(module):
-    from nose import SkipTest
-
-    raise SkipTest(
-        "portuguese_en.doctest imports nltk.examples.pt which doesn't exist!"
-    )
-
-    if not PY3:
-        raise SkipTest(
-            "portuguese_en.doctest was skipped because non-ascii doctests are not supported under Python 2.x"
-        )
diff --git a/nlp_resource_data/nltk/test/probability.doctest b/nlp_resource_data/nltk/test/probability.doctest

deleted file mode 100644 (file)

index 3cb582e..0000000
--- a/nlp_resource_data/nltk/test/probability.doctest
+++ /dev/null
@@ -1,304 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-===========
-Probability
-===========
-
-    >>> import nltk
-    >>> from nltk.probability import *
-
-FreqDist
---------
-
-    >>> text1 = ['no', 'good', 'fish', 'goes', 'anywhere', 'without', 'a', 'porpoise', '!']
-    >>> text2 = ['no', 'good', 'porpoise', 'likes', 'to', 'fish', 'fish', 'anywhere', '.']
-
-    >>> fd1 = nltk.FreqDist(text1)
-    >>> fd1 == nltk.FreqDist(text1)
-    True
-
-Note that items are sorted in order of decreasing frequency; two items of the same frequency appear in indeterminate order.
-
-    >>> import itertools
-    >>> both = nltk.FreqDist(text1 + text2)
-    >>> both_most_common = both.most_common()
-    >>> list(itertools.chain(*(sorted(ys) for k, ys in itertools.groupby(both_most_common, key=lambda t: t[1]))))
-    [('fish', 3), ('anywhere', 2), ('good', 2), ('no', 2), ('porpoise', 2), ('!', 1), ('.', 1), ('a', 1), ('goes', 1), ('likes', 1), ('to', 1), ('without', 1)]
-
-    >>> both == fd1 + nltk.FreqDist(text2)
-    True
-    >>> fd1 == nltk.FreqDist(text1) # But fd1 is unchanged
-    True
-
-    >>> fd2 = nltk.FreqDist(text2)
-    >>> fd1.update(fd2)
-    >>> fd1 == both
-    True
-
-    >>> fd1 = nltk.FreqDist(text1)
-    >>> fd1.update(text2)
-    >>> fd1 == both
-    True
-
-    >>> fd1 = nltk.FreqDist(text1)
-    >>> fd2 = nltk.FreqDist(fd1)
-    >>> fd2 == fd1
-    True
-
-``nltk.FreqDist`` can be pickled:
-
-    >>> import pickle
-    >>> fd1 = nltk.FreqDist(text1)
-    >>> pickled = pickle.dumps(fd1)
-    >>> fd1 == pickle.loads(pickled)
-    True
-
-Mathematical operations:
-
-    >>> FreqDist('abbb') + FreqDist('bcc')
-    FreqDist({'b': 4, 'c': 2, 'a': 1})
-    >>> FreqDist('abbbc') - FreqDist('bccd')
-    FreqDist({'b': 2, 'a': 1})
-    >>> FreqDist('abbb') | FreqDist('bcc')
-    FreqDist({'b': 3, 'c': 2, 'a': 1})
-    >>> FreqDist('abbb') & FreqDist('bcc')
-    FreqDist({'b': 1})
-
-ConditionalFreqDist
--------------------
-
-    >>> cfd1 = ConditionalFreqDist()
-    >>> cfd1[1] = FreqDist('abbbb')
-    >>> cfd1[2] = FreqDist('xxxxyy')
-    >>> cfd1
-    <ConditionalFreqDist with 2 conditions>
-
-    >>> cfd2 = ConditionalFreqDist()
-    >>> cfd2[1] = FreqDist('bbccc')
-    >>> cfd2[2] = FreqDist('xxxyyyzz')
-    >>> cfd2[3] = FreqDist('m')
-    >>> cfd2
-    <ConditionalFreqDist with 3 conditions>
-
-    >>> r = cfd1 + cfd2
-    >>> [(i,r[i]) for i in r.conditions()]
-    [(1, FreqDist({'b': 6, 'c': 3, 'a': 1})), (2, FreqDist({'x': 7, 'y': 5, 'z': 2})), (3, FreqDist({'m': 1}))]
-
-    >>> r = cfd1 - cfd2
-    >>> [(i,r[i]) for i in r.conditions()]
-    [(1, FreqDist({'b': 2, 'a': 1})), (2, FreqDist({'x': 1}))]
-
-    >>> r = cfd1 | cfd2
-    >>> [(i,r[i]) for i in r.conditions()]
-    [(1, FreqDist({'b': 4, 'c': 3, 'a': 1})), (2, FreqDist({'x': 4, 'y': 3, 'z': 2})), (3, FreqDist({'m': 1}))]
-
-    >>> r = cfd1 & cfd2
-    >>> [(i,r[i]) for i in r.conditions()]
-    [(1, FreqDist({'b': 2})), (2, FreqDist({'x': 3, 'y': 2}))]
-
-Testing some HMM estimators
----------------------------
-
-We extract a small part (500 sentences) of the Brown corpus
-
-    >>> corpus = nltk.corpus.brown.tagged_sents(categories='adventure')[:500]
-    >>> print(len(corpus))
-    500
-
-We create an HMM trainer - note that we need the tags and symbols
-from the whole corpus, not just the training corpus
-
-    >>> from nltk.util import unique_list
-    >>> tag_set = unique_list(tag for sent in corpus for (word,tag) in sent)
-    >>> print(len(tag_set))
-    92
-    >>> symbols = unique_list(word for sent in corpus for (word,tag) in sent)
-    >>> print(len(symbols))
-    1464
-    >>> trainer = nltk.tag.HiddenMarkovModelTrainer(tag_set, symbols)
-
-We divide the corpus into 90% training and 10% testing
-
-    >>> train_corpus = []
-    >>> test_corpus = []
-    >>> for i in range(len(corpus)):
-    ...     if i % 10:
-    ...         train_corpus += [corpus[i]]
-    ...     else:
-    ...         test_corpus += [corpus[i]]
-    >>> print(len(train_corpus))
-    450
-    >>> print(len(test_corpus))
-    50
-
-And now we can test the estimators
-
-    >>> def train_and_test(est):
-    ...     hmm = trainer.train_supervised(train_corpus, estimator=est)
-    ...     print('%.2f%%' % (100 * hmm.evaluate(test_corpus)))
-
-Maximum Likelihood Estimation
------------------------------
-- this resulted in an initialization error before r7209
-
-    >>> mle = lambda fd, bins: MLEProbDist(fd)
-    >>> train_and_test(mle)
-    22.75%
-
-Laplace (= Lidstone with gamma==1)
-
-    >>> train_and_test(LaplaceProbDist)
-    66.04%
-
-Expected Likelihood Estimation (= Lidstone with gamma==0.5)
-
-    >>> train_and_test(ELEProbDist)
-    73.01%
-
-Lidstone Estimation, for gamma==0.1, 0.5 and 1
-(the latter two should be exactly equal to ELE and Laplace above)
-
-    >>> def lidstone(gamma):
-    ...     return lambda fd, bins: LidstoneProbDist(fd, gamma, bins)
-    >>> train_and_test(lidstone(0.1))
-    82.51%
-    >>> train_and_test(lidstone(0.5))
-    73.01%
-    >>> train_and_test(lidstone(1.0))
-    66.04%
-
-Witten Bell Estimation
-----------------------
-- This resulted in ZeroDivisionError before r7209
-
-    >>> train_and_test(WittenBellProbDist)
-    88.12%
-
-Good Turing Estimation
-
-    >>> gt = lambda fd, bins: SimpleGoodTuringProbDist(fd, bins=1e5)
-    >>> train_and_test(gt)
-    86.93%
-
-Kneser Ney Estimation
----------------------
-Since the Kneser-Ney distribution is best suited for trigrams, we must adjust
-our testing accordingly.
-
-    >>> corpus = [[((x[0],y[0],z[0]),(x[1],y[1],z[1]))
-    ...     for x, y, z in nltk.trigrams(sent)]
-    ...         for sent in corpus[:100]]
-
-We will then need to redefine the rest of the training/testing variables
-    >>> tag_set = unique_list(tag for sent in corpus for (word,tag) in sent)
-    >>> len(tag_set)
-    906
-
-    >>> symbols = unique_list(word for sent in corpus for (word,tag) in sent)
-    >>> len(symbols)
-    1341
-
-    >>> trainer = nltk.tag.HiddenMarkovModelTrainer(tag_set, symbols)
-    >>> train_corpus = []
-    >>> test_corpus = []
-
-    >>> for i in range(len(corpus)):
-    ...    if i % 10:
-    ...        train_corpus += [corpus[i]]
-    ...    else:
-    ...        test_corpus += [corpus[i]]
-
-    >>> len(train_corpus)
-    90
-    >>> len(test_corpus)
-    10
-
-    >>> kn = lambda fd, bins: KneserNeyProbDist(fd)
-    >>> train_and_test(kn)
-    0.86%
-
-Remains to be added:
-- Tests for HeldoutProbDist, CrossValidationProbDist and MutableProbDist
-
-Squashed bugs
--------------
-
-Issue 511: override pop and popitem to invalidate the cache
-
-    >>> fd = nltk.FreqDist('a')
-    >>> list(fd.keys())
-    ['a']
-    >>> fd.pop('a')
-    1
-    >>> list(fd.keys())
-    []
-
-Issue 533: access cumulative frequencies with no arguments
-
-    >>> fd = nltk.FreqDist('aab')
-    >>> list(fd._cumulative_frequencies(['a']))
-    [2.0]
-    >>> list(fd._cumulative_frequencies(['a', 'b']))
-    [2.0, 3.0]
-
-Issue 579: override clear to reset some variables
-
-    >>> fd = FreqDist('aab')
-    >>> fd.clear()
-    >>> fd.N()
-    0
-
-Issue 351: fix fileids method of CategorizedCorpusReader so that it does not
-inadvertently add errant categories
-
-    >>> from nltk.corpus import brown
-    >>> brown.fileids('blah')
-    Traceback (most recent call last):
-      ...
-    ValueError: Category blah not found
-    >>> brown.categories()
-    ['adventure', 'belles_lettres', 'editorial', 'fiction', 'government', 'hobbies', 'humor', 'learned', 'lore', 'mystery', 'news', 'religion', 'reviews', 'romance', 'science_fiction']
-
-Issue 175: add the unseen bin to SimpleGoodTuringProbDist by default
-otherwise any unseen events get a probability of zero, i.e.,
-they don't get smoothed
-
-    >>> from nltk import SimpleGoodTuringProbDist, FreqDist
-    >>> fd = FreqDist({'a':1, 'b':1, 'c': 2, 'd': 3, 'e': 4, 'f': 4, 'g': 4, 'h': 5, 'i': 5, 'j': 6, 'k': 6, 'l': 6, 'm': 7, 'n': 7, 'o': 8, 'p': 9, 'q': 10})
-    >>> p = SimpleGoodTuringProbDist(fd)
-    >>> p.prob('a')
-    0.017649766667026317...
-    >>> p.prob('o')
-    0.08433050215340411...
-    >>> p.prob('z')
-    0.022727272727272728...
-    >>> p.prob('foobar')
-    0.022727272727272728...
-
-``MLEProbDist``, ``ConditionalProbDist``, ``DictionaryConditionalProbDist`` and
-``ConditionalFreqDist`` can be pickled:
-
-    >>> import pickle
-    >>> pd = MLEProbDist(fd)
-    >>> sorted(pd.samples()) == sorted(pickle.loads(pickle.dumps(pd)).samples())
-    True
-    >>> dpd = DictionaryConditionalProbDist({'x': pd})
-    >>> unpickled = pickle.loads(pickle.dumps(dpd))
-    >>> dpd['x'].prob('a')
-    0.011363636...
-    >>> dpd['x'].prob('a') == unpickled['x'].prob('a')
-    True
-    >>> cfd = nltk.probability.ConditionalFreqDist()
-    >>> cfd['foo']['hello'] += 1
-    >>> cfd['foo']['hello'] += 1
-    >>> cfd['bar']['hello'] += 1
-    >>> cfd2 = pickle.loads(pickle.dumps(cfd))
-    >>> cfd2 == cfd
-    True
-    >>> cpd = ConditionalProbDist(cfd, SimpleGoodTuringProbDist)
-    >>> cpd2 = pickle.loads(pickle.dumps(cpd))
-    >>> cpd['foo'].prob('hello') == cpd2['foo'].prob('hello')
-    True
-
-
diff --git a/nlp_resource_data/nltk/test/probability_fixt.py b/nlp_resource_data/nltk/test/probability_fixt.py

deleted file mode 100644 (file)

index 680dab6..0000000
--- a/nlp_resource_data/nltk/test/probability_fixt.py
+++ /dev/null
@@ -1,15 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import
-
-
-# probability.doctest uses HMM which requires numpy;
-# skip probability.doctest if numpy is not available
-
-
-def setup_module(module):
-    from nose import SkipTest
-
-    try:
-        import numpy
-    except ImportError:
-        raise SkipTest("probability.doctest requires numpy")
diff --git a/nlp_resource_data/nltk/test/propbank.doctest b/nlp_resource_data/nltk/test/propbank.doctest

deleted file mode 100644 (file)

index 9bec607..0000000
--- a/nlp_resource_data/nltk/test/propbank.doctest
+++ /dev/null
@@ -1,176 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-========
-PropBank
-========
-
-The PropBank Corpus provides predicate-argument annotation for the
-entire Penn Treebank.  Each verb in the treebank is annotated by a single
-instance in PropBank, containing information about the location of
-the verb, and the location and identity of its arguments:
-
-    >>> from nltk.corpus import propbank
-    >>> pb_instances = propbank.instances()
-    >>> print(pb_instances) # doctest: +NORMALIZE_WHITESPACE
-    [<PropbankInstance: wsj_0001.mrg, sent 0, word 8>,
-     <PropbankInstance: wsj_0001.mrg, sent 1, word 10>, ...]
-
-Each propbank instance defines the following member variables:
-
-  - Location information: `fileid`, `sentnum`, `wordnum`
-  - Annotator information: `tagger`
-  - Inflection information: `inflection`
-  - Roleset identifier: `roleset`
-  - Verb (aka predicate) location: `predicate`
-  - Argument locations and types: `arguments`
-
-The following examples show the types of these arguments:
-
-    >>> inst = pb_instances[103]
-    >>> (inst.fileid, inst.sentnum, inst.wordnum)
-    ('wsj_0004.mrg', 8, 16)
-    >>> inst.tagger
-    'gold'
-    >>> inst.inflection
-    <PropbankInflection: vp--a>
-    >>> infl = inst.inflection
-    >>> infl.form, infl.tense, infl.aspect, infl.person, infl.voice
-    ('v', 'p', '-', '-', 'a')
-    >>> inst.roleset
-    'rise.01'
-    >>> inst.predicate
-    PropbankTreePointer(16, 0)
-    >>> inst.arguments # doctest: +NORMALIZE_WHITESPACE
-    ((PropbankTreePointer(0, 2), 'ARG1'),
-     (PropbankTreePointer(13, 1), 'ARGM-DIS'),
-     (PropbankTreePointer(17, 1), 'ARG4-to'),
-     (PropbankTreePointer(20, 1), 'ARG3-from'))
-
-The location of the predicate and of the arguments are encoded using
-`PropbankTreePointer` objects, as well as `PropbankChainTreePointer`
-objects and `PropbankSplitTreePointer` objects.  A
-`PropbankTreePointer` consists of a `wordnum` and a `height`:
-
-    >>> print(inst.predicate.wordnum, inst.predicate.height)
-    16 0
-
-This identifies the tree constituent that is headed by the word that
-is the `wordnum`\ 'th token in the sentence, and whose span is found
-by going `height` nodes up in the tree.  This type of pointer is only
-useful if we also have the corresponding tree structure, since it
-includes empty elements such as traces in the word number count.  The
-trees for 10% of the standard PropBank Corpus are contained in the
-`treebank` corpus:
-
-    >>> tree = inst.tree
-
-    >>> from nltk.corpus import treebank
-    >>> assert tree == treebank.parsed_sents(inst.fileid)[inst.sentnum]
-
-    >>> inst.predicate.select(tree)
-    Tree('VBD', ['rose'])
-    >>> for (argloc, argid) in inst.arguments:
-    ...     print('%-10s %s' % (argid, argloc.select(tree).pformat(500)[:50]))
-    ARG1       (NP-SBJ (NP (DT The) (NN yield)) (PP (IN on) (NP (
-    ARGM-DIS   (PP (IN for) (NP (NN example)))
-    ARG4-to    (PP-DIR (TO to) (NP (CD 8.04) (NN %)))
-    ARG3-from  (PP-DIR (IN from) (NP (CD 7.90) (NN %)))
-
-Propbank tree pointers can be converted to standard tree locations,
-which are usually easier to work with, using the `treepos()` method:
-
-    >>> treepos = inst.predicate.treepos(tree)
-    >>> print (treepos, tree[treepos])
-    (4, 0) (VBD rose)
-
-In some cases, argument locations will be encoded using
-`PropbankChainTreePointer`\ s (for trace chains) or
-`PropbankSplitTreePointer`\ s (for discontinuous constituents).  Both
-of these objects contain a single member variable, `pieces`,
-containing a list of the constituent pieces.  They also define the
-method `select()`, which will return a tree containing all the
-elements of the argument.  (A new head node is created, labeled
-"*CHAIN*" or "*SPLIT*", since the argument is not a single constituent
-in the original tree).  Sentence #6 contains an example of an argument
-that is both discontinuous and contains a chain:
-
-    >>> inst = pb_instances[6]
-    >>> inst.roleset
-    'expose.01'
-    >>> argloc, argid = inst.arguments[2]
-    >>> argloc
-    <PropbankChainTreePointer: 22:1,24:0,25:1*27:0>
-    >>> argloc.pieces
-    [<PropbankSplitTreePointer: 22:1,24:0,25:1>, PropbankTreePointer(27, 0)]
-    >>> argloc.pieces[0].pieces
-    ... # doctest: +NORMALIZE_WHITESPACE
-    [PropbankTreePointer(22, 1), PropbankTreePointer(24, 0),
-     PropbankTreePointer(25, 1)]
-    >>> print(argloc.select(inst.tree))
-    (*CHAIN*
-      (*SPLIT* (NP (DT a) (NN group)) (IN of) (NP (NNS workers)))
-      (-NONE- *))
-
-The PropBank Corpus also provides access to the frameset files, which
-define the argument labels used by the annotations, on a per-verb
-basis.  Each frameset file contains one or more predicates, such as
-'turn' or 'turn_on', each of which is divided into coarse-grained word
-senses called rolesets.  For each roleset, the frameset file provides
-descriptions of the argument roles, along with examples.
-
-    >>> expose_01 = propbank.roleset('expose.01')
-    >>> turn_01 = propbank.roleset('turn.01')
-    >>> print(turn_01) # doctest: +ELLIPSIS
-    <Element 'roleset' at ...>
-    >>> for role in turn_01.findall("roles/role"):
-    ...     print(role.attrib['n'], role.attrib['descr'])
-    0 turner
-    1 thing turning
-    m direction, location
-
-    >>> from xml.etree import ElementTree
-    >>> print(ElementTree.tostring(turn_01.find('example')).decode('utf8').strip())
-    <example name="transitive agentive">
-      <text>
-      John turned the key in the lock.
-      </text>
-      <arg n="0">John</arg>
-      <rel>turned</rel>
-      <arg n="1">the key</arg>
-      <arg f="LOC" n="m">in the lock</arg>
-    </example>
-
-Note that the standard corpus distribution only contains 10% of the
-treebank, so the parse trees are not available for instances starting
-at 9353:
-
-    >>> inst = pb_instances[9352]
-    >>> inst.fileid
-    'wsj_0199.mrg'
-    >>> print(inst.tree) # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
-    (S (NP-SBJ (NNP Trinity)) (VP (VBD said) (SBAR (-NONE- 0) ...))
-    >>> print(inst.predicate.select(inst.tree))
-    (VB begin)
-
-    >>> inst = pb_instances[9353]
-    >>> inst.fileid
-    'wsj_0200.mrg'
-    >>> print(inst.tree)
-    None
-    >>> print(inst.predicate.select(inst.tree))
-    Traceback (most recent call last):
-      . . .
-    ValueError: Parse tree not avaialable
-
-However, if you supply your own version of the treebank corpus (by
-putting it before the nltk-provided version on `nltk.data.path`, or
-by creating a `ptb` directory as described above and using the
-`propbank_ptb` module), then you can access the trees for all
-instances.
-
-A list of the verb lemmas contained in PropBank is returned by the
-`propbank.verbs()` method:
-
-    >>> propbank.verbs()
-    ['abandon', 'abate', 'abdicate', 'abet', 'abide', ...]
diff --git a/nlp_resource_data/nltk/test/relextract.doctest b/nlp_resource_data/nltk/test/relextract.doctest

deleted file mode 100644 (file)

index 085fa90..0000000
--- a/nlp_resource_data/nltk/test/relextract.doctest
+++ /dev/null
@@ -1,263 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-======================
-Information Extraction
-======================
-
-Information Extraction standardly consists of three subtasks:
-
-#. Named Entity Recognition
-
-#. Relation Extraction
-
-#. Template Filling
-
-Named Entities
-~~~~~~~~~~~~~~
-
-The IEER corpus is marked up for a variety of Named Entities. A `Named
-Entity`:dt: (more strictly, a Named Entity mention) is a name of an
-entity belonging to a specified class. For example, the Named Entity
-classes in IEER include PERSON, LOCATION, ORGANIZATION, DATE and so
-on. Within NLTK, Named Entities are represented as subtrees within a
-chunk structure: the class name is treated as node label, while the
-entity mention itself appears as the leaves of the subtree. This is
-illustrated below, where we have show an extract of the chunk
-representation of document NYT_19980315.064:
-
-    >>> from nltk.corpus import ieer
-    >>> docs = ieer.parsed_docs('NYT_19980315')
-    >>> tree = docs[1].text
-    >>> print(tree) # doctest: +ELLIPSIS
-    (DOCUMENT
-    ...
-      ``It's
-      a
-      chance
-      to
-      think
-      about
-      first-level
-      questions,''
-      said
-      Ms.
-      (PERSON Cohn)
-      ,
-      a
-      partner
-      in
-      the
-      (ORGANIZATION McGlashan &AMP; Sarrail)
-      firm
-      in
-      (LOCATION San Mateo)
-      ,
-      (LOCATION Calif.)
-      ...)
-
-Thus, the Named Entity mentions in this example are *Cohn*, *McGlashan &AMP;
-Sarrail*, *San Mateo* and *Calif.*.
-
-The CoNLL2002 Dutch and Spanish data is treated similarly, although in
-this case, the strings are also POS tagged.
-
-    >>> from nltk.corpus import conll2002
-    >>> for doc in conll2002.chunked_sents('ned.train')[27]:
-    ...     print(doc)
-    (u'Het', u'Art')
-    (ORG Hof/N van/Prep Cassatie/N)
-    (u'verbrak', u'V')
-    (u'het', u'Art')
-    (u'arrest', u'N')
-    (u'zodat', u'Conj')
-    (u'het', u'Pron')
-    (u'moest', u'V')
-    (u'worden', u'V')
-    (u'overgedaan', u'V')
-    (u'door', u'Prep')
-    (u'het', u'Art')
-    (u'hof', u'N')
-    (u'van', u'Prep')
-    (u'beroep', u'N')
-    (u'van', u'Prep')
-    (LOC Antwerpen/N)
-    (u'.', u'Punc')
-
-Relation Extraction
-~~~~~~~~~~~~~~~~~~~
-
-Relation Extraction standardly consists of identifying specified
-relations between Named Entities. For example, assuming that we can
-recognize ORGANIZATIONs and LOCATIONs in text, we might want to also
-recognize pairs *(o, l)* of these kinds of entities such that *o* is
-located in *l*.
-
-The `sem.relextract` module provides some tools to help carry out a
-simple version of this task. The `tree2semi_rel()` function splits a chunk
-document into a list of two-member lists, each of which consists of a
-(possibly empty) string followed by a `Tree` (i.e., a Named Entity):
-
-    >>> from nltk.sem import relextract
-    >>> pairs = relextract.tree2semi_rel(tree)
-    >>> for s, tree in pairs[18:22]:
-    ...     print('("...%s", %s)' % (" ".join(s[-5:]),tree))
-    ("...about first-level questions,'' said Ms.", (PERSON Cohn))
-    ("..., a partner in the", (ORGANIZATION McGlashan &AMP; Sarrail))
-    ("...firm in", (LOCATION San Mateo))
-    ("...,", (LOCATION Calif.))
-
-The function `semi_rel2reldict()` processes triples of these pairs, i.e.,
-pairs of the form ``((string1, Tree1), (string2, Tree2), (string3,
-Tree3))`` and outputs a dictionary (a `reldict`) in which ``Tree1`` is
-the subject of the relation, ``string2`` is the filler
-and ``Tree3`` is the object of the relation. ``string1`` and ``string3`` are
-stored as left and right context respectively.
-
-    >>> reldicts = relextract.semi_rel2reldict(pairs)
-    >>> for k, v in sorted(reldicts[0].items()):
-    ...     print(k, '=>', v) # doctest: +ELLIPSIS
-    filler => of messages to their own ``Cyberia'' ...
-    lcon => transactions.'' Each week, they post
-    objclass => ORGANIZATION
-    objsym => white_house
-    objtext => White House
-    rcon => for access to its planned
-    subjclass => CARDINAL
-    subjsym => hundreds
-    subjtext => hundreds
-    untagged_filler => of messages to their own ``Cyberia'' ...
-
-The next example shows some of the values for two `reldict`\ s
-corresponding to the ``'NYT_19980315'`` text extract shown earlier.
-
-    >>> for r in reldicts[18:20]:
-    ...     print('=' * 20)
-    ...     print(r['subjtext'])
-    ...     print(r['filler'])
-    ...     print(r['objtext'])
-    ====================
-    Cohn
-    , a partner in the
-    McGlashan &AMP; Sarrail
-    ====================
-    McGlashan &AMP; Sarrail
-    firm in
-    San Mateo
-
-The function `relextract()` allows us to filter the `reldict`\ s
-according to the classes of the subject and object named entities. In
-addition, we can specify that the filler text has to match a given
-regular expression, as illustrated in the next example. Here, we are
-looking for pairs of entities in the IN relation, where IN has
-signature <ORG, LOC>.
-
-    >>> import re
-    >>> IN = re.compile(r'.*\bin\b(?!\b.+ing\b)')
-    >>> for fileid in ieer.fileids():
-    ...     for doc in ieer.parsed_docs(fileid):
-    ...         for rel in relextract.extract_rels('ORG', 'LOC', doc, corpus='ieer', pattern = IN):
-    ...             print(relextract.rtuple(rel))  # doctest: +ELLIPSIS
-    [ORG: 'Christian Democrats'] ', the leading political forces in' [LOC: 'Italy']
-    [ORG: 'AP'] ') _ Lebanese guerrillas attacked Israeli forces in southern' [LOC: 'Lebanon']
-    [ORG: 'Security Council'] 'adopted Resolution 425. Huge yellow banners hung across intersections in' [LOC: 'Beirut']
-    [ORG: 'U.N.'] 'failures in' [LOC: 'Africa']
-    [ORG: 'U.N.'] 'peacekeeping operation in' [LOC: 'Somalia']
-    [ORG: 'U.N.'] 'partners on a more effective role in' [LOC: 'Africa']
-    [ORG: 'AP'] ') _ A bomb exploded in a mosque in central' [LOC: 'San`a']
-    [ORG: 'Krasnoye Sormovo'] 'shipyard in the Soviet city of' [LOC: 'Gorky']
-    [ORG: 'Kelab Golf Darul Ridzuan'] 'in' [LOC: 'Perak']
-    [ORG: 'U.N.'] 'peacekeeping operation in' [LOC: 'Somalia']
-    [ORG: 'WHYY'] 'in' [LOC: 'Philadelphia']
-    [ORG: 'McGlashan &AMP; Sarrail'] 'firm in' [LOC: 'San Mateo']
-    [ORG: 'Freedom Forum'] 'in' [LOC: 'Arlington']
-    [ORG: 'Brookings Institution'] ', the research group in' [LOC: 'Washington']
-    [ORG: 'Idealab'] ', a self-described business incubator based in' [LOC: 'Los Angeles']
-    [ORG: 'Open Text'] ', based in' [LOC: 'Waterloo']
-    ...
-
-The next example illustrates a case where the patter is a disjunction
-of roles that a PERSON can occupy in an ORGANIZATION.
-
-    >>> roles = """
-    ... (.*(
-    ... analyst|
-    ... chair(wo)?man|
-    ... commissioner|
-    ... counsel|
-    ... director|
-    ... economist|
-    ... editor|
-    ... executive|
-    ... foreman|
-    ... governor|
-    ... head|
-    ... lawyer|
-    ... leader|
-    ... librarian).*)|
-    ... manager|
-    ... partner|
-    ... president|
-    ... producer|
-    ... professor|
-    ... researcher|
-    ... spokes(wo)?man|
-    ... writer|
-    ... ,\sof\sthe?\s*  # "X, of (the) Y"
-    ... """
-    >>> ROLES = re.compile(roles, re.VERBOSE)
-    >>> for fileid in ieer.fileids():
-    ...     for doc in ieer.parsed_docs(fileid):
-    ...         for rel in relextract.extract_rels('PER', 'ORG', doc, corpus='ieer', pattern=ROLES):
-    ...             print(relextract.rtuple(rel)) # doctest: +ELLIPSIS
-    [PER: 'Kivutha Kibwana'] ', of the' [ORG: 'National Convention Assembly']
-    [PER: 'Boban Boskovic'] ', chief executive of the' [ORG: 'Plastika']
-    [PER: 'Annan'] ', the first sub-Saharan African to head the' [ORG: 'United Nations']
-    [PER: 'Kiriyenko'] 'became a foreman at the' [ORG: 'Krasnoye Sormovo']
-    [PER: 'Annan'] ', the first sub-Saharan African to head the' [ORG: 'United Nations']
-    [PER: 'Mike Godwin'] ', chief counsel for the' [ORG: 'Electronic Frontier Foundation']
-    ...
-
-In the case of the CoNLL2002 data, we can include POS tags in the
-query pattern. This example also illustrates how the output can be
-presented as something that looks more like a clause in a logical language.
-
-    >>> de = """
-    ... .*
-    ... (
-    ... de/SP|
-    ... del/SP
-    ... )
-    ... """
-    >>> DE = re.compile(de, re.VERBOSE)
-    >>> rels = [rel for doc in conll2002.chunked_sents('esp.train')
-    ...         for rel in relextract.extract_rels('ORG', 'LOC', doc, corpus='conll2002', pattern = DE)]
-    >>> for r in rels[:10]:
-    ...     print(relextract.clause(r, relsym='DE'))    # doctest: +NORMALIZE_WHITESPACE
-    DE(u'tribunal_supremo', u'victoria')
-    DE(u'museo_de_arte', u'alcorc\xf3n')
-    DE(u'museo_de_bellas_artes', u'a_coru\xf1a')
-    DE(u'siria', u'l\xedbano')
-    DE(u'uni\xf3n_europea', u'pek\xedn')
-    DE(u'ej\xe9rcito', u'rogberi')
-    DE(u'juzgado_de_instrucci\xf3n_n\xfamero_1', u'san_sebasti\xe1n')
-    DE(u'psoe', u'villanueva_de_la_serena')
-    DE(u'ej\xe9rcito', u'l\xedbano')
-    DE(u'juzgado_de_lo_penal_n\xfamero_2', u'ceuta')
-    >>> vnv = """
-    ... (
-    ... is/V|
-    ... was/V|
-    ... werd/V|
-    ... wordt/V
-    ... )
-    ... .*
-    ... van/Prep
-    ... """
-    >>> VAN = re.compile(vnv, re.VERBOSE)
-    >>> for doc in conll2002.chunked_sents('ned.train'):
-    ...     for r in relextract.extract_rels('PER', 'ORG', doc, corpus='conll2002', pattern=VAN):
-    ...         print(relextract.clause(r, relsym="VAN"))
-    VAN(u"cornet_d'elzius", u'buitenlandse_handel')
-    VAN(u'johan_rottiers', u'kardinaal_van_roey_instituut')
-    VAN(u'annie_lennox', u'eurythmics')
diff --git a/nlp_resource_data/nltk/test/resolution.doctest b/nlp_resource_data/nltk/test/resolution.doctest

deleted file mode 100644 (file)

index 318efcd..0000000
--- a/nlp_resource_data/nltk/test/resolution.doctest
+++ /dev/null
@@ -1,221 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-=========================
-Resolution Theorem Prover
-=========================
-
-    >>> from nltk.inference.resolution import *
-    >>> from nltk.sem import logic
-    >>> from nltk.sem.logic import *
-    >>> logic._counter._value = 0
-    >>> read_expr = logic.Expression.fromstring
-
-    >>> P = read_expr('P')
-    >>> Q = read_expr('Q')
-    >>> R = read_expr('R')
-    >>> A = read_expr('A')
-    >>> B = read_expr('B')
-    >>> x = read_expr('x')
-    >>> y = read_expr('y')
-    >>> z = read_expr('z')
-
--------------------------------
-Test most_general_unification()
--------------------------------
-    >>> print(most_general_unification(x, x))
-    {}
-    >>> print(most_general_unification(A, A))
-    {}
-    >>> print(most_general_unification(A, x))
-    {x: A}
-    >>> print(most_general_unification(x, A))
-    {x: A}
-    >>> print(most_general_unification(x, y))
-    {x: y}
-    >>> print(most_general_unification(P(x), P(A)))
-    {x: A}
-    >>> print(most_general_unification(P(x,B), P(A,y)))
-    {x: A, y: B}
-    >>> print(most_general_unification(P(x,B), P(B,x)))
-    {x: B}
-    >>> print(most_general_unification(P(x,y), P(A,x)))
-    {x: A, y: x}
-    >>> print(most_general_unification(P(Q(x)), P(y)))
-    {y: Q(x)}
-
-------------
-Test unify()
-------------
-    >>> print(Clause([]).unify(Clause([])))
-    []
-    >>> print(Clause([P(x)]).unify(Clause([-P(A)])))
-    [{}]
-    >>> print(Clause([P(A), Q(x)]).unify(Clause([-P(x), R(x)])))
-    [{R(A), Q(A)}]
-    >>> print(Clause([P(A), Q(x), R(x,y)]).unify(Clause([-P(x), Q(y)])))
-    [{Q(y), Q(A), R(A,y)}]
-    >>> print(Clause([P(A), -Q(y)]).unify(Clause([-P(x), Q(B)])))
-    [{}]
-    >>> print(Clause([P(x), Q(x)]).unify(Clause([-P(A), -Q(B)])))
-    [{-Q(B), Q(A)}, {-P(A), P(B)}]
-    >>> print(Clause([P(x,x), Q(x), R(x)]).unify(Clause([-P(A,z), -Q(B)])))
-    [{-Q(B), Q(A), R(A)}, {-P(A,z), R(B), P(B,B)}]
-
-    >>> a = clausify(read_expr('P(A)'))
-    >>> b = clausify(read_expr('A=B'))
-    >>> print(a[0].unify(b[0]))
-    [{P(B)}]
-
--------------------------
-Test is_tautology()
--------------------------
-    >>> print(Clause([P(A), -P(A)]).is_tautology())
-    True
-    >>> print(Clause([-P(A), P(A)]).is_tautology())
-    True
-    >>> print(Clause([P(x), -P(A)]).is_tautology())
-    False
-    >>> print(Clause([Q(B), -P(A), P(A)]).is_tautology())
-    True
-    >>> print(Clause([-Q(A), P(R(A)), -P(R(A)), Q(x), -R(y)]).is_tautology())
-    True
-    >>> print(Clause([P(x), -Q(A)]).is_tautology())
-    False
-
--------------------------
-Test subsumes()
--------------------------
-    >>> print(Clause([P(A), Q(B)]).subsumes(Clause([P(A), Q(B)])))
-    True
-    >>> print(Clause([-P(A)]).subsumes(Clause([P(A)])))
-    False
-    >>> print(Clause([P(A), Q(B)]).subsumes(Clause([Q(B), P(A)])))
-    True
-    >>> print(Clause([P(A), Q(B)]).subsumes(Clause([Q(B), R(A), P(A)])))
-    True
-    >>> print(Clause([P(A), R(A), Q(B)]).subsumes(Clause([Q(B), P(A)])))
-    False
-    >>> print(Clause([P(x)]).subsumes(Clause([P(A)])))
-    True
-    >>> print(Clause([P(A)]).subsumes(Clause([P(x)])))
-    True
-
-------------
-Test prove()
-------------
-    >>> print(ResolutionProverCommand(read_expr('man(x)')).prove())
-    False
-    >>> print(ResolutionProverCommand(read_expr('(man(x) -> man(x))')).prove())
-    True
-    >>> print(ResolutionProverCommand(read_expr('(man(x) -> --man(x))')).prove())
-    True
-    >>> print(ResolutionProverCommand(read_expr('-(man(x) & -man(x))')).prove())
-    True
-    >>> print(ResolutionProverCommand(read_expr('(man(x) | -man(x))')).prove())
-    True
-    >>> print(ResolutionProverCommand(read_expr('(man(x) -> man(x))')).prove())
-    True
-    >>> print(ResolutionProverCommand(read_expr('-(man(x) & -man(x))')).prove())
-    True
-    >>> print(ResolutionProverCommand(read_expr('(man(x) | -man(x))')).prove())
-    True
-    >>> print(ResolutionProverCommand(read_expr('(man(x) -> man(x))')).prove())
-    True
-    >>> print(ResolutionProverCommand(read_expr('(man(x) <-> man(x))')).prove())
-    True
-    >>> print(ResolutionProverCommand(read_expr('-(man(x) <-> -man(x))')).prove())
-    True
-    >>> print(ResolutionProverCommand(read_expr('all x.man(x)')).prove())
-    False
-    >>> print(ResolutionProverCommand(read_expr('-all x.some y.F(x,y) & some x.all y.(-F(x,y))')).prove())
-    False
-    >>> print(ResolutionProverCommand(read_expr('some x.all y.sees(x,y)')).prove())
-    False
-
-    >>> p1 = read_expr('all x.(man(x) -> mortal(x))')
-    >>> p2 = read_expr('man(Socrates)')
-    >>> c = read_expr('mortal(Socrates)')
-    >>> ResolutionProverCommand(c, [p1,p2]).prove()
-    True
-
-    >>> p1 = read_expr('all x.(man(x) -> walks(x))')
-    >>> p2 = read_expr('man(John)')
-    >>> c = read_expr('some y.walks(y)')
-    >>> ResolutionProverCommand(c, [p1,p2]).prove()
-    True
-
-    >>> p = read_expr('some e1.some e2.(believe(e1,john,e2) & walk(e2,mary))')
-    >>> c = read_expr('some e0.walk(e0,mary)')
-    >>> ResolutionProverCommand(c, [p]).prove()
-    True
-
-------------
-Test proof()
-------------
-    >>> p1 = read_expr('all x.(man(x) -> mortal(x))')
-    >>> p2 = read_expr('man(Socrates)')
-    >>> c = read_expr('mortal(Socrates)')
-    >>> logic._counter._value = 0
-    >>> tp = ResolutionProverCommand(c, [p1,p2])
-    >>> tp.prove()
-    True
-    >>> print(tp.proof())
-    [1] {-mortal(Socrates)}     A
-    [2] {-man(z2), mortal(z2)}  A
-    [3] {man(Socrates)}         A
-    [4] {-man(Socrates)}        (1, 2)
-    [5] {mortal(Socrates)}      (2, 3)
-    [6] {}                      (1, 5)
-    <BLANKLINE>
-
-------------------
-Question Answering
-------------------
-One answer
-    >>> p1 = read_expr('father_of(art,john)')
-    >>> p2 = read_expr('father_of(bob,kim)')
-    >>> p3 = read_expr('all x.all y.(father_of(x,y) -> parent_of(x,y))')
-    >>> c = read_expr('all x.(parent_of(x,john) -> ANSWER(x))')
-    >>> logic._counter._value = 0
-    >>> tp = ResolutionProverCommand(None, [p1,p2,p3,c])
-    >>> sorted(tp.find_answers())
-    [<ConstantExpression art>]
-    >>> print(tp.proof()) # doctest: +SKIP
-    [1] {father_of(art,john)}                  A
-    [2] {father_of(bob,kim)}                   A
-    [3] {-father_of(z3,z4), parent_of(z3,z4)}  A
-    [4] {-parent_of(z6,john), ANSWER(z6)}      A
-    [5] {parent_of(art,john)}                  (1, 3)
-    [6] {parent_of(bob,kim)}                   (2, 3)
-    [7] {ANSWER(z6), -father_of(z6,john)}      (3, 4)
-    [8] {ANSWER(art)}                          (1, 7)
-    [9] {ANSWER(art)}                          (4, 5)
-    <BLANKLINE>
-
-Multiple answers
-    >>> p1 = read_expr('father_of(art,john)')
-    >>> p2 = read_expr('mother_of(ann,john)')
-    >>> p3 = read_expr('all x.all y.(father_of(x,y) -> parent_of(x,y))')
-    >>> p4 = read_expr('all x.all y.(mother_of(x,y) -> parent_of(x,y))')
-    >>> c = read_expr('all x.(parent_of(x,john) -> ANSWER(x))')
-    >>> logic._counter._value = 0
-    >>> tp = ResolutionProverCommand(None, [p1,p2,p3,p4,c])
-    >>> sorted(tp.find_answers())
-    [<ConstantExpression ann>, <ConstantExpression art>]
-    >>> print(tp.proof()) # doctest: +SKIP
-    [ 1] {father_of(art,john)}                  A
-    [ 2] {mother_of(ann,john)}                  A
-    [ 3] {-father_of(z3,z4), parent_of(z3,z4)}  A
-    [ 4] {-mother_of(z7,z8), parent_of(z7,z8)}  A
-    [ 5] {-parent_of(z10,john), ANSWER(z10)}    A
-    [ 6] {parent_of(art,john)}                  (1, 3)
-    [ 7] {parent_of(ann,john)}                  (2, 4)
-    [ 8] {ANSWER(z10), -father_of(z10,john)}    (3, 5)
-    [ 9] {ANSWER(art)}                          (1, 8)
-    [10] {ANSWER(z10), -mother_of(z10,john)}    (4, 5)
-    [11] {ANSWER(ann)}                          (2, 10)
-    [12] {ANSWER(art)}                          (5, 6)
-    [13] {ANSWER(ann)}                          (5, 7)
-    <BLANKLINE>
-
diff --git a/nlp_resource_data/nltk/test/runtests.py b/nlp_resource_data/nltk/test/runtests.py

deleted file mode 100644 (file)

index 8f40cc6..0000000
--- a/nlp_resource_data/nltk/test/runtests.py
+++ /dev/null
@@ -1,81 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import, print_function
-import sys
-import os
-import nose
-from nose.plugins.manager import PluginManager
-from nose.plugins.doctests import Doctest
-from nose.plugins import builtin
-
-NLTK_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
-sys.path.insert(0, NLTK_ROOT)
-
-NLTK_TEST_DIR = os.path.join(NLTK_ROOT, 'nltk')
-
-if __name__ == '__main__':
-    # there shouldn't be import from NLTK for coverage to work properly
-    from doctest_nose_plugin import DoctestFix
-
-    try:
-        # Import RedNose plugin for colored test output
-        from rednose import RedNose
-
-        rednose_available = True
-    except ImportError:
-        rednose_available = False
-
-    class NltkPluginManager(PluginManager):
-        """
-        Nose plugin manager that replaces standard doctest plugin
-        with a patched version and adds RedNose plugin for colored test output.
-        """
-
-        def loadPlugins(self):
-            for plug in builtin.plugins:
-                if plug != Doctest:
-                    self.addPlugin(plug())
-            self.addPlugin(DoctestFix())
-            if rednose_available:
-                self.addPlugin(RedNose())
-
-            super(NltkPluginManager, self).loadPlugins()
-
-    manager = NltkPluginManager()
-    manager.loadPlugins()
-
-    # allow passing extra options and running individual tests
-    # Examples:
-    #
-    #    python runtests.py semantics.doctest
-    #    python runtests.py --with-id -v
-    #    python runtests.py --with-id -v nltk.featstruct
-
-    args = sys.argv[1:]
-    if not args:
-        args = [NLTK_TEST_DIR]
-
-    if all(arg.startswith('-') for arg in args):
-        # only extra options were passed
-        args += [NLTK_TEST_DIR]
-
-    # Activate RedNose and hide skipped test messages from output
-    if rednose_available:
-        args += ['--rednose', '--hide-skips']
-
-    arguments = [
-        '--exclude=',  # why is this needed?
-        # '--with-xunit',
-        # '--xunit-file=$WORKSPACE/nosetests.xml',
-        # '--nocapture',
-        '--with-doctest',
-        # '--doctest-tests',
-        # '--debug=nose,nose.importer,nose.inspector,nose.plugins,nose.result,nose.selector',
-        '--doctest-extension=.doctest',
-        '--doctest-fixtures=_fixt',
-        '--doctest-options=+ELLIPSIS,+NORMALIZE_WHITESPACE,+IGNORE_EXCEPTION_DETAIL,+ALLOW_UNICODE,'
-        'doctestencoding=utf-8',
-        # '--verbosity=3',
-    ] + args
-
-    nose.main(argv=arguments, plugins=manager.plugins)
diff --git a/nlp_resource_data/nltk/test/segmentation_fixt.py b/nlp_resource_data/nltk/test/segmentation_fixt.py

deleted file mode 100644 (file)

index bb8a7cf..0000000
--- a/nlp_resource_data/nltk/test/segmentation_fixt.py
+++ /dev/null
@@ -1,12 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import
-
-
-# skip segmentation.doctest if numpy is not available
-def setup_module(module):
-    from nose import SkipTest
-
-    try:
-        import numpy
-    except ImportError:
-        raise SkipTest("segmentation.doctest requires numpy")
diff --git a/nlp_resource_data/nltk/test/semantics.doctest b/nlp_resource_data/nltk/test/semantics.doctest

deleted file mode 100644 (file)

index f1a1f3c..0000000
--- a/nlp_resource_data/nltk/test/semantics.doctest
+++ /dev/null
@@ -1,665 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-=========
-Semantics
-=========
-
-    >>> import nltk
-    >>> from nltk.sem import Valuation, Model
-    >>> v = [('adam', 'b1'), ('betty', 'g1'), ('fido', 'd1'),
-    ... ('girl', set(['g1', 'g2'])), ('boy', set(['b1', 'b2'])),
-    ... ('dog', set(['d1'])),
-    ... ('love', set([('b1', 'g1'), ('b2', 'g2'), ('g1', 'b1'), ('g2', 'b1')]))]
-    >>> val = Valuation(v)
-    >>> dom = val.domain
-    >>> m = Model(dom, val)
-
-Evaluation
-----------
-
-The top-level method of a ``Model`` instance is ``evaluate()``, which
-assigns a semantic value to expressions of the ``logic`` module, under
-an assignment ``g``:
-
-    >>> dom = val.domain
-    >>> g = nltk.sem.Assignment(dom)
-    >>> m.evaluate('all x.(boy(x) -> - girl(x))', g)
-    True
-
-
-``evaluate()`` calls a recursive function ``satisfy()``, which in turn
-calls a function ``i()`` to interpret non-logical constants and
-individual variables. ``i()`` delegates the interpretation of these to
-the model's ``Valuation`` and the variable assignment ``g``
-respectively. Any atomic expression which cannot be assigned a value
-by ``i`` raises an ``Undefined`` exception; this is caught by
-``evaluate``, which returns the string ``'Undefined'``.
-
-    >>> m.evaluate('walk(adam)', g, trace=2)
-    <BLANKLINE>
-    'walk(adam)' is undefined under M, g
-    'Undefined'
-
-Batch Processing
-----------------
-
-The utility functions ``interpret_sents()`` and ``evaluate_sents()`` are intended to
-help with processing multiple sentences. Here's an example of the first of these:
-
-    >>> sents = ['Mary walks']
-    >>> results = nltk.sem.util.interpret_sents(sents, 'grammars/sample_grammars/sem2.fcfg')
-    >>> for result in results:
-    ...     for (synrep, semrep) in result:
-    ...         print(synrep)
-    (S[SEM=<walk(mary)>]
-      (NP[-LOC, NUM='sg', SEM=<\P.P(mary)>]
-        (PropN[-LOC, NUM='sg', SEM=<\P.P(mary)>] Mary))
-      (VP[NUM='sg', SEM=<\x.walk(x)>]
-        (IV[NUM='sg', SEM=<\x.walk(x)>, TNS='pres'] walks)))
-
-In order to provide backwards compatibility with 'legacy' grammars where the semantics value
-is specified with a lowercase
-``sem`` feature, the relevant feature name can be passed to the function using the
-``semkey`` parameter, as shown here:
-
-    >>> sents = ['raining']
-    >>> g = nltk.grammar.FeatureGrammar.fromstring("""
-    ... % start S
-    ... S[sem=<raining>] -> 'raining'
-    ... """)
-    >>> results = nltk.sem.util.interpret_sents(sents, g, semkey='sem')
-    >>> for result in results:
-    ...     for (synrep, semrep) in result:
-    ...         print(semrep)
-    raining
-
-The function ``evaluate_sents()`` works in a similar manner, but also needs to be
-passed a ``Model`` against which the semantic representations are evaluated.
-
-Unit Tests
-==========
-
-
-Unit tests for relations and valuations
----------------------------------------
-
-    >>> from nltk.sem import *
-
-Relations are sets of tuples, all of the same length.
-
-    >>> s1 = set([('d1', 'd2'), ('d1', 'd1'), ('d2', 'd1')])
-    >>> is_rel(s1)
-    True
-    >>> s2 = set([('d1', 'd2'), ('d1', 'd2'), ('d1',)])
-    >>> is_rel(s2)
-    Traceback (most recent call last):
-      . . .
-    ValueError: Set set([('d1', 'd2'), ('d1',)]) contains sequences of different lengths
-    >>> s3 = set(['d1', 'd2'])
-    >>> is_rel(s3)
-    Traceback (most recent call last):
-      . . .
-    ValueError: Set set(['d2', 'd1']) contains sequences of different lengths
-    >>> s4 = set2rel(s3)
-    >>> is_rel(s4)
-    True
-    >>> is_rel(set())
-    True
-    >>> null_binary_rel = set([(None, None)])
-    >>> is_rel(null_binary_rel)
-    True
-
-Sets of entities are converted into sets of singleton tuples
-(containing strings).
-
-    >>> sorted(set2rel(s3))
-    [('d1',), ('d2',)]
-    >>> sorted(set2rel(set([1,3,5,])))
-    ['1', '3', '5']
-    >>> set2rel(set()) == set()
-    True
-    >>> set2rel(set2rel(s3)) == set2rel(s3)
-    True
-
-Predication is evaluated by set membership.
-
-    >>> ('d1', 'd2') in s1
-    True
-    >>> ('d2', 'd2') in s1
-    False
-    >>> ('d1',) in s1
-    False
-    >>> 'd2' in s1
-    False
-    >>> ('d1',) in s4
-    True
-    >>> ('d1',) in set()
-    False
-    >>> 'd1' in  null_binary_rel
-    False
-
-
-    >>> val = Valuation([('Fido', 'd1'), ('dog', set(['d1', 'd2'])), ('walk', set())])
-    >>> sorted(val['dog'])
-    [('d1',), ('d2',)]
-    >>> val.domain == set(['d1', 'd2'])
-    True
-    >>> print(val.symbols)
-    ['Fido', 'dog', 'walk']
-
-
-Parse a valuation from a string.
-
-    >>> v = """
-    ... john => b1
-    ... mary => g1
-    ... suzie => g2
-    ... fido => d1
-    ... tess => d2
-    ... noosa => n
-    ... girl => {g1, g2}
-    ... boy => {b1, b2}
-    ... dog => {d1, d2}
-    ... bark => {d1, d2}
-    ... walk => {b1, g2, d1}
-    ... chase => {(b1, g1), (b2, g1), (g1, d1), (g2, d2)}
-    ... see => {(b1, g1), (b2, d2), (g1, b1),(d2, b1), (g2, n)}
-    ... in => {(b1, n), (b2, n), (d2, n)}
-    ... with => {(b1, g1), (g1, b1), (d1, b1), (b1, d1)}
-    ... """
-    >>> val = Valuation.fromstring(v)
-
-    >>> print(val) # doctest: +SKIP
-    {'bark': set([('d1',), ('d2',)]),
-     'boy': set([('b1',), ('b2',)]),
-     'chase': set([('b1', 'g1'), ('g2', 'd2'), ('g1', 'd1'), ('b2', 'g1')]),
-     'dog': set([('d1',), ('d2',)]),
-     'fido': 'd1',
-     'girl': set([('g2',), ('g1',)]),
-     'in': set([('d2', 'n'), ('b1', 'n'), ('b2', 'n')]),
-     'john': 'b1',
-     'mary': 'g1',
-     'noosa': 'n',
-     'see': set([('b1', 'g1'), ('b2', 'd2'), ('d2', 'b1'), ('g2', 'n'), ('g1', 'b1')]),
-     'suzie': 'g2',
-     'tess': 'd2',
-     'walk': set([('d1',), ('b1',), ('g2',)]),
-     'with': set([('b1', 'g1'), ('d1', 'b1'), ('b1', 'd1'), ('g1', 'b1')])}
-
-
-Unit tests for function argument application in a Model
--------------------------------------------------------
-
-    >>> v = [('adam', 'b1'), ('betty', 'g1'), ('fido', 'd1'),\
-    ...      ('girl', set(['g1', 'g2'])), ('boy', set(['b1', 'b2'])), ('dog', set(['d1'])),
-    ...      ('love', set([('b1', 'g1'), ('b2', 'g2'), ('g1', 'b1'), ('g2', 'b1')])),
-    ...      ('kiss', null_binary_rel)]
-    >>> val = Valuation(v)
-    >>> dom = val.domain
-    >>> m = Model(dom, val)
-    >>> g = Assignment(dom)
-    >>> sorted(val['boy'])
-    [('b1',), ('b2',)]
-    >>> ('b1',) in val['boy']
-    True
-    >>> ('g1',) in val['boy']
-    False
-    >>> ('foo',) in val['boy']
-    False
-    >>> ('b1', 'g1') in val['love']
-    True
-    >>> ('b1', 'b1') in val['kiss']
-    False
-    >>> sorted(val.domain)
-    ['b1', 'b2', 'd1', 'g1', 'g2']
-
-
-Model Tests
-===========
-
-Extension of Lambda expressions
-
-    >>> v0 = [('adam', 'b1'), ('betty', 'g1'), ('fido', 'd1'),\
-    ... ('girl', set(['g1', 'g2'])), ('boy', set(['b1', 'b2'])),
-    ... ('dog', set(['d1'])),
-    ... ('love', set([('b1', 'g1'), ('b2', 'g2'), ('g1', 'b1'), ('g2', 'b1')]))]
-
-    >>> val0 = Valuation(v0)
-    >>> dom0 = val0.domain
-    >>> m0 = Model(dom0, val0)
-    >>> g0 = Assignment(dom0)
-
-    >>> print(m0.evaluate(r'\x. \y. love(x, y)', g0) == {'g2': {'g2': False, 'b2': False, 'b1': True, 'g1': False, 'd1': False}, 'b2': {'g2': True, 'b2': False, 'b1': False, 'g1': False, 'd1': False}, 'b1': {'g2': False, 'b2': False, 'b1': False, 'g1': True, 'd1': False}, 'g1': {'g2': False, 'b2': False, 'b1': True, 'g1': False, 'd1': False}, 'd1': {'g2': False, 'b2': False, 'b1': False, 'g1': False, 'd1': False}})
-    True
-    >>> print(m0.evaluate(r'\x. dog(x) (adam)', g0))
-    False
-    >>> print(m0.evaluate(r'\x. (dog(x) | boy(x)) (adam)', g0))
-    True
-    >>> print(m0.evaluate(r'\x. \y. love(x, y)(fido)', g0) == {'g2': False, 'b2': False, 'b1': False, 'g1': False, 'd1': False})
-    True
-    >>> print(m0.evaluate(r'\x. \y. love(x, y)(adam)', g0) == {'g2': False, 'b2': False, 'b1': False, 'g1': True, 'd1': False})
-    True
-    >>> print(m0.evaluate(r'\x. \y. love(x, y)(betty)', g0) == {'g2': False, 'b2': False, 'b1': True, 'g1': False, 'd1': False})
-    True
-    >>> print(m0.evaluate(r'\x. \y. love(x, y)(betty)(adam)', g0))
-    True
-    >>> print(m0.evaluate(r'\x. \y. love(x, y)(betty, adam)', g0))
-    True
-    >>> print(m0.evaluate(r'\y. \x. love(x, y)(fido)(adam)', g0))
-    False
-    >>> print(m0.evaluate(r'\y. \x. love(x, y)(betty, adam)', g0))
-    True
-    >>> print(m0.evaluate(r'\x. exists y. love(x, y)', g0) == {'g2': True, 'b2': True, 'b1': True, 'g1': True, 'd1': False})
-    True
-    >>> print(m0.evaluate(r'\z. adam', g0) == {'g2': 'b1', 'b2': 'b1', 'b1': 'b1', 'g1': 'b1', 'd1': 'b1'})
-    True
-    >>> print(m0.evaluate(r'\z. love(x, y)', g0) == {'g2': False, 'b2': False, 'b1': False, 'g1': False, 'd1': False})
-    True
-
-
-Propositional Model Test
-------------------------
-
-    >>> tests = [
-    ...     ('P & Q', True),
-    ...     ('P & R', False),
-    ...     ('- P', False),
-    ...     ('- R', True),
-    ...     ('- - P', True),
-    ...     ('- (P & R)', True),
-    ...     ('P | R', True),
-    ...     ('R | P', True),
-    ...     ('R | R', False),
-    ...     ('- P | R', False),
-    ...     ('P | - P', True),
-    ...     ('P -> Q', True),
-    ...     ('P -> R', False),
-    ...     ('R -> P', True),
-    ...     ('P <-> P', True),
-    ...     ('R <-> R', True),
-    ...     ('P <-> R', False),
-    ...     ]
-    >>> val1 = Valuation([('P', True), ('Q', True), ('R', False)])
-    >>> dom = set([])
-    >>> m = Model(dom, val1)
-    >>> g = Assignment(dom)
-    >>> for (sent, testvalue) in tests:
-    ...     semvalue = m.evaluate(sent, g)
-    ...     if semvalue == testvalue:
-    ...         print('*', end=' ')
-    * * * * * * * * * * * * * * * * *
-
-
-Test of i Function
-------------------
-
-    >>> from nltk.sem import Expression
-    >>> v = [('adam', 'b1'), ('betty', 'g1'), ('fido', 'd1'),
-    ...      ('girl', set(['g1', 'g2'])), ('boy', set(['b1', 'b2'])), ('dog', set(['d1'])),
-    ...      ('love', set([('b1', 'g1'), ('b2', 'g2'), ('g1', 'b1'), ('g2', 'b1')]))]
-    >>> val = Valuation(v)
-    >>> dom = val.domain
-    >>> m = Model(dom, val)
-    >>> g = Assignment(dom, [('x', 'b1'), ('y', 'g2')])
-    >>> exprs = ['adam', 'girl', 'love', 'walks', 'x', 'y', 'z']
-    >>> parsed_exprs = [Expression.fromstring(e) for e in exprs]
-    >>> sorted_set = lambda x: sorted(x) if isinstance(x, set) else x
-    >>> for parsed in parsed_exprs:
-    ...     try:
-    ...         print("'%s' gets value %s" % (parsed, sorted_set(m.i(parsed, g))))
-    ...     except Undefined:
-    ...         print("'%s' is Undefined" % parsed)
-    'adam' gets value b1
-    'girl' gets value [('g1',), ('g2',)]
-    'love' gets value [('b1', 'g1'), ('b2', 'g2'), ('g1', 'b1'), ('g2', 'b1')]
-    'walks' is Undefined
-    'x' gets value b1
-    'y' gets value g2
-    'z' is Undefined
-
-Test for formulas in Model
---------------------------
-
-    >>> tests = [
-    ...     ('love(adam, betty)', True),
-    ...     ('love(adam, sue)', 'Undefined'),
-    ...     ('dog(fido)', True),
-    ...     ('- dog(fido)', False),
-    ...     ('- - dog(fido)', True),
-    ...     ('- dog(sue)', 'Undefined'),
-    ...     ('dog(fido) & boy(adam)', True),
-    ...     ('- (dog(fido) & boy(adam))', False),
-    ...     ('- dog(fido) & boy(adam)', False),
-    ...     ('dog(fido) | boy(adam)', True),
-    ...     ('- (dog(fido) | boy(adam))', False),
-    ...     ('- dog(fido) | boy(adam)', True),
-    ...     ('- dog(fido) | - boy(adam)', False),
-    ...     ('dog(fido) -> boy(adam)', True),
-    ...     ('- (dog(fido) -> boy(adam))', False),
-    ...     ('- dog(fido) -> boy(adam)', True),
-    ...     ('exists x . love(adam, x)', True),
-    ...     ('all x . love(adam, x)', False),
-    ...     ('fido = fido', True),
-    ...     ('exists x . all y. love(x, y)', False),
-    ...     ('exists x . (x = fido)', True),
-    ...     ('all x . (dog(x) | - dog(x))', True),
-    ...     ('adam = mia', 'Undefined'),
-    ...     ('\\x. (boy(x) | girl(x))', {'g2': True, 'b2': True, 'b1': True, 'g1': True, 'd1': False}),
-    ...     ('\\x. exists y. (boy(x) & love(x, y))', {'g2': False, 'b2': True, 'b1': True, 'g1': False, 'd1': False}),
-    ...     ('exists z1. boy(z1)', True),
-    ...     ('exists x. (boy(x) & - (x = adam))', True),
-    ...     ('exists x. (boy(x) & all y. love(y, x))', False),
-    ...     ('all x. (boy(x) | girl(x))', False),
-    ...     ('all x. (girl(x) -> exists y. boy(y) & love(x, y))', False),
-    ...     ('exists x. (boy(x) & all y. (girl(y) -> love(y, x)))', True),
-    ...     ('exists x. (boy(x) & all y. (girl(y) -> love(x, y)))', False),
-    ...     ('all x. (dog(x) -> - girl(x))', True),
-    ...     ('exists x. exists y. (love(x, y) & love(x, y))', True),
-    ...     ]
-    >>> for (sent, testvalue) in tests:
-    ...     semvalue = m.evaluate(sent, g)
-    ...     if semvalue == testvalue:
-    ...         print('*', end=' ')
-    ...     else:
-    ...         print(sent, semvalue)
-    * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
-
-
-
-Satisfier Tests
----------------
-
-    >>> formulas = [
-    ...     'boy(x)',
-    ...     '(x = x)',
-    ...     '(boy(x) | girl(x))',
-    ...     '(boy(x) & girl(x))',
-    ...     'love(adam, x)',
-    ...     'love(x, adam)',
-    ...     '- (x = adam)',
-    ...     'exists z22. love(x, z22)',
-    ...     'exists y. love(y, x)',
-    ...     'all y. (girl(y) -> love(x, y))',
-    ...     'all y. (girl(y) -> love(y, x))',
-    ...     'all y. (girl(y) -> (boy(x) & love(y, x)))',
-    ...     'boy(x) & all y. (girl(y) -> love(x, y))',
-    ...     'boy(x) & all y. (girl(y) -> love(y, x))',
-    ...     'boy(x) & exists y. (girl(y) & love(y, x))',
-    ...     'girl(x) -> dog(x)',
-    ...     'all y. (dog(y) -> (x = y))',
-    ...     '- exists y. love(y, x)',
-    ...     'exists y. (love(adam, y) & love(y, x))'
-    ...     ]
-    >>> g.purge()
-    >>> g.add('x', 'b1')
-    {'x': 'b1'}
-    >>> for f in formulas: # doctest: +NORMALIZE_WHITESPACE
-    ...     try:
-    ...         print("'%s' gets value: %s" % (f, m.evaluate(f, g)))
-    ...     except Undefined:
-    ...         print("'%s' is Undefined" % f)
-    'boy(x)' gets value: True
-    '(x = x)' gets value: True
-    '(boy(x) | girl(x))' gets value: True
-    '(boy(x) & girl(x))' gets value: False
-    'love(adam, x)' gets value: False
-    'love(x, adam)' gets value: False
-    '- (x = adam)' gets value: False
-    'exists z22. love(x, z22)' gets value: True
-    'exists y. love(y, x)' gets value: True
-    'all y. (girl(y) -> love(x, y))' gets value: False
-    'all y. (girl(y) -> love(y, x))' gets value: True
-    'all y. (girl(y) -> (boy(x) & love(y, x)))' gets value: True
-    'boy(x) & all y. (girl(y) -> love(x, y))' gets value: False
-    'boy(x) & all y. (girl(y) -> love(y, x))' gets value: True
-    'boy(x) & exists y. (girl(y) & love(y, x))' gets value: True
-    'girl(x) -> dog(x)' gets value: True
-    'all y. (dog(y) -> (x = y))' gets value: False
-    '- exists y. love(y, x)' gets value: False
-    'exists y. (love(adam, y) & love(y, x))' gets value: True
-
-    >>> from nltk.sem import Expression
-    >>> for fmla in formulas: # doctest: +NORMALIZE_WHITESPACE
-    ...     p = Expression.fromstring(fmla)
-    ...     g.purge()
-    ...     print("Satisfiers of '%s':\n\t%s" % (p, sorted(m.satisfiers(p, 'x', g))))
-    Satisfiers of 'boy(x)':
-    ['b1', 'b2']
-    Satisfiers of '(x = x)':
-    ['b1', 'b2', 'd1', 'g1', 'g2']
-    Satisfiers of '(boy(x) | girl(x))':
-    ['b1', 'b2', 'g1', 'g2']
-    Satisfiers of '(boy(x) & girl(x))':
-    []
-    Satisfiers of 'love(adam,x)':
-    ['g1']
-    Satisfiers of 'love(x,adam)':
-    ['g1', 'g2']
-    Satisfiers of '-(x = adam)':
-    ['b2', 'd1', 'g1', 'g2']
-    Satisfiers of 'exists z22.love(x,z22)':
-    ['b1', 'b2', 'g1', 'g2']
-    Satisfiers of 'exists y.love(y,x)':
-    ['b1', 'g1', 'g2']
-    Satisfiers of 'all y.(girl(y) -> love(x,y))':
-    []
-    Satisfiers of 'all y.(girl(y) -> love(y,x))':
-    ['b1']
-    Satisfiers of 'all y.(girl(y) -> (boy(x) & love(y,x)))':
-    ['b1']
-    Satisfiers of '(boy(x) & all y.(girl(y) -> love(x,y)))':
-    []
-    Satisfiers of '(boy(x) & all y.(girl(y) -> love(y,x)))':
-    ['b1']
-    Satisfiers of '(boy(x) & exists y.(girl(y) & love(y,x)))':
-    ['b1']
-    Satisfiers of '(girl(x) -> dog(x))':
-    ['b1', 'b2', 'd1']
-    Satisfiers of 'all y.(dog(y) -> (x = y))':
-    ['d1']
-    Satisfiers of '-exists y.love(y,x)':
-    ['b2', 'd1']
-    Satisfiers of 'exists y.(love(adam,y) & love(y,x))':
-    ['b1']
-
-
-Tests based on the Blackburn & Bos testsuite
---------------------------------------------
-
-    >>> v1 = [('jules', 'd1'), ('vincent', 'd2'), ('pumpkin', 'd3'),
-    ...       ('honey_bunny', 'd4'), ('yolanda', 'd5'),
-    ...       ('customer', set(['d1', 'd2'])),
-    ...       ('robber', set(['d3', 'd4'])),
-    ...       ('love', set([('d3', 'd4')]))]
-    >>> val1 = Valuation(v1)
-    >>> dom1 = val1.domain
-    >>> m1 = Model(dom1, val1)
-    >>> g1 = Assignment(dom1)
-
-    >>> v2 = [('jules', 'd1'), ('vincent', 'd2'), ('pumpkin', 'd3'),
-    ...       ('honey_bunny', 'd4'), ('yolanda', 'd4'),
-    ...       ('customer', set(['d1', 'd2', 'd5', 'd6'])),
-    ...       ('robber', set(['d3', 'd4'])),
-    ...       ('love', set([(None, None)]))]
-    >>> val2 = Valuation(v2)
-    >>> dom2 = set(['d1', 'd2', 'd3', 'd4', 'd5', 'd6'])
-    >>> m2 = Model(dom2, val2)
-    >>> g2 = Assignment(dom2)
-    >>> g21 = Assignment(dom2)
-    >>> g21.add('y', 'd3')
-    {'y': 'd3'}
-
-    >>> v3 = [('mia', 'd1'), ('jody', 'd2'), ('jules', 'd3'),
-    ...       ('vincent', 'd4'),
-    ...       ('woman', set(['d1', 'd2'])), ('man', set(['d3', 'd4'])),
-    ...       ('joke', set(['d5', 'd6'])), ('episode', set(['d7', 'd8'])),
-    ...       ('in', set([('d5', 'd7'), ('d5', 'd8')])),
-    ...       ('tell', set([('d1', 'd5'), ('d2', 'd6')]))]
-    >>> val3 = Valuation(v3)
-    >>> dom3 = set(['d1', 'd2', 'd3', 'd4', 'd5', 'd6', 'd7', 'd8'])
-    >>> m3 = Model(dom3, val3)
-    >>> g3 = Assignment(dom3)
-
-    >>> tests = [
-    ...     ('exists x. robber(x)', m1, g1, True),
-    ...     ('exists x. exists y. love(y, x)', m1, g1, True),
-    ...     ('exists x0. exists x1. love(x1, x0)', m2, g2, False),
-    ...     ('all x. all y. love(y, x)', m2, g2, False),
-    ...     ('- (all x. all y. love(y, x))', m2, g2, True),
-    ...     ('all x. all y. - love(y, x)', m2, g2, True),
-    ...     ('yolanda = honey_bunny', m2, g2, True),
-    ...     ('mia = honey_bunny', m2, g2, 'Undefined'),
-    ...     ('- (yolanda = honey_bunny)', m2, g2, False),
-    ...     ('- (mia = honey_bunny)', m2, g2, 'Undefined'),
-    ...     ('all x. (robber(x) | customer(x))', m2, g2, True),
-    ...     ('- (all x. (robber(x) | customer(x)))', m2, g2, False),
-    ...     ('(robber(x) | customer(x))', m2, g2, 'Undefined'),
-    ...     ('(robber(y) | customer(y))', m2, g21, True),
-    ...     ('exists x. (man(x) & exists x. woman(x))', m3, g3, True),
-    ...     ('exists x. (man(x) & exists x. woman(x))', m3, g3, True),
-    ...     ('- exists x. woman(x)', m3, g3, False),
-    ...     ('exists x. (tasty(x) & burger(x))', m3, g3, 'Undefined'),
-    ...     ('- exists x. (tasty(x) & burger(x))', m3, g3, 'Undefined'),
-    ...     ('exists x. (man(x) & - exists y. woman(y))', m3, g3, False),
-    ...     ('exists x. (man(x) & - exists x. woman(x))', m3, g3, False),
-    ...     ('exists x. (woman(x) & - exists x. customer(x))', m2, g2, 'Undefined'),
-    ... ]
-
-    >>> for item in tests:
-    ...     sentence, model, g, testvalue = item
-    ...     semvalue = model.evaluate(sentence, g)
-    ...     if semvalue == testvalue:
-    ...         print('*', end=' ')
-    ...     g.purge()
-    * * * * * * * * * * * * * * * * * * * * * *
-
-
-Tests for mapping from syntax to semantics
-------------------------------------------
-
-Load a valuation from a file.
-
-    >>> import nltk.data
-    >>> from nltk.sem.util import parse_sents
-    >>> val = nltk.data.load('grammars/sample_grammars/valuation1.val')
-    >>> dom = val.domain
-    >>> m = Model(dom, val)
-    >>> g = Assignment(dom)
-    >>> gramfile = 'grammars/sample_grammars/sem2.fcfg'
-    >>> inputs = ['John sees a girl', 'every dog barks']
-    >>> parses = parse_sents(inputs, gramfile)
-    >>> for sent, trees in zip(inputs, parses):
-    ...     print()
-    ...     print("Sentence: %s" % sent)
-    ...     for tree in trees:
-    ...         print("Parse:\n %s" %tree)
-    ...         print("Semantics: %s" %  root_semrep(tree))
-    <BLANKLINE>
-    Sentence: John sees a girl
-    Parse:
-     (S[SEM=<exists x.(girl(x) & see(john,x))>]
-      (NP[-LOC, NUM='sg', SEM=<\P.P(john)>]
-        (PropN[-LOC, NUM='sg', SEM=<\P.P(john)>] John))
-      (VP[NUM='sg', SEM=<\y.exists x.(girl(x) & see(y,x))>]
-        (TV[NUM='sg', SEM=<\X y.X(\x.see(y,x))>, TNS='pres'] sees)
-        (NP[NUM='sg', SEM=<\Q.exists x.(girl(x) & Q(x))>]
-          (Det[NUM='sg', SEM=<\P Q.exists x.(P(x) & Q(x))>] a)
-          (Nom[NUM='sg', SEM=<\x.girl(x)>]
-            (N[NUM='sg', SEM=<\x.girl(x)>] girl)))))
-    Semantics: exists x.(girl(x) & see(john,x))
-    <BLANKLINE>
-    Sentence: every dog barks
-    Parse:
-     (S[SEM=<all x.(dog(x) -> bark(x))>]
-      (NP[NUM='sg', SEM=<\Q.all x.(dog(x) -> Q(x))>]
-        (Det[NUM='sg', SEM=<\P Q.all x.(P(x) -> Q(x))>] every)
-        (Nom[NUM='sg', SEM=<\x.dog(x)>]
-          (N[NUM='sg', SEM=<\x.dog(x)>] dog)))
-      (VP[NUM='sg', SEM=<\x.bark(x)>]
-        (IV[NUM='sg', SEM=<\x.bark(x)>, TNS='pres'] barks)))
-    Semantics: all x.(dog(x) -> bark(x))
-
-    >>> sent = "every dog barks"
-    >>> result = nltk.sem.util.interpret_sents([sent], gramfile)[0]
-    >>> for (syntree, semrep) in result:
-    ...     print(syntree)
-    ...     print()
-    ...     print(semrep)
-    (S[SEM=<all x.(dog(x) -> bark(x))>]
-      (NP[NUM='sg', SEM=<\Q.all x.(dog(x) -> Q(x))>]
-        (Det[NUM='sg', SEM=<\P Q.all x.(P(x) -> Q(x))>] every)
-        (Nom[NUM='sg', SEM=<\x.dog(x)>]
-          (N[NUM='sg', SEM=<\x.dog(x)>] dog)))
-      (VP[NUM='sg', SEM=<\x.bark(x)>]
-        (IV[NUM='sg', SEM=<\x.bark(x)>, TNS='pres'] barks)))
-    <BLANKLINE>
-    all x.(dog(x) -> bark(x))
-
-    >>> result = nltk.sem.util.evaluate_sents([sent], gramfile, m, g)[0]
-    >>> for (syntree, semrep, value) in result:
-    ...     print(syntree)
-    ...     print()
-    ...     print(semrep)
-    ...     print()
-    ...     print(value)
-    (S[SEM=<all x.(dog(x) -> bark(x))>]
-      (NP[NUM='sg', SEM=<\Q.all x.(dog(x) -> Q(x))>]
-        (Det[NUM='sg', SEM=<\P Q.all x.(P(x) -> Q(x))>] every)
-        (Nom[NUM='sg', SEM=<\x.dog(x)>]
-          (N[NUM='sg', SEM=<\x.dog(x)>] dog)))
-      (VP[NUM='sg', SEM=<\x.bark(x)>]
-        (IV[NUM='sg', SEM=<\x.bark(x)>, TNS='pres'] barks)))
-    <BLANKLINE>
-    all x.(dog(x) -> bark(x))
-    <BLANKLINE>
-    True
-
-    >>> sents = ['Mary walks', 'John sees a dog']
-    >>> results = nltk.sem.util.interpret_sents(sents, 'grammars/sample_grammars/sem2.fcfg')
-    >>> for result in results:
-    ...     for (synrep, semrep) in result:
-    ...         print(synrep)
-    (S[SEM=<walk(mary)>]
-      (NP[-LOC, NUM='sg', SEM=<\P.P(mary)>]
-        (PropN[-LOC, NUM='sg', SEM=<\P.P(mary)>] Mary))
-      (VP[NUM='sg', SEM=<\x.walk(x)>]
-        (IV[NUM='sg', SEM=<\x.walk(x)>, TNS='pres'] walks)))
-    (S[SEM=<exists x.(dog(x) & see(john,x))>]
-      (NP[-LOC, NUM='sg', SEM=<\P.P(john)>]
-        (PropN[-LOC, NUM='sg', SEM=<\P.P(john)>] John))
-      (VP[NUM='sg', SEM=<\y.exists x.(dog(x) & see(y,x))>]
-        (TV[NUM='sg', SEM=<\X y.X(\x.see(y,x))>, TNS='pres'] sees)
-        (NP[NUM='sg', SEM=<\Q.exists x.(dog(x) & Q(x))>]
-          (Det[NUM='sg', SEM=<\P Q.exists x.(P(x) & Q(x))>] a)
-          (Nom[NUM='sg', SEM=<\x.dog(x)>]
-            (N[NUM='sg', SEM=<\x.dog(x)>] dog)))))
-
-Cooper Storage
---------------
-
-    >>> from nltk.sem import cooper_storage as cs
-    >>> sentence = 'every girl chases a dog'
-    >>> trees = cs.parse_with_bindops(sentence, grammar='grammars/book_grammars/storage.fcfg')
-    >>> semrep = trees[0].label()['SEM']
-    >>> cs_semrep = cs.CooperStore(semrep)
-    >>> print(cs_semrep.core)
-    chase(z2,z4)
-    >>> for bo in cs_semrep.store:
-    ...     print(bo)
-    bo(\P.all x.(girl(x) -> P(x)),z2)
-    bo(\P.exists x.(dog(x) & P(x)),z4)
-    >>> cs_semrep.s_retrieve(trace=True)
-    Permutation 1
-       (\P.all x.(girl(x) -> P(x)))(\z2.chase(z2,z4))
-       (\P.exists x.(dog(x) & P(x)))(\z4.all x.(girl(x) -> chase(x,z4)))
-    Permutation 2
-       (\P.exists x.(dog(x) & P(x)))(\z4.chase(z2,z4))
-       (\P.all x.(girl(x) -> P(x)))(\z2.exists x.(dog(x) & chase(z2,x)))
-
-    >>> for reading in cs_semrep.readings:
-    ...     print(reading)
-    exists x.(dog(x) & all z3.(girl(z3) -> chase(z3,x)))
-    all x.(girl(x) -> exists z4.(dog(z4) & chase(x,z4)))
-
-
diff --git a/nlp_resource_data/nltk/test/semantics_fixt.py b/nlp_resource_data/nltk/test/semantics_fixt.py

deleted file mode 100644 (file)

index 135180d..0000000
--- a/nlp_resource_data/nltk/test/semantics_fixt.py
+++ /dev/null
@@ -1,8 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import
-
-# reset the variables counter before running tests
-def setup_module(module):
-    from nltk.sem import logic
-
-    logic._counter._value = 0
diff --git a/nlp_resource_data/nltk/test/sentiment.doctest b/nlp_resource_data/nltk/test/sentiment.doctest

deleted file mode 100644 (file)

index 359e165..0000000
--- a/nlp_resource_data/nltk/test/sentiment.doctest
+++ /dev/null
@@ -1,233 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-===================
-Sentiment Analysis
-===================
-
-    >>> from nltk.classify import NaiveBayesClassifier
-    >>> from nltk.corpus import subjectivity
-    >>> from nltk.sentiment import SentimentAnalyzer
-    >>> from nltk.sentiment.util import *
-
-    >>> n_instances = 100
-    >>> subj_docs = [(sent, 'subj') for sent in subjectivity.sents(categories='subj')[:n_instances]]
-    >>> obj_docs = [(sent, 'obj') for sent in subjectivity.sents(categories='obj')[:n_instances]]
-    >>> len(subj_docs), len(obj_docs)
-    (100, 100)
-
-Each document is represented by a tuple (sentence, label). The sentence is tokenized,
-so it is represented by a list of strings:
-
-    >>> subj_docs[0]
-    (['smart', 'and', 'alert', ',', 'thirteen', 'conversations', 'about', 'one',
-    'thing', 'is', 'a', 'small', 'gem', '.'], 'subj')
-
-We separately split subjective and objective instances to keep a balanced uniform
-class distribution in both train and test sets.
-
-    >>> train_subj_docs = subj_docs[:80]
-    >>> test_subj_docs = subj_docs[80:100]
-    >>> train_obj_docs = obj_docs[:80]
-    >>> test_obj_docs = obj_docs[80:100]
-    >>> training_docs = train_subj_docs+train_obj_docs
-    >>> testing_docs = test_subj_docs+test_obj_docs
-
-    >>> sentim_analyzer = SentimentAnalyzer()
-    >>> all_words_neg = sentim_analyzer.all_words([mark_negation(doc) for doc in training_docs])
-
-We use simple unigram word features, handling negation:
-
-    >>> unigram_feats = sentim_analyzer.unigram_word_feats(all_words_neg, min_freq=4)
-    >>> len(unigram_feats)
-    83
-    >>> sentim_analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigram_feats)
-
-We apply features to obtain a feature-value representation of our datasets:
-
-    >>> training_set = sentim_analyzer.apply_features(training_docs)
-    >>> test_set = sentim_analyzer.apply_features(testing_docs)
-
-We can now train our classifier on the training set, and subsequently output the
-evaluation results:
-
-    >>> trainer = NaiveBayesClassifier.train
-    >>> classifier = sentim_analyzer.train(trainer, training_set)
-    Training classifier
-    >>> for key,value in sorted(sentim_analyzer.evaluate(test_set).items()):
-    ...     print('{0}: {1}'.format(key, value))
-    Evaluating NaiveBayesClassifier results...
-    Accuracy: 0.8
-    F-measure [obj]: 0.8
-    F-measure [subj]: 0.8
-    Precision [obj]: 0.8
-    Precision [subj]: 0.8
-    Recall [obj]: 0.8
-    Recall [subj]: 0.8
-
-
-Vader
-------
-
-    >>> from nltk.sentiment.vader import SentimentIntensityAnalyzer
-    >>> sentences = ["VADER is smart, handsome, and funny.", # positive sentence example
-    ...    "VADER is smart, handsome, and funny!", # punctuation emphasis handled correctly (sentiment intensity adjusted)
-    ...    "VADER is very smart, handsome, and funny.",  # booster words handled correctly (sentiment intensity adjusted)
-    ...    "VADER is VERY SMART, handsome, and FUNNY.",  # emphasis for ALLCAPS handled
-    ...    "VADER is VERY SMART, handsome, and FUNNY!!!",# combination of signals - VADER appropriately adjusts intensity
-    ...    "VADER is VERY SMART, really handsome, and INCREDIBLY FUNNY!!!",# booster words & punctuation make this close to ceiling for score
-    ...    "The book was good.",         # positive sentence
-    ...    "The book was kind of good.", # qualified positive sentence is handled correctly (intensity adjusted)
-    ...    "The plot was good, but the characters are uncompelling and the dialog is not great.", # mixed negation sentence
-    ...    "A really bad, horrible book.",       # negative sentence with booster words
-    ...    "At least it isn't a horrible book.", # negated negative sentence with contraction
-    ...    ":) and :D",     # emoticons handled
-    ...    "",              # an empty string is correctly handled
-    ...    "Today sux",     #  negative slang handled
-    ...    "Today sux!",    #  negative slang with punctuation emphasis handled
-    ...    "Today SUX!",    #  negative slang with capitalization emphasis
-    ...    "Today kinda sux! But I'll get by, lol" # mixed sentiment example with slang and contrastive conjunction "but"
-    ... ]
-    >>> paragraph = "It was one of the worst movies I've seen, despite good reviews. \
-    ... Unbelievably bad acting!! Poor direction. VERY poor production. \
-    ... The movie was bad. Very bad movie. VERY bad movie. VERY BAD movie. VERY BAD movie!"
-
-    >>> from nltk import tokenize
-    >>> lines_list = tokenize.sent_tokenize(paragraph)
-    >>> sentences.extend(lines_list)
-
-    >>> tricky_sentences = [
-    ...    "Most automated sentiment analysis tools are shit.",
-    ...    "VADER sentiment analysis is the shit.",
-    ...    "Sentiment analysis has never been good.",
-    ...    "Sentiment analysis with VADER has never been this good.",
-    ...    "Warren Beatty has never been so entertaining.",
-    ...    "I won't say that the movie is astounding and I wouldn't claim that \
-    ...    the movie is too banal either.",
-    ...    "I like to hate Michael Bay films, but I couldn't fault this one",
-    ...    "It's one thing to watch an Uwe Boll film, but another thing entirely \
-    ...    to pay for it",
-    ...    "The movie was too good",
-    ...    "This movie was actually neither that funny, nor super witty.",
-    ...    "This movie doesn't care about cleverness, wit or any other kind of \
-    ...    intelligent humor.",
-    ...    "Those who find ugly meanings in beautiful things are corrupt without \
-    ...    being charming.",
-    ...    "There are slow and repetitive parts, BUT it has just enough spice to \
-    ...    keep it interesting.",
-    ...    "The script is not fantastic, but the acting is decent and the cinematography \
-    ...    is EXCELLENT!",
-    ...    "Roger Dodger is one of the most compelling variations on this theme.",
-    ...    "Roger Dodger is one of the least compelling variations on this theme.",
-    ...    "Roger Dodger is at least compelling as a variation on the theme.",
-    ...    "they fall in love with the product",
-    ...    "but then it breaks",
-    ...    "usually around the time the 90 day warranty expires",
-    ...    "the twin towers collapsed today",
-    ...    "However, Mr. Carter solemnly argues, his client carried out the kidnapping \
-    ...    under orders and in the ''least offensive way possible.''"
-    ... ]
-    >>> sentences.extend(tricky_sentences)
-    >>> for sentence in sentences:
-    ...     sid = SentimentIntensityAnalyzer()
-    ...     print(sentence)
-    ...     ss = sid.polarity_scores(sentence)
-    ...     for k in sorted(ss):
-    ...         print('{0}: {1}, '.format(k, ss[k]), end='')
-    ...     print()
-    VADER is smart, handsome, and funny.
-    compound: 0.8316, neg: 0.0, neu: 0.254, pos: 0.746,
-    VADER is smart, handsome, and funny!
-    compound: 0.8439, neg: 0.0, neu: 0.248, pos: 0.752,
-    VADER is very smart, handsome, and funny.
-    compound: 0.8545, neg: 0.0, neu: 0.299, pos: 0.701,
-    VADER is VERY SMART, handsome, and FUNNY.
-    compound: 0.9227, neg: 0.0, neu: 0.246, pos: 0.754,
-    VADER is VERY SMART, handsome, and FUNNY!!!
-    compound: 0.9342, neg: 0.0, neu: 0.233, pos: 0.767,
-    VADER is VERY SMART, really handsome, and INCREDIBLY FUNNY!!!
-    compound: 0.9469, neg: 0.0, neu: 0.294, pos: 0.706,
-    The book was good.
-    compound: 0.4404, neg: 0.0, neu: 0.508, pos: 0.492,
-    The book was kind of good.
-    compound: 0.3832, neg: 0.0, neu: 0.657, pos: 0.343,
-    The plot was good, but the characters are uncompelling and the dialog is not great.
-    compound: -0.7042, neg: 0.327, neu: 0.579, pos: 0.094,
-    A really bad, horrible book.
-    compound: -0.8211, neg: 0.791, neu: 0.209, pos: 0.0,
-    At least it isn't a horrible book.
-    compound: 0.431, neg: 0.0, neu: 0.637, pos: 0.363,
-    :) and :D
-    compound: 0.7925, neg: 0.0, neu: 0.124, pos: 0.876,
-    <BLANKLINE>
-    compound: 0.0, neg: 0.0, neu: 0.0, pos: 0.0,
-    Today sux
-    compound: -0.3612, neg: 0.714, neu: 0.286, pos: 0.0,
-    Today sux!
-    compound: -0.4199, neg: 0.736, neu: 0.264, pos: 0.0,
-    Today SUX!
-    compound: -0.5461, neg: 0.779, neu: 0.221, pos: 0.0,
-    Today kinda sux! But I'll get by, lol
-    compound: 0.2228, neg: 0.195, neu: 0.531, pos: 0.274,
-    It was one of the worst movies I've seen, despite good reviews.
-    compound: -0.7584, neg: 0.394, neu: 0.606, pos: 0.0,
-    Unbelievably bad acting!!
-    compound: -0.6572, neg: 0.686, neu: 0.314, pos: 0.0,
-    Poor direction.
-    compound: -0.4767, neg: 0.756, neu: 0.244, pos: 0.0,
-    VERY poor production.
-    compound: -0.6281, neg: 0.674, neu: 0.326, pos: 0.0,
-    The movie was bad.
-    compound: -0.5423, neg: 0.538, neu: 0.462, pos: 0.0,
-    Very bad movie.
-    compound: -0.5849, neg: 0.655, neu: 0.345, pos: 0.0,
-    VERY bad movie.
-    compound: -0.6732, neg: 0.694, neu: 0.306, pos: 0.0,
-    VERY BAD movie.
-    compound: -0.7398, neg: 0.724, neu: 0.276, pos: 0.0,
-    VERY BAD movie!
-    compound: -0.7616, neg: 0.735, neu: 0.265, pos: 0.0,
-    Most automated sentiment analysis tools are shit.
-    compound: -0.5574, neg: 0.375, neu: 0.625, pos: 0.0,
-    VADER sentiment analysis is the shit.
-    compound: 0.6124, neg: 0.0, neu: 0.556, pos: 0.444,
-    Sentiment analysis has never been good.
-    compound: -0.3412, neg: 0.325, neu: 0.675, pos: 0.0,
-    Sentiment analysis with VADER has never been this good.
-    compound: 0.5228, neg: 0.0, neu: 0.703, pos: 0.297,
-    Warren Beatty has never been so entertaining.
-    compound: 0.5777, neg: 0.0, neu: 0.616, pos: 0.384,
-    I won't say that the movie is astounding and I wouldn't claim that the movie is too banal either.
-    compound: 0.4215, neg: 0.0, neu: 0.851, pos: 0.149,
-    I like to hate Michael Bay films, but I couldn't fault this one
-    compound: 0.3153, neg: 0.157, neu: 0.534, pos: 0.309,
-    It's one thing to watch an Uwe Boll film, but another thing entirely to pay for it
-    compound: -0.2541, neg: 0.112, neu: 0.888, pos: 0.0,
-    The movie was too good
-    compound: 0.4404, neg: 0.0, neu: 0.58, pos: 0.42,
-    This movie was actually neither that funny, nor super witty.
-    compound: -0.6759, neg: 0.41, neu: 0.59, pos: 0.0,
-    This movie doesn't care about cleverness, wit or any other kind of intelligent humor.
-    compound: -0.1338, neg: 0.265, neu: 0.497, pos: 0.239,
-    Those who find ugly meanings in beautiful things are corrupt without being charming.
-    compound: -0.3553, neg: 0.314, neu: 0.493, pos: 0.192,
-    There are slow and repetitive parts, BUT it has just enough spice to keep it interesting.
-    compound: 0.4678, neg: 0.079, neu: 0.735, pos: 0.186,
-    The script is not fantastic, but the acting is decent and the cinematography is EXCELLENT!
-    compound: 0.7565, neg: 0.092, neu: 0.607, pos: 0.301,
-    Roger Dodger is one of the most compelling variations on this theme.
-    compound: 0.2944, neg: 0.0, neu: 0.834, pos: 0.166,
-    Roger Dodger is one of the least compelling variations on this theme.
-    compound: -0.1695, neg: 0.132, neu: 0.868, pos: 0.0,
-    Roger Dodger is at least compelling as a variation on the theme.
-    compound: 0.2263, neg: 0.0, neu: 0.84, pos: 0.16,
-    they fall in love with the product
-    compound: 0.6369, neg: 0.0, neu: 0.588, pos: 0.412,
-    but then it breaks
-    compound: 0.0, neg: 0.0, neu: 1.0, pos: 0.0,
-    usually around the time the 90 day warranty expires
-    compound: 0.0, neg: 0.0, neu: 1.0, pos: 0.0,
-    the twin towers collapsed today
-    compound: -0.2732, neg: 0.344, neu: 0.656, pos: 0.0,
-    However, Mr. Carter solemnly argues, his client carried out the kidnapping under orders and in the ''least offensive way possible.''
-    compound: -0.5859, neg: 0.23, neu: 0.697, pos: 0.074,
diff --git a/nlp_resource_data/nltk/test/sentiwordnet.doctest b/nlp_resource_data/nltk/test/sentiwordnet.doctest

deleted file mode 100644 (file)

index 46126bb..0000000
--- a/nlp_resource_data/nltk/test/sentiwordnet.doctest
+++ /dev/null
@@ -1,41 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-======================
-SentiWordNet Interface
-======================
-
-SentiWordNet can be imported like this:
-
-    >>> from nltk.corpus import sentiwordnet as swn
-
-------------
-SentiSynsets
-------------
-
-    >>> breakdown = swn.senti_synset('breakdown.n.03')
-    >>> print(breakdown)
-    <breakdown.n.03: PosScore=0.0 NegScore=0.25>
-    >>> breakdown.pos_score()
-    0.0
-    >>> breakdown.neg_score()
-    0.25
-    >>> breakdown.obj_score()
-    0.75
-
-
-------
-Lookup
-------
-
-    >>> list(swn.senti_synsets('slow')) # doctest: +NORMALIZE_WHITESPACE
-    [SentiSynset('decelerate.v.01'), SentiSynset('slow.v.02'),
-    SentiSynset('slow.v.03'), SentiSynset('slow.a.01'),
-    SentiSynset('slow.a.02'), SentiSynset('dense.s.04'),
-    SentiSynset('slow.a.04'), SentiSynset('boring.s.01'),
-    SentiSynset('dull.s.08'), SentiSynset('slowly.r.01'),
-    SentiSynset('behind.r.03')]
-
-    >>> happy = swn.senti_synsets('happy', 'a')
-
-    >>> all = swn.all_senti_synsets()
diff --git a/nlp_resource_data/nltk/test/simple.doctest b/nlp_resource_data/nltk/test/simple.doctest

deleted file mode 100644 (file)

index 5636163..0000000
--- a/nlp_resource_data/nltk/test/simple.doctest
+++ /dev/null
@@ -1,85 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-=================
-EasyInstall Tests
-=================
-
-This file contains some simple tests that will be run by EasyInstall in
-order to test the installation when NLTK-Data is absent.
-
-    >>> from __future__ import print_function
-
-------------
-Tokenization
-------------
-
-    >>> from nltk.tokenize import wordpunct_tokenize
-    >>> s = ("Good muffins cost $3.88\nin New York.  Please buy me\n"
-    ...      "two of them.\n\nThanks.")
-    >>> wordpunct_tokenize(s) # doctest: +NORMALIZE_WHITESPACE
-    ['Good', 'muffins', 'cost', '$', '3', '.', '88', 'in', 'New', 'York', '.',
-    'Please', 'buy', 'me', 'two', 'of', 'them', '.', 'Thanks', '.']
-
--------
-Metrics
--------
-
-    >>> from nltk.metrics import precision, recall, f_measure
-    >>> reference = 'DET NN VB DET JJ NN NN IN DET NN'.split()
-    >>> test    = 'DET VB VB DET NN NN NN IN DET NN'.split()
-    >>> reference_set = set(reference)
-    >>> test_set = set(test)
-    >>> precision(reference_set, test_set)
-    1.0
-    >>> print(recall(reference_set, test_set))
-    0.8
-    >>> print(f_measure(reference_set, test_set))
-    0.88888888888...
-
-------------------
-Feature Structures
-------------------
-
-    >>> from nltk import FeatStruct
-    >>> fs1 = FeatStruct(PER=3, NUM='pl', GND='fem')
-    >>> fs2 = FeatStruct(POS='N', AGR=fs1)
-    >>> print(fs2)
-    [       [ GND = 'fem' ] ]
-    [ AGR = [ NUM = 'pl'  ] ]
-    [       [ PER = 3     ] ]
-    [                       ]
-    [ POS = 'N'             ]
-    >>> print(fs2['AGR'])
-    [ GND = 'fem' ]
-    [ NUM = 'pl'  ]
-    [ PER = 3     ]
-    >>> print(fs2['AGR']['PER'])
-    3
-
--------
-Parsing
--------
-
-    >>> from nltk.parse.recursivedescent import RecursiveDescentParser
-    >>> from nltk.grammar import CFG
-    >>> grammar = CFG.fromstring("""
-    ... S -> NP VP
-    ... PP -> P NP
-    ... NP -> 'the' N | N PP | 'the' N PP
-    ... VP -> V NP | V PP | V NP PP
-    ... N -> 'cat' | 'dog' | 'rug'
-    ... V -> 'chased'
-    ... P -> 'on'
-    ... """)
-    >>> rd = RecursiveDescentParser(grammar)
-    >>> sent = 'the cat chased the dog on the rug'.split()
-    >>> for t in rd.parse(sent):
-    ...     print(t)
-    (S
-      (NP the (N cat))
-      (VP (V chased) (NP the (N dog) (PP (P on) (NP the (N rug))))))
-    (S
-      (NP the (N cat))
-      (VP (V chased) (NP the (N dog)) (PP (P on) (NP the (N rug)))))
-
diff --git a/nlp_resource_data/nltk/test/stem.doctest b/nlp_resource_data/nltk/test/stem.doctest

deleted file mode 100644 (file)

index 2cf9857..0000000
--- a/nlp_resource_data/nltk/test/stem.doctest
+++ /dev/null
@@ -1,78 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-==========
- Stemmers
-==========
-
-Overview
-~~~~~~~~
-
-Stemmers remove morphological affixes from words, leaving only the
-word stem.
-
-    >>> from __future__ import print_function
-    >>> from nltk.stem import *
-
-Unit tests for the Porter stemmer
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-    >>> from nltk.stem.porter import *
-
-Create a new Porter stemmer.
-
-    >>> stemmer = PorterStemmer()
-
-Test the stemmer on various pluralised words.
-
-    >>> plurals = ['caresses', 'flies', 'dies', 'mules', 'denied',
-    ...            'died', 'agreed', 'owned', 'humbled', 'sized',
-    ...            'meeting', 'stating', 'siezing', 'itemization',
-    ...            'sensational', 'traditional', 'reference', 'colonizer',
-    ...            'plotted']
-
-    >>> singles = [stemmer.stem(plural) for plural in plurals]
-
-    >>> print(' '.join(singles))  # doctest: +NORMALIZE_WHITESPACE
-    caress fli die mule deni die agre own humbl size meet
-    state siez item sensat tradit refer colon plot
-
-
-Unit tests for Snowball stemmer
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-    >>> from nltk.stem.snowball import SnowballStemmer
-
-See which languages are supported.
-
-    >>> print(" ".join(SnowballStemmer.languages))
-    arabic danish dutch english finnish french german hungarian italian
-    norwegian porter portuguese romanian russian spanish swedish
-
-Create a new instance of a language specific subclass.
-
-    >>> stemmer = SnowballStemmer("english")
-
-Stem a word.
-
-    >>> print(stemmer.stem("running"))
-    run
-
-Decide not to stem stopwords.
-
-    >>> stemmer2 = SnowballStemmer("english", ignore_stopwords=True)
-    >>> print(stemmer.stem("having"))
-    have
-    >>> print(stemmer2.stem("having"))
-    having
-
-The 'english' stemmer is better than the original 'porter' stemmer.
-
-    >>> print(SnowballStemmer("english").stem("generously"))
-    generous
-    >>> print(SnowballStemmer("porter").stem("generously"))
-    gener
-
-.. note::
-
-    Extra stemmer tests can be found in `nltk.test.unit.test_stem`.
diff --git a/nlp_resource_data/nltk/test/tag.doctest b/nlp_resource_data/nltk/test/tag.doctest

deleted file mode 100644 (file)

index 2248cba..0000000
--- a/nlp_resource_data/nltk/test/tag.doctest
+++ /dev/null
@@ -1,33 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-Regression Tests
-~~~~~~~~~~~~~~~~
-
-Sequential Taggers
-------------------
-
-Add tests for:
-  - make sure backoff is being done correctly.
-  - make sure ngram taggers don't use previous sentences for context.
-  - make sure ngram taggers see 'beginning of the sentence' as a
-    unique context
-  - make sure regexp tagger's regexps are tried in order
-  - train on some simple examples, & make sure that the size & the
-    generated models are correct.
-  - make sure cutoff works as intended
-  - make sure that ngram models only exclude contexts covered by the
-    backoff tagger if the backoff tagger gets that context correct at
-    *all* locations.
-
-
-Regression Testing for issue #1025
-==================================
-
-We want to ensure that a RegexpTagger can be created with more than 100 patterns
-and does not fail with:
- "AssertionError: sorry, but this version only supports 100 named groups"
-
-    >>> from nltk.tag import RegexpTagger
-    >>> patterns = [(str(i), 'NNP',) for i in range(200)]
-    >>> tagger = RegexpTagger(patterns)
diff --git a/nlp_resource_data/nltk/test/tokenize.doctest b/nlp_resource_data/nltk/test/tokenize.doctest

deleted file mode 100644 (file)

index f99e22a..0000000
--- a/nlp_resource_data/nltk/test/tokenize.doctest
+++ /dev/null
@@ -1,290 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-    >>> from __future__ import print_function
-    >>> from nltk.tokenize import *
-
-Regression Tests: Treebank Tokenizer
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Some test strings.
-
-    >>> s1 = "On a $50,000 mortgage of 30 years at 8 percent, the monthly payment would be $366.88."
-    >>> word_tokenize(s1)
-    ['On', 'a', '$', '50,000', 'mortgage', 'of', '30', 'years', 'at', '8', 'percent', ',', 'the', 'monthly', 'payment', 'would', 'be', '$', '366.88', '.']
-    >>> s2 = "\"We beat some pretty good teams to get here,\" Slocum said."
-    >>> word_tokenize(s2)
-    ['``', 'We', 'beat', 'some', 'pretty', 'good', 'teams', 'to', 'get', 'here', ',', "''", 'Slocum', 'said', '.']
-    >>> s3 = "Well, we couldn't have this predictable, cliche-ridden, \"Touched by an Angel\" (a show creator John Masius worked on) wanna-be if she didn't."
-    >>> word_tokenize(s3)
-    ['Well', ',', 'we', 'could', "n't", 'have', 'this', 'predictable', ',', 'cliche-ridden', ',', '``', 'Touched', 'by', 'an', 'Angel', "''", '(', 'a', 'show', 'creator', 'John', 'Masius', 'worked', 'on', ')', 'wanna-be', 'if', 'she', 'did', "n't", '.']
-    >>> s4 = "I cannot cannot work under these conditions!"
-    >>> word_tokenize(s4)
-    ['I', 'can', 'not', 'can', 'not', 'work', 'under', 'these', 'conditions', '!']
-    >>> s5 = "The company spent $30,000,000 last year."
-    >>> word_tokenize(s5)
-    ['The', 'company', 'spent', '$', '30,000,000', 'last', 'year', '.']
-    >>> s6 = "The company spent 40.75% of its income last year."
-    >>> word_tokenize(s6)
-    ['The', 'company', 'spent', '40.75', '%', 'of', 'its', 'income', 'last', 'year', '.']
-    >>> s7 = "He arrived at 3:00 pm."
-    >>> word_tokenize(s7)
-    ['He', 'arrived', 'at', '3:00', 'pm', '.']
-    >>> s8 = "I bought these items: books, pencils, and pens."
-    >>> word_tokenize(s8)
-    ['I', 'bought', 'these', 'items', ':', 'books', ',', 'pencils', ',', 'and', 'pens', '.']
-    >>> s9 = "Though there were 150, 100 of them were old."
-    >>> word_tokenize(s9)
-    ['Though', 'there', 'were', '150', ',', '100', 'of', 'them', 'were', 'old', '.']
-    >>> s10 = "There were 300,000, but that wasn't enough."
-    >>> word_tokenize(s10)
-    ['There', 'were', '300,000', ',', 'but', 'that', 'was', "n't", 'enough', '.']
-
-
-Testing improvement made to the TreebankWordTokenizer
-
-    >>> sx1 = u'\xabNow that I can do.\xbb'
-    >>> expected = [u'\xab', u'Now', u'that', u'I', u'can', u'do', u'.', u'\xbb']
-    >>> word_tokenize(sx1) == expected
-    True
-    >>> sx2 = u'The unicode 201C and 201D \u201cLEFT(RIGHT) DOUBLE QUOTATION MARK\u201d is also OPEN_PUNCT and CLOSE_PUNCT.'
-    >>> expected = [u'The', u'unicode', u'201C', u'and', u'201D', u'\u201c', u'LEFT', u'(', u'RIGHT', u')', u'DOUBLE', u'QUOTATION', u'MARK', u'\u201d', u'is', u'also', u'OPEN_PUNCT', u'and', u'CLOSE_PUNCT', u'.']
-    >>> word_tokenize(sx2) == expected
-    True
-
-
-Sentence tokenization in word_tokenize:
-
-    >>> s11 = "I called Dr. Jones. I called Dr. Jones."
-    >>> word_tokenize(s11)
-    ['I', 'called', 'Dr.', 'Jones', '.', 'I', 'called', 'Dr.', 'Jones', '.']
-    >>> s12 = ("Ich muss unbedingt daran denken, Mehl, usw. fur einen "
-    ...        "Kuchen einzukaufen. Ich muss.")
-    >>> word_tokenize(s12)
-    ['Ich', 'muss', 'unbedingt', 'daran', 'denken', ',', 'Mehl', ',', 'usw',
-     '.', 'fur', 'einen', 'Kuchen', 'einzukaufen', '.', 'Ich', 'muss', '.']
-    >>> word_tokenize(s12, 'german')
-    ['Ich', 'muss', 'unbedingt', 'daran', 'denken', ',', 'Mehl', ',', 'usw.',
-     'fur', 'einen', 'Kuchen', 'einzukaufen', '.', 'Ich', 'muss', '.']
-
-
-Regression Tests: Regexp Tokenizer
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Some additional test strings.
-
-    >>> s = ("Good muffins cost $3.88\nin New York.  Please buy me\n"
-    ...      "two of them.\n\nThanks.")
-    >>> s2 = ("Alas, it has not rained today. When, do you think, "
-    ...       "will it rain again?")
-    >>> s3 = ("<p>Although this is <b>not</b> the case here, we must "
-    ...       "not relax our vigilance!</p>")
-
-    >>> regexp_tokenize(s2, r'[,\.\?!"]\s*', gaps=False)
-    [', ', '. ', ', ', ', ', '?']
-    >>> regexp_tokenize(s2, r'[,\.\?!"]\s*', gaps=True)
-    ['Alas', 'it has not rained today', 'When', 'do you think',
-     'will it rain again']
-
-Take care to avoid using capturing groups:
-
-    >>> regexp_tokenize(s3, r'</?[bp]>', gaps=False)
-    ['<p>', '<b>', '</b>', '</p>']
-    >>> regexp_tokenize(s3, r'</?(?:b|p)>', gaps=False)
-    ['<p>', '<b>', '</b>', '</p>']
-    >>> regexp_tokenize(s3, r'</?(?:b|p)>', gaps=True)
-    ['Although this is ', 'not',
-     ' the case here, we must not relax our vigilance!']
-
-Named groups are capturing groups, and confuse the tokenizer:
-
-    >>> regexp_tokenize(s3, r'</?(?P<named>b|p)>', gaps=False)
-    ['p', 'b', 'b', 'p']
-    >>> regexp_tokenize(s3, r'</?(?P<named>b|p)>', gaps=True)
-    ['p', 'Although this is ', 'b', 'not', 'b',
-     ' the case here, we must not relax our vigilance!', 'p']
-
-Make sure that nested groups don't confuse the tokenizer:
-
-    >>> regexp_tokenize(s2, r'(?:h|r|l)a(?:s|(?:i|n0))', gaps=False)
-    ['las', 'has', 'rai', 'rai']
-    >>> regexp_tokenize(s2, r'(?:h|r|l)a(?:s|(?:i|n0))', gaps=True)
-    ['A', ', it ', ' not ', 'ned today. When, do you think, will it ',
-     'n again?']
-
-Back-references require capturing groups, and these are not supported:
-
-    >>> regexp_tokenize("aabbbcccc", r'(.)\1')
-    ['a', 'b', 'c', 'c']
-
-A simple sentence tokenizer, using the non-capturing pattern '\.(?:\s+|$)':
-
-    >>> regexp_tokenize(s, pattern=r'\.(?:\s+|$)', gaps=True)
-    ['Good muffins cost $3.88\nin New York',
-     'Please buy me\ntwo of them', 'Thanks']
-
-
-Regression Tests: TweetTokenizer
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-TweetTokenizer is a tokenizer specifically designed for micro-blogging tokenization tasks.
-
-    >>> from nltk.tokenize import TweetTokenizer
-    >>> tknzr = TweetTokenizer()
-    >>> s0 = "This is a cooool #dummysmiley: :-) :-P <3 and some arrows < > -> <--"
-    >>> tknzr.tokenize(s0)
-    ['This', 'is', 'a', 'cooool', '#dummysmiley', ':', ':-)', ':-P', '<3', 'and', 'some', 'arrows', '<', '>', '->', '<--']
-    >>> s1 = "@Joyster2012 @CathStaincliffe Good for you, girl!! Best wishes :-)"
-    >>> tknzr.tokenize(s1)
-    ['@Joyster2012', '@CathStaincliffe', 'Good', 'for', 'you', ',', 'girl', '!', '!', 'Best', 'wishes', ':-)']
-    >>> s2 = "3Points for #DreamTeam Gooo BAILEY! :) #PBB737Gold @PBBabscbn"
-    >>> tknzr.tokenize(s2)
-    ['3Points', 'for', '#DreamTeam', 'Gooo', 'BAILEY', '!', ':)', '#PBB737Gold', '@PBBabscbn']
-    >>> s3 = "@Insanomania They do... Their mentality doesn't :("
-    >>> tknzr.tokenize(s3)
-    ['@Insanomania', 'They', 'do', '...', 'Their', 'mentality', "doesn't", ':(']
-    >>> s4 = "RT @facugambande: Ya por arrancar a grabar !!! #TirenTirenTiren vamoo !!"
-    >>> tknzr.tokenize(s4)
-    ['RT', '@facugambande', ':', 'Ya', 'por', 'arrancar', 'a', 'grabar', '!', '!', '!', '#TirenTirenTiren', 'vamoo', '!', '!']
-    >>> tknzr = TweetTokenizer(reduce_len=True)
-    >>> s5 = "@crushinghes the summer holidays are great but I'm so bored already :("
-    >>> tknzr.tokenize(s5)
-    ['@crushinghes', 'the', 'summer', 'holidays', 'are', 'great', 'but', "I'm", 'so', 'bored', 'already', ':(']
-
-It is possible to specify `strip_handles` and `reduce_len` parameters for a TweetTokenizer instance. When `strip_handles` is set to True, the tokenizer removes Twitter handles (e.g. usernames). When `reduce_len` is set to True, repeated character sequences of length 3 or greater are replaced with sequences of length 3.
-
-    >>> tknzr = TweetTokenizer(strip_handles=True, reduce_len=True)
-    >>> s6 = '@remy: This is waaaaayyyy too much for you!!!!!!'
-    >>> tknzr.tokenize(s6)
-    [':', 'This', 'is', 'waaayyy', 'too', 'much', 'for', 'you', '!', '!', '!']
-    >>> s7 = '@_willy65: No place for @chuck tonight. Sorry.'
-    >>> tknzr.tokenize(s7)
-    [':', 'No', 'place', 'for', 'tonight', '.', 'Sorry', '.']
-    >>> s8 = '@mar_tin is a great developer. Contact him at mar_tin@email.com.'
-    >>> tknzr.tokenize(s8)
-    ['is', 'a', 'great', 'developer', '.', 'Contact', 'him', 'at', 'mar_tin@email.com', '.']
-
-The `preserve_case` parameter (default: True) controls whether the case of tokens is preserved; setting it to False converts uppercase tokens to lowercase. Emoticons are not affected:
-
-    >>> tknzr = TweetTokenizer(preserve_case=False)
-    >>> s9 = "@jrmy: I'm REALLY HAPPYYY about that! NICEEEE :D :P"
-    >>> tknzr.tokenize(s9)
-    ['@jrmy', ':', "i'm", 'really', 'happyyy', 'about', 'that', '!', 'niceeee', ':D', ':P']
-
-It should not hang on long sequences of the same punctuation character.
-
-    >>> tknzr = TweetTokenizer()
-    >>> s10 = "Photo: Aujourd'hui sur http://t.co/0gebOFDUzn Projet... http://t.co/bKfIUbydz2.............................. http://fb.me/3b6uXpz0L"
-    >>> tknzr.tokenize(s10)
-    [u'Photo', u':', u"Aujourd'hui", u'sur', u'http://t.co/0gebOFDUzn', u'Projet', u'...', u'http://t.co/bKfIUbydz2', u'...', u'http://fb.me/3b6uXpz0L']
-
-
-Regression Tests: PunktSentenceTokenizer
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-The sentence splitter should remove whitespace following the sentence boundary.
-
-    >>> pst = PunktSentenceTokenizer()
-    >>> pst.tokenize('See Section 3).  Or Section 2).  ')
-    ['See Section 3).', 'Or Section 2).']
-    >>> pst.tokenize('See Section 3.)  Or Section 2.)  ')
-    ['See Section 3.)', 'Or Section 2.)']
-    >>> pst.tokenize('See Section 3.)  Or Section 2.)  ', realign_boundaries=False)
-    ['See Section 3.', ')  Or Section 2.', ')']
-
-
-Two instances of PunktSentenceTokenizer should not share PunktParameters.
-
-    >>> pst = PunktSentenceTokenizer()
-    >>> pst2 = PunktSentenceTokenizer()
-    >>> pst._params is pst2._params
-    False
-    
-Testing mutable default arguments for https://github.com/nltk/nltk/pull/2067
-
-    >>> from nltk.tokenize.punkt import PunktBaseClass, PunktTrainer, PunktSentenceTokenizer
-    >>> from nltk.tokenize.punkt import PunktLanguageVars, PunktParameters
-    >>> pbc = PunktBaseClass(lang_vars=None, params=None)
-    >>> type(pbc._params)
-    <class 'nltk.tokenize.punkt.PunktParameters'>
-    >>> type(pbc._lang_vars)
-    <class 'nltk.tokenize.punkt.PunktLanguageVars'>
-    >>> pt = PunktTrainer(lang_vars=None)
-    >>> type(pt._lang_vars)
-    <class 'nltk.tokenize.punkt.PunktLanguageVars'>
-    >>> pst = PunktSentenceTokenizer(lang_vars=None)
-    >>> type(pst._lang_vars)
-    <class 'nltk.tokenize.punkt.PunktLanguageVars'>
-    
-
-Regression Tests: align_tokens
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Post-hoc alignment of tokens with a source string
-
-    >>> from nltk.tokenize.util import align_tokens
-    >>> list(align_tokens([''], ""))
-    [(0, 0)]
-    >>> list(align_tokens([''], " "))
-    [(0, 0)]
-    >>> list(align_tokens([], ""))
-    []
-    >>> list(align_tokens([], " "))
-    []
-    >>> list(align_tokens(['a'], "a"))
-    [(0, 1)]
-    >>> list(align_tokens(['abc', 'def'], "abcdef"))
-    [(0, 3), (3, 6)]
-    >>> list(align_tokens(['abc', 'def'], "abc def"))
-    [(0, 3), (4, 7)]
-    >>> list(align_tokens(['ab', 'cd'], "ab cd ef"))
-    [(0, 2), (3, 5)]
-    >>> list(align_tokens(['ab', 'cd', 'ef'], "ab cd ef"))
-    [(0, 2), (3, 5), (6, 8)]
-    >>> list(align_tokens(['ab', 'cd', 'efg'], "ab cd ef"))
-    Traceback (most recent call last):
-    ....
-    ValueError: substring "efg" not found in "ab cd ef"
-    >>> list(align_tokens(['ab', 'cd', 'ef', 'gh'], "ab cd ef"))
-    Traceback (most recent call last):
-    ....
-    ValueError: substring "gh" not found in "ab cd ef"
-    >>> list(align_tokens(['The', 'plane', ',', 'bound', 'for', 'St', 'Petersburg', ',', 'crashed', 'in', 'Egypt', "'s", 'Sinai', 'desert', 'just', '23', 'minutes', 'after', 'take-off', 'from', 'Sharm', 'el-Sheikh', 'on', 'Saturday', '.'], "The plane, bound for St Petersburg, crashed in Egypt's Sinai desert just 23 minutes after take-off from Sharm el-Sheikh on Saturday."))
-    [(0, 3), (4, 9), (9, 10), (11, 16), (17, 20), (21, 23), (24, 34), (34, 35), (36, 43), (44, 46), (47, 52), (52, 54), (55, 60), (61, 67), (68, 72), (73, 75), (76, 83), (84, 89), (90, 98), (99, 103), (104, 109), (110, 119), (120, 122), (123, 131), (131, 132)]
-
-
-Regression Tests: MWETokenizer
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Pickle an MWETokenizer
-
-    >>> from nltk.tokenize import MWETokenizer
-    >>> import pickle
-
-    >>> tokenizer = MWETokenizer([('hors', "d'oeuvre")], separator='+')
-    >>> p = pickle.dumps(tokenizer)
-    >>> unpickeled = pickle.loads(p)
-    >>> unpickeled.tokenize("An hors d'oeuvre tonight, sir?".split())
-    ['An', "hors+d'oeuvre", 'tonight,', 'sir?']
-
-
-Regression Tests: TextTilingTokenizer
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-TextTilingTokenizer tokenizes text into coherent subtopic chunks based upon Hearst's TextTiling algorithm.
-
-    >>> from nltk.tokenize import TextTilingTokenizer
-    >>> from nltk.corpus import brown
-    >>> tt = TextTilingTokenizer()
-    >>> tt.tokenize(brown.raw()[0:1000])
-    ["\n\n\tThe/at Fulton/np-tl County/nn-tl Grand/jj-tl Jury/nn-tl said/vbd Friday/nr an/at investigation/nn of/in Atlanta's/np$ recent/jj primary/nn election/nn produced/vbd ``/`` no/at evidence/nn ''/'' that/cs any/dti irregularities/nns took/vbd place/nn ./.\n\n\n\tThe/at jury/nn further/rbr said/vbd in/in term-end/nn presentments/nns that/cs the/at City/nn-tl Executive/jj-tl Committee/nn-tl ,/, which/wdt had/hvd over-all/jj charge/nn of/in the/at election/nn ,/, ``/`` deserves/vbz the/at praise/nn and/cc thanks/nns of/in the/at City/nn-tl of/in-tl Atlanta/np-tl ''/'' for/in the/at manner/nn in/in which/wdt the/at election/nn was/bedz conducted/vbn ./.\n\n\n\tThe/at September-October/np term/nn jury/nn had/hvd been/ben charged/vbn by/in Fulton/np-tl Superior/jj-tl Court/nn-tl Judge/nn-tl Durwood/np Pye/np to/to investigate/vb reports/nns of/in possible/jj ``/`` irregularities/nns ''/'' in/in the/at hard-fought/jj primary/nn which/wdt was/bedz won/vbn by/in Mayor-nominate/nn-tl Ivan/np Allen/np Jr./"]
-
-Test that `ValueError` exceptions are raised when illegal arguments are used.
-
-    >>> TextTilingTokenizer(similarity_method='foo').tokenize(brown.raw()[0:1000])
-    Traceback (most recent call last):
-      ...
-    ValueError: Similarity method foo not recognized
-    >>> TextTilingTokenizer(smoothing_method='bar').tokenize(brown.raw()[0:1000])
-    Traceback (most recent call last):
-      ...
-    ValueError: Smoothing method bar not recognized
-
diff --git a/nlp_resource_data/nltk/test/toolbox.doctest b/nlp_resource_data/nltk/test/toolbox.doctest

deleted file mode 100644 (file)

index 1abf684..0000000
--- a/nlp_resource_data/nltk/test/toolbox.doctest
+++ /dev/null
@@ -1,307 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-===============================
-Unit test cases for ``toolbox``
-===============================
-
-    >>> from nltk import toolbox
-
---------------------------
-``toolbox.StandardFormat``
---------------------------
-
-    >>> f = toolbox.StandardFormat()
-
-``toolbox.StandardFormat.open()``
----------------------------------
-    >>> import os, tempfile
-    >>> (fd, fname) = tempfile.mkstemp()
-    >>> tf = os.fdopen(fd, "w")
-    >>> _ = tf.write('\\lx a value\n\\lx another value\n')
-    >>> tf.close()
-    >>> f = toolbox.StandardFormat()
-    >>> f.open(fname)
-    >>> list(f.fields())
-    [('lx', 'a value'), ('lx', 'another value')]
-    >>> f.close()
-    >>> os.unlink(fname)
-
-``toolbox.StandardFormat.open_string()``
-----------------------------------------
-    >>> f = toolbox.StandardFormat()
-    >>> f.open_string('\\lx a value\n\\lx another value\n')
-    >>> list(f.fields())
-    [('lx', 'a value'), ('lx', 'another value')]
-    >>> f.close()
-
-``toolbox.StandardFormat.close()``
-----------------------------------
-    >>> f = toolbox.StandardFormat()
-    >>> f.open_string('\\lx a value\n\\lx another value\n')
-    >>> list(f.fields())
-    [('lx', 'a value'), ('lx', 'another value')]
-    >>> f.close()
-
-``toolbox.StandardFormat.line_num``
----------------------------------------
-
-``StandardFormat.line_num`` contains the line number of the last line returned:
-
-    >>> f = toolbox.StandardFormat()
-    >>> f.open_string('\\lx a value\n\\lx another value\n\\lx a third value\n')
-    >>> line_nums = []
-    >>> for l in f.raw_fields():
-    ...     line_nums.append(f.line_num)
-    >>> line_nums
-    [1, 2, 3]
-
-When fields span several lines, ``StandardFormat.line_num`` contains the line number of the last line of the field returned:
-
-    >>> f = toolbox.StandardFormat()
-    >>> f.open_string('\\lx two\nlines\n\\lx three\nlines\n\n\\lx two\nlines\n')
-    >>> line_nums = []
-    >>> for l in f.raw_fields():
-    ...     line_nums.append(f.line_num)
-    >>> line_nums
-    [2, 5, 7]
-
-``StandardFormat.line_num`` doesn't exist before opening or after closing
-a file or string:
-
-    >>> f = toolbox.StandardFormat()
-    >>> f.line_num
-    Traceback (most recent call last):
-        ...
-    AttributeError: 'StandardFormat' object has no attribute 'line_num'
-    >>> f.open_string('\\lx two\nlines\n\\lx three\nlines\n\n\\lx two\nlines\n')
-    >>> line_nums = []
-    >>> for l in f.raw_fields():
-    ...     line_nums.append(f.line_num)
-    >>> line_nums
-    [2, 5, 7]
-    >>> f.close()
-    >>> f.line_num
-    Traceback (most recent call last):
-        ...
-    AttributeError: 'StandardFormat' object has no attribute 'line_num'
-
-``toolbox.StandardFormat.raw_fields()``
----------------------------------------
-``raw_fields()`` returns an iterator over tuples of two strings representing the
-marker and its value. The marker is given without the backslash and the value
-without its trailing newline:
-
-    >>> f = toolbox.StandardFormat()
-    >>> f.open_string('\\lx a value\n\\lx another value\n')
-    >>> list(f.raw_fields())
-    [('lx', 'a value'), ('lx', 'another value')]
-
-an empty file returns nothing:
-
-    >>> f = toolbox.StandardFormat()
-    >>> f.open_string('')
-    >>> list(f.raw_fields())
-    []
-
-file with only a newline currently returns a single field with no marker and an empty value (open question: what should it return?):
-
-    >>> f = toolbox.StandardFormat()
-    >>> f.open_string('\n')
-    >>> list(f.raw_fields())
-    [(None, '')]
-
-file with only one field should be parsed ok:
-
-    >>> f = toolbox.StandardFormat()
-    >>> f.open_string('\\lx one value\n')
-    >>> list(f.raw_fields())
-    [('lx', 'one value')]
-
-file without a trailing newline should be parsed ok:
-
-    >>> f = toolbox.StandardFormat()
-    >>> f.open_string('\\lx a value\n\\lx another value')
-    >>> list(f.raw_fields())
-    [('lx', 'a value'), ('lx', 'another value')]
-
-trailing white space is preserved except for the final newline:
-
-    >>> f = toolbox.StandardFormat()
-    >>> f.open_string('\\lx trailing space \n\\lx trailing tab\t\n\\lx extra newline\n\n')
-    >>> list(f.raw_fields())
-    [('lx', 'trailing space '), ('lx', 'trailing tab\t'), ('lx', 'extra newline\n')]
-
-line wrapping is preserved:
-
-    >>> f = toolbox.StandardFormat()
-    >>> f.open_string('\\lx a value\nmore of the value\nand still more\n\\lc another val\n')
-    >>> list(f.raw_fields())
-    [('lx', 'a value\nmore of the value\nand still more'), ('lc', 'another val')]
-
-file beginning with a multiline record should be parsed ok:
-
-    >>> f = toolbox.StandardFormat()
-    >>> f.open_string('\\lx a value\nmore of the value\nand still more\n\\lc another val\n')
-    >>> list(f.raw_fields())
-    [('lx', 'a value\nmore of the value\nand still more'), ('lc', 'another val')]
-
-file ending with a multiline record should be parsed ok:
-
-    >>> f = toolbox.StandardFormat()
-    >>> f.open_string('\\lc a value\n\\lx another value\nmore of the value\nand still more\n')
-    >>> list(f.raw_fields())
-    [('lc', 'a value'), ('lx', 'another value\nmore of the value\nand still more')]
-
-file beginning with a BOM should be parsed ok:
-
-    >>> f = toolbox.StandardFormat()
-    >>> f.open_string('\xef\xbb\xbf\\lx a value\n\\lx another value\n')
-    >>> list(f.raw_fields())
-    [('lx', 'a value'), ('lx', 'another value')]
-
-file beginning with two BOMs should ignore only the first one:
-
-    >>> f = toolbox.StandardFormat()
-    >>> f.open_string('\xef\xbb\xbf\xef\xbb\xbf\\lx a value\n\\lx another value\n')
-    >>> list(f.raw_fields())
-    [(None, '\xef\xbb\xbf\\lx a value'), ('lx', 'another value')]
-
-should not ignore a BOM not at the beginning of the file:
-
-    >>> f = toolbox.StandardFormat()
-    >>> f.open_string('\\lx a value\n\xef\xbb\xbf\\lx another value\n')
-    >>> list(f.raw_fields())
-    [('lx', 'a value\n\xef\xbb\xbf\\lx another value')]
-
-``toolbox.StandardFormat.fields()``
------------------------------------
-trailing white space is not preserved:
-
-    >>> f = toolbox.StandardFormat()
-    >>> f.open_string('\\lx trailing space \n\\lx trailing tab\t\n\\lx extra newline\n\n')
-    >>> list(f.fields())
-    [('lx', 'trailing space'), ('lx', 'trailing tab'), ('lx', 'extra newline')]
-
-multiline fields are unwrapped:
-
-    >>> f = toolbox.StandardFormat()
-    >>> f.open_string('\\lx a value\nmore of the value\nand still more\n\\lc another val\n')
-    >>> list(f.fields())
-    [('lx', 'a value more of the value and still more'), ('lc', 'another val')]
-
-markers
--------
-A backslash in the first position on a new line indicates the start of a
-marker. The backslash is not part of the marker:
-
-    >>> f = toolbox.StandardFormat()
-    >>> f.open_string('\\mk a value\n')
-    >>> list(f.fields())
-    [('mk', 'a value')]
-
-If the backslash occurs later in the line it does not indicate the start
-of a marker:
-
-    >>> f = toolbox.StandardFormat()
-    >>> f.open_string('\\mk a value\n \\mk another one\n')
-    >>> list(f.raw_fields())
-    [('mk', 'a value\n \\mk another one')]
-
-There is no specific limit to the length of a marker:
-
-    >>> f = toolbox.StandardFormat()
-    >>> f.open_string('\\this_is_an_extremely_long_marker value\n')
-    >>> list(f.fields())
-    [('this_is_an_extremely_long_marker', 'value')]
-
-A marker can contain any non white space character:
-
-    >>> f = toolbox.StandardFormat()
-    >>> f.open_string('\\`~!@#$%^&*()_-=+[{]}\|,<.>/?;:"0123456789 value\n')
-    >>> list(f.fields())
-    [('`~!@#$%^&*()_-=+[{]}\\|,<.>/?;:"0123456789', 'value')]
-
-A marker is terminated by any white space character:
-
-    >>> f = toolbox.StandardFormat()
-    >>> f.open_string('\\mk a value\n\\mk\tanother one\n\\mk\rthird one\n\\mk\ffourth one')
-    >>> list(f.fields())
-    [('mk', 'a value'), ('mk', 'another one'), ('mk', 'third one'), ('mk', 'fourth one')]
-
-Consecutive whitespace characters (except newline) are treated the same as one:
-
-    >>> f = toolbox.StandardFormat()
-    >>> f.open_string('\\mk \t\r\fa value\n')
-    >>> list(f.fields())
-    [('mk', 'a value')]
-
------------------------
-``toolbox.ToolboxData``
------------------------
-
-    >>> db = toolbox.ToolboxData()
-
-``toolbox.ToolboxData.parse()``
--------------------------------
-check that normal parsing works:
-
-    >>> from xml.etree import ElementTree
-    >>> td = toolbox.ToolboxData()
-    >>> s = """\\_sh v3.0  400  Rotokas Dictionary
-    ... \\_DateStampHasFourDigitYear
-    ...
-    ... \\lx kaa
-    ... \\ps V.A
-    ... \\ge gag
-    ... \\gp nek i pas
-    ...
-    ... \\lx kaa
-    ... \\ps V.B
-    ... \\ge strangle
-    ... \\gp pasim nek
-    ... """
-    >>> td.open_string(s)
-    >>> tree = td.parse(key='lx')
-    >>> tree.tag
-    'toolbox_data'
-    >>> ElementTree.tostring(list(tree)[0]).decode('utf8')
-    '<header><_sh>v3.0  400  Rotokas Dictionary</_sh><_DateStampHasFourDigitYear /></header>'
-    >>> ElementTree.tostring(list(tree)[1]).decode('utf8')
-    '<record><lx>kaa</lx><ps>V.A</ps><ge>gag</ge><gp>nek i pas</gp></record>'
-    >>> ElementTree.tostring(list(tree)[2]).decode('utf8')
-    '<record><lx>kaa</lx><ps>V.B</ps><ge>strangle</ge><gp>pasim nek</gp></record>'
-
-check that guessing the key marker works:
-
-    >>> from xml.etree import ElementTree
-    >>> td = toolbox.ToolboxData()
-    >>> s = """\\_sh v3.0  400  Rotokas Dictionary
-    ... \\_DateStampHasFourDigitYear
-    ...
-    ... \\lx kaa
-    ... \\ps V.A
-    ... \\ge gag
-    ... \\gp nek i pas
-    ...
-    ... \\lx kaa
-    ... \\ps V.B
-    ... \\ge strangle
-    ... \\gp pasim nek
-    ... """
-    >>> td.open_string(s)
-    >>> tree = td.parse()
-    >>> ElementTree.tostring(list(tree)[0]).decode('utf8')
-    '<header><_sh>v3.0  400  Rotokas Dictionary</_sh><_DateStampHasFourDigitYear /></header>'
-    >>> ElementTree.tostring(list(tree)[1]).decode('utf8')
-    '<record><lx>kaa</lx><ps>V.A</ps><ge>gag</ge><gp>nek i pas</gp></record>'
-    >>> ElementTree.tostring(list(tree)[2]).decode('utf8')
-    '<record><lx>kaa</lx><ps>V.B</ps><ge>strangle</ge><gp>pasim nek</gp></record>'
-
------------------------
-``toolbox`` functions
------------------------
-
-``toolbox.to_sfm_string()``
--------------------------------
-
diff --git a/nlp_resource_data/nltk/test/translate.doctest b/nlp_resource_data/nltk/test/translate.doctest

deleted file mode 100644 (file)

index 6a1bb70..0000000
--- a/nlp_resource_data/nltk/test/translate.doctest
+++ /dev/null
@@ -1,242 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-.. -*- coding: utf-8 -*-
-
-=========
-Alignment
-=========
-
-Corpus Reader
--------------
-
-    >>> from nltk.corpus import comtrans
-    >>> words = comtrans.words('alignment-en-fr.txt')
-    >>> for word in words[:6]:
-    ...     print(word)
-    Resumption
-    of
-    the
-    session
-    I
-    declare
-    >>> als = comtrans.aligned_sents('alignment-en-fr.txt')[0]
-    >>> als  # doctest: +NORMALIZE_WHITESPACE
-    AlignedSent(['Resumption', 'of', 'the', 'session'],
-    ['Reprise', 'de', 'la', 'session'],
-    Alignment([(0, 0), (1, 1), (2, 2), (3, 3)]))
-
-
-Alignment Objects
------------------
-
-Aligned sentences are simply a mapping between words in a sentence:
-
-    >>> print(" ".join(als.words))
-    Resumption of the session
-    >>> print(" ".join(als.mots))
-    Reprise de la session
-    >>> als.alignment
-    Alignment([(0, 0), (1, 1), (2, 2), (3, 3)])
-
-
-Usually we look at them from the perspective of a source to a target language,
-but they are easily inverted:
-
-    >>> als.invert() # doctest: +NORMALIZE_WHITESPACE
-    AlignedSent(['Reprise', 'de', 'la', 'session'],
-    ['Resumption', 'of', 'the', 'session'],
-    Alignment([(0, 0), (1, 1), (2, 2), (3, 3)]))
-
-
-We can create new alignments, but these need to be in the correct range of
-the corresponding sentences:
-
-    >>> from nltk.translate import Alignment, AlignedSent
-    >>> als = AlignedSent(['Reprise', 'de', 'la', 'session'],
-    ...                   ['Resumption', 'of', 'the', 'session'],
-    ...                   Alignment([(0, 0), (1, 4), (2, 1), (3, 3)]))
-    Traceback (most recent call last):
-        ...
-    IndexError: Alignment is outside boundary of mots
-
-
-You can set alignments with any sequence of tuples, so long as the first two
-indexes of the tuple are the alignment indices:
-
-    >>> als.alignment = Alignment([(0, 0), (1, 1), (2, 2, "boat"), (3, 3, False, (1,2))])
-
-    >>> Alignment([(0, 0), (1, 1), (2, 2, "boat"), (3, 3, False, (1,2))])
-    Alignment([(0, 0), (1, 1), (2, 2, 'boat'), (3, 3, False, (1, 2))])
-
-
-Alignment Algorithms
---------------------
-
-EM for IBM Model 1
-~~~~~~~~~~~~~~~~~~
-
-Here is an example from Koehn, 2010:
-
-    >>> from nltk.translate import IBMModel1
-    >>> corpus = [AlignedSent(['the', 'house'], ['das', 'Haus']),
-    ...           AlignedSent(['the', 'book'], ['das', 'Buch']),
-    ...           AlignedSent(['a', 'book'], ['ein', 'Buch'])]
-    >>> em_ibm1 = IBMModel1(corpus, 20)
-    >>> print(round(em_ibm1.translation_table['the']['das'], 1))
-    1.0
-    >>> print(round(em_ibm1.translation_table['book']['das'], 1))
-    0.0
-    >>> print(round(em_ibm1.translation_table['house']['das'], 1))
-    0.0
-    >>> print(round(em_ibm1.translation_table['the']['Buch'], 1))
-    0.0
-    >>> print(round(em_ibm1.translation_table['book']['Buch'], 1))
-    1.0
-    >>> print(round(em_ibm1.translation_table['a']['Buch'], 1))
-    0.0
-    >>> print(round(em_ibm1.translation_table['book']['ein'], 1))
-    0.0
-    >>> print(round(em_ibm1.translation_table['a']['ein'], 1))
-    1.0
-    >>> print(round(em_ibm1.translation_table['the']['Haus'], 1))
-    0.0
-    >>> print(round(em_ibm1.translation_table['house']['Haus'], 1))
-    1.0
-    >>> print(round(em_ibm1.translation_table['book'][None], 1))
-    0.5
-
-And using an NLTK corpus. We train on only 10 sentences, since it is so slow:
-
-    >>> from nltk.corpus import comtrans
-    >>> com_ibm1 = IBMModel1(comtrans.aligned_sents()[:10], 20)
-    >>> print(round(com_ibm1.translation_table['bitte']['Please'], 1))
-    0.2
-    >>> print(round(com_ibm1.translation_table['Sitzungsperiode']['session'], 1))
-    1.0
-
-
-Evaluation
-----------
-The evaluation metrics for alignments are usually not interested in the
-contents of alignments but more often the comparison to a "gold standard"
-alignment that has been constructed by human experts. For this reason we
-often want to work just with raw set operations against the alignment points.
-This then gives us a very clean form for defining our evaluation metrics.
-
-.. Note::
-    The AlignedSent class has no distinction of "possible" or "sure"
-    alignments. Thus all alignments are treated as "sure".
-
-Consider the following aligned sentence for evaluation:
-
-    >>> my_als = AlignedSent(['Resumption', 'of', 'the', 'session'],
-    ...     ['Reprise', 'de', 'la', 'session'],
-    ...     Alignment([(0, 0), (3, 3), (1, 2), (1, 1), (1, 3)]))
-
-Precision
-~~~~~~~~~
-``precision = |A∩P| / |A|``
-
-**Precision** is probably the most well known evaluation metric and it is implemented
-in `nltk.metrics.scores.precision`_.  Since precision is simply interested in the
-proportion of correct alignments, we calculate the ratio of the number of our
-test alignments (*A*) that match a possible alignment (*P*), over the number of
-test alignments provided. There is no penalty for missing a possible alignment
-in our test alignments. An easy way to game this metric is to provide just one
-test alignment that is in *P* [OCH2000]_.
-
-Here are some examples:
-
-    >>> from nltk.metrics import precision
-    >>> als.alignment = Alignment([(0,0), (1,1), (2,2), (3,3)])
-    >>> precision(Alignment([]), als.alignment)
-    0.0
-    >>> precision(Alignment([(0,0), (1,1), (2,2), (3,3)]), als.alignment)
-    1.0
-    >>> precision(Alignment([(0,0), (3,3)]), als.alignment)
-    0.5
-    >>> precision(Alignment.fromstring('0-0 3-3'), als.alignment)
-    0.5
-    >>> precision(Alignment([(0,0), (1,1), (2,2), (3,3), (1,2), (2,1)]), als.alignment)
-    1.0
-    >>> precision(als.alignment, my_als.alignment)
-    0.6
-
-
-.. _nltk.metrics.scores.precision:
-    http://www.nltk.org/api/nltk.metrics.html#nltk.metrics.scores.precision
-
-
-Recall
-~~~~~~
-``recall = |A∩S| / |S|``
-
-**Recall** is another well known evaluation metric that has a set based
-implementation in NLTK as `nltk.metrics.scores.recall`_. Since recall is
-simply interested in the proportion of found alignments, we calculate the
-ratio of the number of our test alignments (*A*) that match a sure alignment
-(*S*) over the number of sure alignments. There is no penalty for producing
-a lot of test alignments. An easy way to game this metric is to include every
-possible alignment in our test alignments, regardless of whether they are correct or
-not [OCH2000]_.
-
-Here are some examples:
-
-    >>> from nltk.metrics import recall
-    >>> print(recall(Alignment([]), als.alignment))
-    None
-    >>> recall(Alignment([(0,0), (1,1), (2,2), (3,3)]), als.alignment)
-    1.0
-    >>> recall(Alignment.fromstring('0-0 3-3'), als.alignment)
-    1.0
-    >>> recall(Alignment([(0,0), (3,3)]), als.alignment)
-    1.0
-    >>> recall(Alignment([(0,0), (1,1), (2,2), (3,3), (1,2), (2,1)]), als.alignment)
-    0.66666...
-    >>> recall(als.alignment, my_als.alignment)
-    0.75
-
-
-.. _nltk.metrics.scores.recall:
-    http://www.nltk.org/api/nltk.metrics.html#nltk.metrics.scores.recall
-
-
-Alignment Error Rate (AER)
-~~~~~~~~~~~~~~~~~~~~~~~~~~
-``AER = 1 - (|A∩S| + |A∩P|) / (|A| + |S|)``
-
-**Alignment Error Rate** is a commonly used metric for assessing sentence
-alignments. It combines precision and recall metrics together such that a
-perfect alignment must have all of the sure alignments and may have some
-possible alignments [MIHALCEA2003]_ [KOEHN2010]_.
-
-.. Note::
-    [KOEHN2010]_ defines the AER as ``AER = (|A∩S| + |A∩P|) / (|A| + |S|)``
-    in his book, but corrects it to the above in his online errata. This is
-    in line with [MIHALCEA2003]_.
-
-Here are some examples:
-
-    >>> from nltk.translate import alignment_error_rate
-    >>> alignment_error_rate(Alignment([]), als.alignment)
-    1.0
-    >>> alignment_error_rate(Alignment([(0,0), (1,1), (2,2), (3,3)]), als.alignment)
-    0.0
-    >>> alignment_error_rate(als.alignment, my_als.alignment)
-    0.333333...
-    >>> alignment_error_rate(als.alignment, my_als.alignment,
-    ...     als.alignment | Alignment([(1,2), (2,1)]))
-    0.222222...
-
-
-.. [OCH2000] Och, F. and Ney, H. (2000)
-    *Statistical Machine Translation*, EAMT Workshop
-
-.. [MIHALCEA2003] Mihalcea, R. and Pedersen, T. (2003)
-    *An evaluation exercise for word alignment*, HLT-NAACL 2003
-
-.. [KOEHN2010] Koehn, P. (2010)
-    *Statistical Machine Translation*, Cambridge University Press
-
-
diff --git a/nlp_resource_data/nltk/test/translate_fixt.py b/nlp_resource_data/nltk/test/translate_fixt.py

deleted file mode 100644 (file)

index ce0cd83..0000000
--- a/nlp_resource_data/nltk/test/translate_fixt.py
+++ /dev/null
@@ -1,4 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import
-
-from nltk.corpus import teardown_module
diff --git a/nlp_resource_data/nltk/test/tree.doctest b/nlp_resource_data/nltk/test/tree.doctest

deleted file mode 100644 (file)

index a4b93ed..0000000
--- a/nlp_resource_data/nltk/test/tree.doctest
+++ /dev/null
@@ -1,1101 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-===============================
- Unit tests for nltk.tree.Tree
-===============================
-
-    >>> from nltk.tree import *
-
-Some trees to run tests on:
-
-    >>> dp1 = Tree('dp', [Tree('d', ['the']), Tree('np', ['dog'])])
-    >>> dp2 = Tree('dp', [Tree('d', ['the']), Tree('np', ['cat'])])
-    >>> vp = Tree('vp', [Tree('v', ['chased']), dp2])
-    >>> tree = Tree('s', [dp1, vp])
-    >>> print(tree)
-    (s (dp (d the) (np dog)) (vp (v chased) (dp (d the) (np cat))))
-
-The node label is accessed using the `label()` method:
-
-    >>> dp1.label(), dp2.label(), vp.label(), tree.label()
-    ('dp', 'dp', 'vp', 's')
-
-    >>> print(tree[1,1,1,0])
-    cat
-
-The `treepositions` method returns a list of the tree positions of
-subtrees and leaves in a tree.  By default, it gives the position of
-every tree, subtree, and leaf, in prefix order:
-
-    >>> print(tree.treepositions())
-    [(), (0,), (0, 0), (0, 0, 0), (0, 1), (0, 1, 0), (1,), (1, 0), (1, 0, 0), (1, 1), (1, 1, 0), (1, 1, 0, 0), (1, 1, 1), (1, 1, 1, 0)]
-
-In addition to `str` and `repr`, several methods exist to convert a
-tree object to one of several standard tree encodings:
-
-    >>> print(tree.pformat_latex_qtree())
-    \Tree [.s
-            [.dp [.d the ] [.np dog ] ]
-            [.vp [.v chased ] [.dp [.d the ] [.np cat ] ] ] ]
-
-There is also a fancy ASCII art representation:
-
-    >>> tree.pretty_print()
-                  s               
-          ________|_____           
-         |              vp        
-         |         _____|___       
-         dp       |         dp    
-      ___|___     |      ___|___   
-     d       np   v     d       np
-     |       |    |     |       |  
-    the     dog chased the     cat
-
-    >>> tree.pretty_print(unicodelines=True, nodedist=4)
-                           s                        
-            ┌──────────────┴────────┐                   
-            │                       vp              
-            │              ┌────────┴──────┐            
-            dp             │               dp       
-     ┌──────┴──────┐       │        ┌──────┴──────┐     
-     d             np      v        d             np
-     │             │       │        │             │     
-    the           dog    chased    the           cat
-
-Trees can be initialized from treebank strings:
-
-    >>> tree2 = Tree.fromstring('(S (NP I) (VP (V enjoyed) (NP my cookie)))')
-    >>> print(tree2)
-    (S (NP I) (VP (V enjoyed) (NP my cookie)))
-
-Trees can be compared for equality:
-
-    >>> tree == Tree.fromstring(str(tree))
-    True
-    >>> tree2 == Tree.fromstring(str(tree2))
-    True
-    >>> tree == tree2
-    False
-    >>> tree == Tree.fromstring(str(tree2))
-    False
-    >>> tree2 == Tree.fromstring(str(tree))
-    False
-
-    >>> tree != Tree.fromstring(str(tree))
-    False
-    >>> tree2 != Tree.fromstring(str(tree2))
-    False
-    >>> tree != tree2
-    True
-    >>> tree != Tree.fromstring(str(tree2))
-    True
-    >>> tree2 != Tree.fromstring(str(tree))
-    True
-
-    >>> tree < tree2 or tree > tree2
-    True
-
-Tree Parsing
-============
-
-The class method `Tree.fromstring()` can be used to parse trees, and it
-provides some additional options.
-
-    >>> tree = Tree.fromstring('(S (NP I) (VP (V enjoyed) (NP my cookie)))')
-    >>> print(tree)
-    (S (NP I) (VP (V enjoyed) (NP my cookie)))
-
-When called on a subclass of `Tree`, it will create trees of that
-type:
-
-    >>> tree = ImmutableTree.fromstring('(VP (V enjoyed) (NP my cookie))')
-    >>> print(tree)
-    (VP (V enjoyed) (NP my cookie))
-    >>> print(type(tree))
-    <class 'nltk.tree.ImmutableTree'>
-    >>> tree[1] = 'x'
-    Traceback (most recent call last):
-      . . .
-    ValueError: ImmutableTree may not be modified
-    >>> del tree[0]
-    Traceback (most recent call last):
-      . . .
-    ValueError: ImmutableTree may not be modified
-
-The ``brackets`` parameter can be used to specify two characters that
-should be used as brackets:
-
-    >>> print(Tree.fromstring('[S [NP I] [VP [V enjoyed] [NP my cookie]]]',
-    ...                  brackets='[]'))
-    (S (NP I) (VP (V enjoyed) (NP my cookie)))
-    >>> print(Tree.fromstring('<S <NP I> <VP <V enjoyed> <NP my cookie>>>',
-    ...                  brackets='<>'))
-    (S (NP I) (VP (V enjoyed) (NP my cookie)))
-
-If ``brackets`` is not a string, or is not exactly two characters,
-then `Tree.fromstring` raises an exception:
-
-    >>> Tree.fromstring('<VP <V enjoyed> <NP my cookie>>', brackets='')
-    Traceback (most recent call last):
-      . . .
-    TypeError: brackets must be a length-2 string
-    >>> Tree.fromstring('<VP <V enjoyed> <NP my cookie>>', brackets='<<>>')
-    Traceback (most recent call last):
-      . . .
-    TypeError: brackets must be a length-2 string
-    >>> Tree.fromstring('<VP <V enjoyed> <NP my cookie>>', brackets=12)
-    Traceback (most recent call last):
-      . . .
-    TypeError: brackets must be a length-2 string
-    >>> Tree.fromstring('<<NP my cookie>>', brackets=('<<','>>'))
-    Traceback (most recent call last):
-      . . .
-    TypeError: brackets must be a length-2 string
-
-(We may add support for multi-character brackets in the future, in
-which case the ``brackets=('<<','>>')`` example would start working.)
-
-Whitespace brackets are not permitted:
-
-    >>> Tree.fromstring('(NP my cookie\n', brackets='(\n')
-    Traceback (most recent call last):
-      . . .
-    TypeError: whitespace brackets not allowed
-
-If an invalid tree is given to Tree.fromstring, then it raises a
-ValueError, with a description of the problem:
-
-    >>> Tree.fromstring('(NP my cookie) (NP my milk)')
-    Traceback (most recent call last):
-      . . .
-    ValueError: Tree.fromstring(): expected 'end-of-string' but got '(NP'
-                at index 15.
-                    "...y cookie) (NP my mil..."
-                                  ^
-    >>> Tree.fromstring(')NP my cookie(')
-    Traceback (most recent call last):
-      . . .
-    ValueError: Tree.fromstring(): expected '(' but got ')'
-                at index 0.
-                    ")NP my coo..."
-                     ^
-    >>> Tree.fromstring('(NP my cookie))')
-    Traceback (most recent call last):
-      . . .
-    ValueError: Tree.fromstring(): expected 'end-of-string' but got ')'
-                at index 14.
-                    "...my cookie))"
-                                  ^
-    >>> Tree.fromstring('my cookie)')
-    Traceback (most recent call last):
-      . . .
-    ValueError: Tree.fromstring(): expected '(' but got 'my'
-                at index 0.
-                    "my cookie)"
-                     ^
-    >>> Tree.fromstring('(NP my cookie')
-    Traceback (most recent call last):
-      . . .
-    ValueError: Tree.fromstring(): expected ')' but got 'end-of-string'
-                at index 13.
-                    "... my cookie"
-                                  ^
-    >>> Tree.fromstring('')
-    Traceback (most recent call last):
-      . . .
-    ValueError: Tree.fromstring(): expected '(' but got 'end-of-string'
-                at index 0.
-                    ""
-                     ^
-
-Trees with no children are supported:
-
-    >>> print(Tree.fromstring('(S)'))
-    (S )
-    >>> print(Tree.fromstring('(X (Y) (Z))'))
-    (X (Y ) (Z ))
-
-Trees with an empty node label and no children are supported:
-
-    >>> print(Tree.fromstring('()'))
-    ( )
-    >>> print(Tree.fromstring('(X () ())'))
-    (X ( ) ( ))
-
-Trees with an empty node label and children are supported, but only if the
-first child is not a leaf (otherwise, it will be treated as the node label).
-
-    >>> print(Tree.fromstring('((A) (B) (C))'))
-    ( (A ) (B ) (C ))
-    >>> print(Tree.fromstring('((A) leaf)'))
-    ( (A ) leaf)
-    >>> print(Tree.fromstring('(((())))'))
-    ( ( ( ( ))))
-
-The optional arguments `read_node` and `read_leaf` may be used to
-transform the string values of nodes or leaves.
-
-    >>> print(Tree.fromstring('(A b (C d e) (F (G h i)))',
-    ...                  read_node=lambda s: '<%s>' % s,
-    ...                  read_leaf=lambda s: '"%s"' % s))
-    (<A> "b" (<C> "d" "e") (<F> (<G> "h" "i")))
-
-These transformation functions are typically used when the node or
-leaf labels should be parsed to a non-string value (such as a feature
-structure).  If node and leaf labels need to be able to include
-whitespace, then you must also use the optional `node_pattern` and
-`leaf_pattern` arguments.
-
-    >>> from nltk.featstruct import FeatStruct
-    >>> tree = Tree.fromstring('([cat=NP] [lex=the] [lex=dog])',
-    ...                   read_node=FeatStruct, read_leaf=FeatStruct)
-    >>> tree.set_label(tree.label().unify(FeatStruct('[num=singular]')))
-    >>> print(tree)
-    ([cat='NP', num='singular'] [lex='the'] [lex='dog'])
-
-The optional argument ``remove_empty_top_bracketing`` can be used to
-remove any top-level empty bracketing that occurs.
-
-    >>> print(Tree.fromstring('((S (NP I) (VP (V enjoyed) (NP my cookie))))',
-    ...                  remove_empty_top_bracketing=True))
-    (S (NP I) (VP (V enjoyed) (NP my cookie)))
-
-It will not remove a top-level empty bracketing with multiple children:
-
-    >>> print(Tree.fromstring('((A a) (B b))'))
-    ( (A a) (B b))
-
-Parented Trees
-==============
-`ParentedTree` is a subclass of `Tree` that automatically maintains
-parent pointers for single-parented trees.  Parented trees can be
-created directly from a node label and a list of children:
-
-    >>> ptree = (
-    ...     ParentedTree('VP', [
-    ...         ParentedTree('VERB', ['saw']),
-    ...         ParentedTree('NP', [
-    ...             ParentedTree('DET', ['the']),
-    ...             ParentedTree('NOUN', ['dog'])])]))
-    >>> print(ptree)
-    (VP (VERB saw) (NP (DET the) (NOUN dog)))
-
-Parented trees can be created from strings using the classmethod
-`ParentedTree.fromstring`:
-
-    >>> ptree = ParentedTree.fromstring('(VP (VERB saw) (NP (DET the) (NOUN dog)))')
-    >>> print(ptree)
-    (VP (VERB saw) (NP (DET the) (NOUN dog)))
-    >>> print(type(ptree))
-    <class 'nltk.tree.ParentedTree'>
-
-Parented trees can also be created by using the classmethod
-`ParentedTree.convert` to convert another type of tree to a parented
-tree:
-
-    >>> tree = Tree.fromstring('(VP (VERB saw) (NP (DET the) (NOUN dog)))')
-    >>> ptree = ParentedTree.convert(tree)
-    >>> print(ptree)
-    (VP (VERB saw) (NP (DET the) (NOUN dog)))
-    >>> print(type(ptree))
-    <class 'nltk.tree.ParentedTree'>
-
-.. clean-up:
-
-    >>> del tree
-
-`ParentedTree`\ s should never be used in the same tree as `Tree`\ s
-or `MultiParentedTree`\ s.  Mixing tree implementations may result in
-incorrect parent pointers and in `TypeError` exceptions:
-
-    >>> # Inserting a Tree in a ParentedTree gives an exception:
-    >>> ParentedTree('NP', [
-    ...     Tree('DET', ['the']), Tree('NOUN', ['dog'])])
-    Traceback (most recent call last):
-      . . .
-    TypeError: Can not insert a non-ParentedTree into a ParentedTree
-
-    >>> # inserting a ParentedTree in a Tree gives incorrect parent pointers:
-    >>> broken_tree = Tree('NP', [
-    ...     ParentedTree('DET', ['the']), ParentedTree('NOUN', ['dog'])])
-    >>> print(broken_tree[0].parent())
-    None
-
-Parented Tree Methods
-------------------------
-In addition to all the methods defined by the `Tree` class, the
-`ParentedTree` class adds six new methods whose values are
-automatically updated whenever a parented tree is modified: `parent()`,
-`parent_index()`, `left_sibling()`, `right_sibling()`, `root()`, and
-`treeposition()`.
-
-The `parent()` method returns a `ParentedTree`\ 's parent, if it has
-one; and ``None`` otherwise.  `ParentedTree`\ s that do not have
-parents are known as "root trees."
-
-    >>> for subtree in ptree.subtrees():
-    ...     print(subtree)
-    ...     print('  Parent = %s' % subtree.parent())
-    (VP (VERB saw) (NP (DET the) (NOUN dog)))
-      Parent = None
-    (VERB saw)
-      Parent = (VP (VERB saw) (NP (DET the) (NOUN dog)))
-    (NP (DET the) (NOUN dog))
-      Parent = (VP (VERB saw) (NP (DET the) (NOUN dog)))
-    (DET the)
-      Parent = (NP (DET the) (NOUN dog))
-    (NOUN dog)
-      Parent = (NP (DET the) (NOUN dog))
-
-The `parent_index()` method stores the index of a tree in its parent's
-child list.  If a tree does not have a parent, then its `parent_index`
-is ``None``.
-
-    >>> for subtree in ptree.subtrees():
-    ...     print(subtree)
-    ...     print('  Parent Index = %s' % subtree.parent_index())
-    ...     assert (subtree.parent() is None or
-    ...             subtree.parent()[subtree.parent_index()] is subtree)
-    (VP (VERB saw) (NP (DET the) (NOUN dog)))
-      Parent Index = None
-    (VERB saw)
-      Parent Index = 0
-    (NP (DET the) (NOUN dog))
-      Parent Index = 1
-    (DET the)
-      Parent Index = 0
-    (NOUN dog)
-      Parent Index = 1
-
-Note that ``ptree.parent().index(ptree)`` is *not* equivalent to
-``ptree.parent_index()``.  In particular, ``ptree.parent().index(ptree)``
-will return the index of the first child of ``ptree.parent()`` that is
-equal to ``ptree`` (using ``==``); and that child may not be
-``ptree``:
-
-    >>> on_and_on = ParentedTree('CONJP', [
-    ...     ParentedTree('PREP', ['on']),
-    ...     ParentedTree('COJN', ['and']),
-    ...     ParentedTree('PREP', ['on'])])
-    >>> second_on = on_and_on[2]
-    >>> print(second_on.parent_index())
-    2
-    >>> print(second_on.parent().index(second_on))
-    0
-
-The methods `left_sibling()` and `right_sibling()` can be used to get a
-parented tree's siblings.  If a tree does not have a left or right
-sibling, then the corresponding method's value is ``None``:
-
-    >>> for subtree in ptree.subtrees():
-    ...     print(subtree)
-    ...     print('  Left Sibling  = %s' % subtree.left_sibling())
-    ...     print('  Right Sibling = %s' % subtree.right_sibling())
-    (VP (VERB saw) (NP (DET the) (NOUN dog)))
-      Left Sibling  = None
-      Right Sibling = None
-    (VERB saw)
-      Left Sibling  = None
-      Right Sibling = (NP (DET the) (NOUN dog))
-    (NP (DET the) (NOUN dog))
-      Left Sibling  = (VERB saw)
-      Right Sibling = None
-    (DET the)
-      Left Sibling  = None
-      Right Sibling = (NOUN dog)
-    (NOUN dog)
-      Left Sibling  = (DET the)
-      Right Sibling = None
-
-A parented tree's root tree can be accessed using the `root()`
-method.  This method follows the tree's parent pointers until it
-finds a tree without a parent.  If a tree does not have a parent, then
-it is its own root:
-
-    >>> for subtree in ptree.subtrees():
-    ...     print(subtree)
-    ...     print('  Root = %s' % subtree.root())
-    (VP (VERB saw) (NP (DET the) (NOUN dog)))
-      Root = (VP (VERB saw) (NP (DET the) (NOUN dog)))
-    (VERB saw)
-      Root = (VP (VERB saw) (NP (DET the) (NOUN dog)))
-    (NP (DET the) (NOUN dog))
-      Root = (VP (VERB saw) (NP (DET the) (NOUN dog)))
-    (DET the)
-      Root = (VP (VERB saw) (NP (DET the) (NOUN dog)))
-    (NOUN dog)
-      Root = (VP (VERB saw) (NP (DET the) (NOUN dog)))
-
-The `treeposition()` method can be used to find a tree's treeposition
-relative to its root:
-
-    >>> for subtree in ptree.subtrees():
-    ...     print(subtree)
-    ...     print('  Tree Position = %s' % (subtree.treeposition(),))
-    ...     assert subtree.root()[subtree.treeposition()] is subtree
-    (VP (VERB saw) (NP (DET the) (NOUN dog)))
-      Tree Position = ()
-    (VERB saw)
-      Tree Position = (0,)
-    (NP (DET the) (NOUN dog))
-      Tree Position = (1,)
-    (DET the)
-      Tree Position = (1, 0)
-    (NOUN dog)
-      Tree Position = (1, 1)
-
-Whenever a parented tree is modified, all of the methods described
-above (`parent()`, `parent_index()`, `left_sibling()`, `right_sibling()`,
-`root()`, and `treeposition()`) are automatically updated.  For example,
-if we replace ``ptree``\ 's subtree for the word "dog" with a new
-subtree for "cat," the method values for both the "dog" subtree and the
-"cat" subtree get automatically updated:
-
-    >>> # Replace the dog with a cat
-    >>> dog = ptree[1,1]
-    >>> cat = ParentedTree('NOUN', ['cat'])
-    >>> ptree[1,1] = cat
-
-    >>> # the noun phrase is no longer the dog's parent:
-    >>> print(dog.parent(), dog.parent_index(), dog.left_sibling())
-    None None None
-    >>> # dog is now its own root.
-    >>> print(dog.root())
-    (NOUN dog)
-    >>> print(dog.treeposition())
-    ()
-
-    >>> # the cat's parent is now the noun phrase:
-    >>> print(cat.parent())
-    (NP (DET the) (NOUN cat))
-    >>> print(cat.parent_index())
-    1
-    >>> print(cat.left_sibling())
-    (DET the)
-    >>> print(cat.root())
-    (VP (VERB saw) (NP (DET the) (NOUN cat)))
-    >>> print(cat.treeposition())
-    (1, 1)
-
-ParentedTree Regression Tests
------------------------------
-Keep track of all trees that we create (including subtrees) using this
-variable:
-
-    >>> all_ptrees = []
-
-Define a helper function to create new parented trees:
-
-    >>> def make_ptree(s):
-    ...     ptree = ParentedTree.convert(Tree.fromstring(s))
-    ...     all_ptrees.extend(t for t in ptree.subtrees()
-    ...                       if isinstance(t, Tree))
-    ...     return ptree
-
-Define a test function that examines every subtree in all_ptrees; and
-checks that all six of its methods are defined correctly.  If any
-ptrees are passed as arguments, then they are printed.
-
-    >>> def pcheck(*print_ptrees):
-    ...     for ptree in all_ptrees:
-    ...         # Check ptree's methods.
-    ...         if ptree.parent() is not None:
-    ...             i = ptree.parent_index()
-    ...             assert ptree.parent()[i] is ptree
-    ...             if i > 0:
-    ...                 assert ptree.left_sibling() is ptree.parent()[i-1]
-    ...             if i < (len(ptree.parent())-1):
-    ...                 assert ptree.right_sibling() is ptree.parent()[i+1]
-    ...             assert len(ptree.treeposition()) > 0
-    ...             assert (ptree.treeposition() ==
-    ...                     ptree.parent().treeposition() + (ptree.parent_index(),))
-    ...             assert ptree.root() is not ptree
-    ...             assert ptree.root() is not None
-    ...             assert ptree.root() is ptree.parent().root()
-    ...             assert ptree.root()[ptree.treeposition()] is ptree
-    ...         else:
-    ...             assert ptree.parent_index() is None
-    ...             assert ptree.left_sibling() is None
-    ...             assert ptree.right_sibling() is None
-    ...             assert ptree.root() is ptree
-    ...             assert ptree.treeposition() == ()
-    ...         # Check ptree's children's methods:
-    ...         for i, child in enumerate(ptree):
-    ...             if isinstance(child, Tree):
-    ...                 # pcheck parent() & parent_index() methods
-    ...                 assert child.parent() is ptree
-    ...                 assert child.parent_index() == i
-    ...                 # pcheck sibling methods
-    ...                 if i == 0:
-    ...                     assert child.left_sibling() is None
-    ...                 else:
-    ...                     assert child.left_sibling() is ptree[i-1]
-    ...                 if i == len(ptree)-1:
-    ...                     assert child.right_sibling() is None
-    ...                 else:
-    ...                     assert child.right_sibling() is ptree[i+1]
-    ...     if print_ptrees:
-    ...         print('ok!', end=' ')
-    ...         for ptree in print_ptrees: print(ptree)
-    ...     else:
-    ...         print('ok!')
-
-Run our test function on a variety of newly-created trees:
-
-    >>> pcheck(make_ptree('(A)'))
-    ok! (A )
-    >>> pcheck(make_ptree('(A (B (C (D) (E f)) g) h)'))
-    ok! (A (B (C (D ) (E f)) g) h)
-    >>> pcheck(make_ptree('(A (B) (C c) (D d d) (E e e e))'))
-    ok! (A (B ) (C c) (D d d) (E e e e))
-    >>> pcheck(make_ptree('(A (B) (C (c)) (D (d) (d)) (E (e) (e) (e)))'))
-    ok! (A (B ) (C (c )) (D (d ) (d )) (E (e ) (e ) (e )))
-
-Run our test function after performing various tree-modification
-operations:
-
-**__delitem__()**
-
-    >>> ptree = make_ptree('(A (B (C (D) (E f) (Q p)) g) h)')
-    >>> e = ptree[0,0,1]
-    >>> del ptree[0,0,1]; pcheck(ptree); pcheck(e)
-    ok! (A (B (C (D ) (Q p)) g) h)
-    ok! (E f)
-    >>> del ptree[0,0,0]; pcheck(ptree)
-    ok! (A (B (C (Q p)) g) h)
-    >>> del ptree[0,1]; pcheck(ptree)
-    ok! (A (B (C (Q p))) h)
-    >>> del ptree[-1]; pcheck(ptree)
-    ok! (A (B (C (Q p))))
-    >>> del ptree[-100]
-    Traceback (most recent call last):
-      . . .
-    IndexError: index out of range
-    >>> del ptree[()]
-    Traceback (most recent call last):
-      . . .
-    IndexError: The tree position () may not be deleted.
-
-    >>> # With slices:
-    >>> ptree = make_ptree('(A (B c) (D e) f g (H i) j (K l))')
-    >>> b = ptree[0]
-    >>> del ptree[0:0]; pcheck(ptree)
-    ok! (A (B c) (D e) f g (H i) j (K l))
-    >>> del ptree[:1]; pcheck(ptree); pcheck(b)
-    ok! (A (D e) f g (H i) j (K l))
-    ok! (B c)
-    >>> del ptree[-2:]; pcheck(ptree)
-    ok! (A (D e) f g (H i))
-    >>> del ptree[1:3]; pcheck(ptree)
-    ok! (A (D e) (H i))
-    >>> ptree = make_ptree('(A (B c) (D e) f g (H i) j (K l))')
-    >>> del ptree[5:1000]; pcheck(ptree)
-    ok! (A (B c) (D e) f g (H i))
-    >>> del ptree[-2:1000]; pcheck(ptree)
-    ok! (A (B c) (D e) f)
-    >>> del ptree[-100:1]; pcheck(ptree)
-    ok! (A (D e) f)
-    >>> ptree = make_ptree('(A (B c) (D e) f g (H i) j (K l))')
-    >>> del ptree[1:-2:2]; pcheck(ptree)
-    ok! (A (B c) f (H i) j (K l))
-
-**__setitem__()**
-
-    >>> ptree = make_ptree('(A (B (C (D) (E f) (Q p)) g) h)')
-    >>> d, e, q = ptree[0,0]
-    >>> ptree[0,0,0] = 'x'; pcheck(ptree); pcheck(d)
-    ok! (A (B (C x (E f) (Q p)) g) h)
-    ok! (D )
-    >>> ptree[0,0,1] = make_ptree('(X (Y z))'); pcheck(ptree); pcheck(e)
-    ok! (A (B (C x (X (Y z)) (Q p)) g) h)
-    ok! (E f)
-    >>> ptree[1] = d; pcheck(ptree)
-    ok! (A (B (C x (X (Y z)) (Q p)) g) (D ))
-    >>> ptree[-1] = 'x'; pcheck(ptree)
-    ok! (A (B (C x (X (Y z)) (Q p)) g) x)
-    >>> ptree[-100] = 'y'
-    Traceback (most recent call last):
-      . . .
-    IndexError: index out of range
-    >>> ptree[()] = make_ptree('(X y)')
-    Traceback (most recent call last):
-      . . .
-    IndexError: The tree position () may not be assigned to.
-
-    >>> # With slices:
-    >>> ptree = make_ptree('(A (B c) (D e) f g (H i) j (K l))')
-    >>> b = ptree[0]
-    >>> ptree[0:0] = ('x', make_ptree('(Y)')); pcheck(ptree)
-    ok! (A x (Y ) (B c) (D e) f g (H i) j (K l))
-    >>> ptree[2:6] = (); pcheck(ptree); pcheck(b)
-    ok! (A x (Y ) (H i) j (K l))
-    ok! (B c)
-    >>> ptree[-2:] = ('z', 'p'); pcheck(ptree)
-    ok! (A x (Y ) (H i) z p)
-    >>> ptree[1:3] = [make_ptree('(X)') for x in range(10)]; pcheck(ptree)
-    ok! (A x (X ) (X ) (X ) (X ) (X ) (X ) (X ) (X ) (X ) (X ) z p)
-    >>> ptree[5:1000] = []; pcheck(ptree)
-    ok! (A x (X ) (X ) (X ) (X ))
-    >>> ptree[-2:1000] = ['n']; pcheck(ptree)
-    ok! (A x (X ) (X ) n)
-    >>> ptree[-100:1] = [make_ptree('(U v)')]; pcheck(ptree)
-    ok! (A (U v) (X ) (X ) n)
-    >>> ptree[-1:] = (make_ptree('(X)') for x in range(3)); pcheck(ptree)
-    ok! (A (U v) (X ) (X ) (X ) (X ) (X ))
-    >>> ptree[1:-2:2] = ['x', 'y']; pcheck(ptree)
-    ok! (A (U v) x (X ) y (X ) (X ))
-
-**append()**
-
-    >>> ptree = make_ptree('(A (B (C (D) (E f) (Q p)) g) h)')
-    >>> ptree.append('x'); pcheck(ptree)
-    ok! (A (B (C (D ) (E f) (Q p)) g) h x)
-    >>> ptree.append(make_ptree('(X (Y z))')); pcheck(ptree)
-    ok! (A (B (C (D ) (E f) (Q p)) g) h x (X (Y z)))
-
-**extend()**
-
-    >>> ptree = make_ptree('(A (B (C (D) (E f) (Q p)) g) h)')
-    >>> ptree.extend(['x', 'y', make_ptree('(X (Y z))')]); pcheck(ptree)
-    ok! (A (B (C (D ) (E f) (Q p)) g) h x y (X (Y z)))
-    >>> ptree.extend([]); pcheck(ptree)
-    ok! (A (B (C (D ) (E f) (Q p)) g) h x y (X (Y z)))
-    >>> ptree.extend(make_ptree('(X)') for x in range(3)); pcheck(ptree)
-    ok! (A (B (C (D ) (E f) (Q p)) g) h x y (X (Y z)) (X ) (X ) (X ))
-
-**insert()**
-
-    >>> ptree = make_ptree('(A (B (C (D) (E f) (Q p)) g) h)')
-    >>> ptree.insert(0, make_ptree('(X (Y z))')); pcheck(ptree)
-    ok! (A (X (Y z)) (B (C (D ) (E f) (Q p)) g) h)
-    >>> ptree.insert(-1, make_ptree('(X (Y z))')); pcheck(ptree)
-    ok! (A (X (Y z)) (B (C (D ) (E f) (Q p)) g) (X (Y z)) h)
-    >>> ptree.insert(-4, make_ptree('(X (Y z))')); pcheck(ptree)
-    ok! (A (X (Y z)) (X (Y z)) (B (C (D ) (E f) (Q p)) g) (X (Y z)) h)
-    >>> # Note: as with ``list``, inserting at a negative index that
-    >>> # gives a position before the start of the list does *not*
-    >>> # raise an IndexError exception; it just inserts at 0.
-    >>> ptree.insert(-400, make_ptree('(X (Y z))')); pcheck(ptree)
-    ok! (A
-      (X (Y z))
-      (X (Y z))
-      (X (Y z))
-      (B (C (D ) (E f) (Q p)) g)
-      (X (Y z))
-      h)
-
-**pop()**
-
-    >>> ptree = make_ptree('(A (B (C (D) (E f) (Q p)) g) h)')
-    >>> ptree[0,0].pop(1); pcheck(ptree)
-    ParentedTree('E', ['f'])
-    ok! (A (B (C (D ) (Q p)) g) h)
-    >>> ptree[0].pop(-1); pcheck(ptree)
-    'g'
-    ok! (A (B (C (D ) (Q p))) h)
-    >>> ptree.pop(); pcheck(ptree)
-    'h'
-    ok! (A (B (C (D ) (Q p))))
-    >>> ptree.pop(-100)
-    Traceback (most recent call last):
-      . . .
-    IndexError: index out of range
-
-**remove()**
-
-    >>> ptree = make_ptree('(A (B (C (D) (E f) (Q p)) g) h)')
-    >>> e = ptree[0,0,1]
-    >>> ptree[0,0].remove(ptree[0,0,1]); pcheck(ptree); pcheck(e)
-    ok! (A (B (C (D ) (Q p)) g) h)
-    ok! (E f)
-    >>> ptree[0,0].remove(make_ptree('(Q p)')); pcheck(ptree)
-    ok! (A (B (C (D )) g) h)
-    >>> ptree[0,0].remove(make_ptree('(Q p)'))
-    Traceback (most recent call last):
-      . . .
-    ValueError: ParentedTree('Q', ['p']) is not in list
-    >>> ptree.remove('h'); pcheck(ptree)
-    ok! (A (B (C (D )) g))
-    >>> ptree.remove('h');
-    Traceback (most recent call last):
-      . . .
-    ValueError: 'h' is not in list
-    >>> # remove() removes the first subtree that is equal (==) to the
-    >>> # given tree, which may not be the identical tree we give it:
-    >>> ptree = make_ptree('(A (X x) (Y y) (X x))')
-    >>> x1, y, x2 = ptree
-    >>> ptree.remove(ptree[-1]); pcheck(ptree)
-    ok! (A (Y y) (X x))
-    >>> print(x1.parent()); pcheck(x1)
-    None
-    ok! (X x)
-    >>> print(x2.parent())
-    (A (Y y) (X x))
-
-Test that a tree can not be given multiple parents:
-
-    >>> ptree = make_ptree('(A (X x) (Y y) (Z z))')
-    >>> ptree[0] = ptree[1]
-    Traceback (most recent call last):
-      . . .
-    ValueError: Can not insert a subtree that already has a parent.
-    >>> pcheck()
-    ok!
-
-[more to be written]
-
-
-ImmutableParentedTree Regression Tests
---------------------------------------
-
-    >>> iptree = ImmutableParentedTree.convert(ptree)
-    >>> type(iptree)
-    <class 'nltk.tree.ImmutableParentedTree'>
-    >>> del iptree[0]
-    Traceback (most recent call last):
-      . . .
-    ValueError: ImmutableParentedTree may not be modified
-    >>> iptree.set_label('newnode')
-    Traceback (most recent call last):
-      . . .
-    ValueError: ImmutableParentedTree may not be modified
-
-
-MultiParentedTree Regression Tests
-----------------------------------
-Keep track of all trees that we create (including subtrees) using this
-variable:
-
-    >>> all_mptrees = []
-
-Define a helper function to create new multi-parented trees:
-
-    >>> def make_mptree(s):
-    ...     mptree = MultiParentedTree.convert(Tree.fromstring(s))
-    ...     all_mptrees.extend(t for t in mptree.subtrees()
-    ...                       if isinstance(t, Tree))
-    ...     return mptree
-
-Define a test function that examines every subtree in all_mptrees; and
-checks that all six of its methods are defined correctly.  If any
-mptrees are passed as arguments, then they are printed.
-
-    >>> def mpcheck(*print_mptrees):
-    ...     def has(seq, val): # uses identity comparison
-    ...         for item in seq:
-    ...             if item is val: return True
-    ...         return False
-    ...     for mptree in all_mptrees:
-    ...         # Check mptree's methods.
-    ...         if len(mptree.parents()) == 0:
-    ...             assert len(mptree.left_siblings()) == 0
-    ...             assert len(mptree.right_siblings()) == 0
-    ...             assert len(mptree.roots()) == 1
-    ...             assert mptree.roots()[0] is mptree
-    ...             assert mptree.treepositions(mptree) == [()]
-    ...             left_siblings = right_siblings = ()
-    ...             roots = {id(mptree): 1}
-    ...         else:
-    ...             roots = dict((id(r), 0) for r in mptree.roots())
-    ...             left_siblings = mptree.left_siblings()
-    ...             right_siblings = mptree.right_siblings()
-    ...         for parent in mptree.parents():
-    ...             for i in mptree.parent_indices(parent):
-    ...                 assert parent[i] is mptree
-    ...                 # check left siblings
-    ...                 if i > 0:
-    ...                     for j in range(len(left_siblings)):
-    ...                         if left_siblings[j] is parent[i-1]:
-    ...                             del left_siblings[j]
-    ...                             break
-    ...                     else:
-    ...                         assert 0, 'sibling not found!'
-    ...                 # check right siblings
-    ...                 if i < (len(parent)-1):
-    ...                     for j in range(len(right_siblings)):
-    ...                         if right_siblings[j] is parent[i+1]:
-    ...                             del right_siblings[j]
-    ...                             break
-    ...                     else:
-    ...                         assert 0, 'sibling not found!'
-    ...             # check roots
-    ...             for root in parent.roots():
-    ...                 assert id(root) in roots, 'missing root'
-    ...                 roots[id(root)] += 1
-    ...         # check that we don't have any unexplained values
-    ...         assert len(left_siblings)==0, 'unexpected sibling'
-    ...         assert len(right_siblings)==0, 'unexpected sibling'
-    ...         for v in roots.values(): assert v>0, roots #'unexpected root'
-    ...         # check treepositions
-    ...         for root in mptree.roots():
-    ...             for treepos in mptree.treepositions(root):
-    ...                 assert root[treepos] is mptree
-    ...         # Check mptree's children's methods:
-    ...         for i, child in enumerate(mptree):
-    ...             if isinstance(child, Tree):
-    ...                 # mpcheck parent() & parent_index() methods
-    ...                 assert has(child.parents(), mptree)
-    ...                 assert i in child.parent_indices(mptree)
-    ...                 # mpcheck sibling methods
-    ...                 if i > 0:
-    ...                     assert has(child.left_siblings(), mptree[i-1])
-    ...                 if i < len(mptree)-1:
-    ...                     assert has(child.right_siblings(), mptree[i+1])
-    ...     if print_mptrees:
-    ...         print('ok!', end=' ')
-    ...         for mptree in print_mptrees: print(mptree)
-    ...     else:
-    ...         print('ok!')
-
-Run our test function on a variety of newly-created trees:
-
-    >>> mpcheck(make_mptree('(A)'))
-    ok! (A )
-    >>> mpcheck(make_mptree('(A (B (C (D) (E f)) g) h)'))
-    ok! (A (B (C (D ) (E f)) g) h)
-    >>> mpcheck(make_mptree('(A (B) (C c) (D d d) (E e e e))'))
-    ok! (A (B ) (C c) (D d d) (E e e e))
-    >>> mpcheck(make_mptree('(A (B) (C (c)) (D (d) (d)) (E (e) (e) (e)))'))
-    ok! (A (B ) (C (c )) (D (d ) (d )) (E (e ) (e ) (e )))
-    >>> subtree = make_mptree('(A (B (C (D) (E f)) g) h)')
-
-Including some trees that contain multiple parents:
-
-    >>> mpcheck(MultiParentedTree('Z', [subtree, subtree]))
-    ok! (Z (A (B (C (D ) (E f)) g) h) (A (B (C (D ) (E f)) g) h))
-
-Run our test function after performing various tree-modification
-operations (n.b., these are the same tests that we ran for
-`ParentedTree`, above; thus, none of these trees actually *uses*
-multiple parents.)
-
-**__delitem__()**
-
-    >>> mptree = make_mptree('(A (B (C (D) (E f) (Q p)) g) h)')
-    >>> e = mptree[0,0,1]
-    >>> del mptree[0,0,1]; mpcheck(mptree); mpcheck(e)
-    ok! (A (B (C (D ) (Q p)) g) h)
-    ok! (E f)
-    >>> del mptree[0,0,0]; mpcheck(mptree)
-    ok! (A (B (C (Q p)) g) h)
-    >>> del mptree[0,1]; mpcheck(mptree)
-    ok! (A (B (C (Q p))) h)
-    >>> del mptree[-1]; mpcheck(mptree)
-    ok! (A (B (C (Q p))))
-    >>> del mptree[-100]
-    Traceback (most recent call last):
-      . . .
-    IndexError: index out of range
-    >>> del mptree[()]
-    Traceback (most recent call last):
-      . . .
-    IndexError: The tree position () may not be deleted.
-
-    >>> # With slices:
-    >>> mptree = make_mptree('(A (B c) (D e) f g (H i) j (K l))')
-    >>> b = mptree[0]
-    >>> del mptree[0:0]; mpcheck(mptree)
-    ok! (A (B c) (D e) f g (H i) j (K l))
-    >>> del mptree[:1]; mpcheck(mptree); mpcheck(b)
-    ok! (A (D e) f g (H i) j (K l))
-    ok! (B c)
-    >>> del mptree[-2:]; mpcheck(mptree)
-    ok! (A (D e) f g (H i))
-    >>> del mptree[1:3]; mpcheck(mptree)
-    ok! (A (D e) (H i))
-    >>> mptree = make_mptree('(A (B c) (D e) f g (H i) j (K l))')
-    >>> del mptree[5:1000]; mpcheck(mptree)
-    ok! (A (B c) (D e) f g (H i))
-    >>> del mptree[-2:1000]; mpcheck(mptree)
-    ok! (A (B c) (D e) f)
-    >>> del mptree[-100:1]; mpcheck(mptree)
-    ok! (A (D e) f)
-    >>> mptree = make_mptree('(A (B c) (D e) f g (H i) j (K l))')
-    >>> del mptree[1:-2:2]; mpcheck(mptree)
-    ok! (A (B c) f (H i) j (K l))
-
-**__setitem__()**
-
-    >>> mptree = make_mptree('(A (B (C (D) (E f) (Q p)) g) h)')
-    >>> d, e, q = mptree[0,0]
-    >>> mptree[0,0,0] = 'x'; mpcheck(mptree); mpcheck(d)
-    ok! (A (B (C x (E f) (Q p)) g) h)
-    ok! (D )
-    >>> mptree[0,0,1] = make_mptree('(X (Y z))'); mpcheck(mptree); mpcheck(e)
-    ok! (A (B (C x (X (Y z)) (Q p)) g) h)
-    ok! (E f)
-    >>> mptree[1] = d; mpcheck(mptree)
-    ok! (A (B (C x (X (Y z)) (Q p)) g) (D ))
-    >>> mptree[-1] = 'x'; mpcheck(mptree)
-    ok! (A (B (C x (X (Y z)) (Q p)) g) x)
-    >>> mptree[-100] = 'y'
-    Traceback (most recent call last):
-      . . .
-    IndexError: index out of range
-    >>> mptree[()] = make_mptree('(X y)')
-    Traceback (most recent call last):
-      . . .
-    IndexError: The tree position () may not be assigned to.
-
-    >>> # With slices:
-    >>> mptree = make_mptree('(A (B c) (D e) f g (H i) j (K l))')
-    >>> b = mptree[0]
-    >>> mptree[0:0] = ('x', make_mptree('(Y)')); mpcheck(mptree)
-    ok! (A x (Y ) (B c) (D e) f g (H i) j (K l))
-    >>> mptree[2:6] = (); mpcheck(mptree); mpcheck(b)
-    ok! (A x (Y ) (H i) j (K l))
-    ok! (B c)
-    >>> mptree[-2:] = ('z', 'p'); mpcheck(mptree)
-    ok! (A x (Y ) (H i) z p)
-    >>> mptree[1:3] = [make_mptree('(X)') for x in range(10)]; mpcheck(mptree)
-    ok! (A x (X ) (X ) (X ) (X ) (X ) (X ) (X ) (X ) (X ) (X ) z p)
-    >>> mptree[5:1000] = []; mpcheck(mptree)
-    ok! (A x (X ) (X ) (X ) (X ))
-    >>> mptree[-2:1000] = ['n']; mpcheck(mptree)
-    ok! (A x (X ) (X ) n)
-    >>> mptree[-100:1] = [make_mptree('(U v)')]; mpcheck(mptree)
-    ok! (A (U v) (X ) (X ) n)
-    >>> mptree[-1:] = (make_mptree('(X)') for x in range(3)); mpcheck(mptree)
-    ok! (A (U v) (X ) (X ) (X ) (X ) (X ))
-    >>> mptree[1:-2:2] = ['x', 'y']; mpcheck(mptree)
-    ok! (A (U v) x (X ) y (X ) (X ))
-
-**append()**
-
-    >>> mptree = make_mptree('(A (B (C (D) (E f) (Q p)) g) h)')
-    >>> mptree.append('x'); mpcheck(mptree)
-    ok! (A (B (C (D ) (E f) (Q p)) g) h x)
-    >>> mptree.append(make_mptree('(X (Y z))')); mpcheck(mptree)
-    ok! (A (B (C (D ) (E f) (Q p)) g) h x (X (Y z)))
-
-**extend()**
-
-    >>> mptree = make_mptree('(A (B (C (D) (E f) (Q p)) g) h)')
-    >>> mptree.extend(['x', 'y', make_mptree('(X (Y z))')]); mpcheck(mptree)
-    ok! (A (B (C (D ) (E f) (Q p)) g) h x y (X (Y z)))
-    >>> mptree.extend([]); mpcheck(mptree)
-    ok! (A (B (C (D ) (E f) (Q p)) g) h x y (X (Y z)))
-    >>> mptree.extend(make_mptree('(X)') for x in range(3)); mpcheck(mptree)
-    ok! (A (B (C (D ) (E f) (Q p)) g) h x y (X (Y z)) (X ) (X ) (X ))
-
-**insert()**
-
-    >>> mptree = make_mptree('(A (B (C (D) (E f) (Q p)) g) h)')
-    >>> mptree.insert(0, make_mptree('(X (Y z))')); mpcheck(mptree)
-    ok! (A (X (Y z)) (B (C (D ) (E f) (Q p)) g) h)
-    >>> mptree.insert(-1, make_mptree('(X (Y z))')); mpcheck(mptree)
-    ok! (A (X (Y z)) (B (C (D ) (E f) (Q p)) g) (X (Y z)) h)
-    >>> mptree.insert(-4, make_mptree('(X (Y z))')); mpcheck(mptree)
-    ok! (A (X (Y z)) (X (Y z)) (B (C (D ) (E f) (Q p)) g) (X (Y z)) h)
-    >>> # Note: as with ``list``, inserting at a negative index that
-    >>> # gives a position before the start of the list does *not*
-    >>> # raise an IndexError exception; it just inserts at 0.
-    >>> mptree.insert(-400, make_mptree('(X (Y z))')); mpcheck(mptree)
-    ok! (A
-      (X (Y z))
-      (X (Y z))
-      (X (Y z))
-      (B (C (D ) (E f) (Q p)) g)
-      (X (Y z))
-      h)
-
-**pop()**
-
-    >>> mptree = make_mptree('(A (B (C (D) (E f) (Q p)) g) h)')
-    >>> mptree[0,0].pop(1); mpcheck(mptree)
-    MultiParentedTree('E', ['f'])
-    ok! (A (B (C (D ) (Q p)) g) h)
-    >>> mptree[0].pop(-1); mpcheck(mptree)
-    'g'
-    ok! (A (B (C (D ) (Q p))) h)
-    >>> mptree.pop(); mpcheck(mptree)
-    'h'
-    ok! (A (B (C (D ) (Q p))))
-    >>> mptree.pop(-100)
-    Traceback (most recent call last):
-      . . .
-    IndexError: index out of range
-
-**remove()**
-
-    >>> mptree = make_mptree('(A (B (C (D) (E f) (Q p)) g) h)')
-    >>> e = mptree[0,0,1]
-    >>> mptree[0,0].remove(mptree[0,0,1]); mpcheck(mptree); mpcheck(e)
-    ok! (A (B (C (D ) (Q p)) g) h)
-    ok! (E f)
-    >>> mptree[0,0].remove(make_mptree('(Q p)')); mpcheck(mptree)
-    ok! (A (B (C (D )) g) h)
-    >>> mptree[0,0].remove(make_mptree('(Q p)'))
-    Traceback (most recent call last):
-      . . .
-    ValueError: MultiParentedTree('Q', ['p']) is not in list
-    >>> mptree.remove('h'); mpcheck(mptree)
-    ok! (A (B (C (D )) g))
-    >>> mptree.remove('h');
-    Traceback (most recent call last):
-      . . .
-    ValueError: 'h' is not in list
-    >>> # remove() removes the first subtree that is equal (==) to the
-    >>> # given tree, which may not be the identical tree we give it:
-    >>> mptree = make_mptree('(A (X x) (Y y) (X x))')
-    >>> x1, y, x2 = mptree
-    >>> mptree.remove(mptree[-1]); mpcheck(mptree)
-    ok! (A (Y y) (X x))
-    >>> print([str(p) for p in x1.parents()])
-    []
-    >>> print([str(p) for p in x2.parents()])
-    ['(A (Y y) (X x))']
-
-
-ImmutableMultiParentedTree Regression Tests
--------------------------------------------
-
-    >>> imptree = ImmutableMultiParentedTree.convert(mptree)
-    >>> type(imptree)
-    <class 'nltk.tree.ImmutableMultiParentedTree'>
-    >>> del imptree[0]
-    Traceback (most recent call last):
-      . . .
-    ValueError: ImmutableMultiParentedTree may not be modified
-    >>> imptree.set_label('newnode')
-    Traceback (most recent call last):
-      . . .
-    ValueError: ImmutableMultiParentedTree may not be modified
-
-
-ProbabilisticTree Regression Tests
-----------------------------------
-
-    >>> prtree = ProbabilisticTree("S", [ProbabilisticTree("NP", ["N"], prob=0.3)], prob=0.6)
-    >>> print(prtree)
-    (S (NP N)) (p=0.6)
-    >>> import copy
-    >>> prtree == copy.deepcopy(prtree) == prtree.copy(deep=True) == prtree.copy()
-    True
-    >>> prtree[0] is prtree.copy()[0]
-    True
-    >>> prtree[0] is prtree.copy(deep=True)[0]
-    False
-
-    >>> imprtree = ImmutableProbabilisticTree.convert(prtree)
-    >>> type(imprtree)
-    <class 'nltk.tree.ImmutableProbabilisticTree'>
-    >>> del imprtree[0]
-    Traceback (most recent call last):
-      . . .
-    ValueError: ImmutableProbabilisticTree may not be modified
-    >>> imprtree.set_label('newnode')
-    Traceback (most recent call last):
-      . . .
-    ValueError: ImmutableProbabilisticTree may not be modified
-
-
-Squashed Bugs
-=============
-
-This used to discard the ``(B b)`` subtree (fixed in svn 6270):
-
-    >>> print(Tree.fromstring('((A a) (B b))'))
-    ( (A a) (B b))
-
diff --git a/nlp_resource_data/nltk/test/treeprettyprinter.doctest b/nlp_resource_data/nltk/test/treeprettyprinter.doctest

deleted file mode 100644 (file)

index 8302c2c..0000000
--- a/nlp_resource_data/nltk/test/treeprettyprinter.doctest
+++ /dev/null
@@ -1,127 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-========================================================
- Unit tests for nltk.treeprettyprinter.TreePrettyPrinter
-========================================================
-
-    >>> from nltk.tree import Tree
-    >>> from nltk.treeprettyprinter import TreePrettyPrinter
-
-Tree nr 2170 from nltk.corpus.treebank:
-
-    >>> tree = Tree.fromstring(
-    ...     '(S (NP-SBJ (PRP I)) (VP (VBP feel) (ADJP-PRD (RB pretty) '
-    ...     '(JJ good)) (PP-CLR (IN about) (NP (PRP it)))) (. .))')
-    >>> tpp = TreePrettyPrinter(tree)
-    >>> print(tpp.text())
-                                 S                       
-       __________________________|_____________________   
-      |                          VP                    | 
-      |      ____________________|___________          |  
-      |     |             |                PP-CLR      | 
-      |     |             |             _____|_____    |  
-    NP-SBJ  |          ADJP-PRD        |           NP  | 
-      |     |      _______|______      |           |   |  
-     PRP   VBP    RB             JJ    IN         PRP  . 
-      |     |     |              |     |           |   |  
-      I    feel pretty          good about         it  . 
-
-    >>> print(tpp.text(unicodelines=True))
-                                 S                       
-      ┌──────────────────────────┼─────────────────────┐  
-      │                          VP                    │ 
-      │     ┌─────────────┬──────┴───────────┐         │  
-      │     │             │                PP-CLR      │ 
-      │     │             │            ┌─────┴─────┐   │  
-    NP-SBJ  │          ADJP-PRD        │           NP  │ 
-      │     │     ┌───────┴──────┐     │           │   │  
-     PRP   VBP    RB             JJ    IN         PRP  . 
-      │     │     │              │     │           │   │  
-      I    feel pretty          good about         it  . 
-
-A tree with long labels:
-
-    >>> tree = Tree.fromstring(
-    ...     '(sentence (plural-noun-phrase (plural-noun Superconductors)) '
-    ...     '(verb-phrase (plural-verb conduct) '
-    ...     '(noun-phrase (singular-noun electricity))))')
-    >>> tpp = TreePrettyPrinter(tree)
-    >>> print(tpp.text(abbreviate=8, nodedist=2))
-                sentence                      
-         __________|__________                  
-        |                 verb-phr.           
-        |           __________|__________       
-    plural-n.      |                 noun-phr.
-        |          |                     |      
-    plural-n.  plural-v.             singular.
-        |          |                     |      
-    Supercon.   conduct              electric.
-
-    >>> print(tpp.text(maxwidth=8, nodedist=2))
-              sentence                   
-        _________|________                 
-       |                verb-            
-       |                phrase           
-       |          ________|_________       
-    plural-      |                noun-  
-     noun-       |                phrase 
-     phrase      |                  |    
-       |         |                  |      
-    plural-   plural-           singular-
-      noun      verb               noun  
-       |         |                  |      
-    Supercon  conduct            electric
-    ductors                        ity   
-
-A discontinuous tree:
-
-    >>> tree = Tree.fromstring(
-    ...     '(top (punct 8) (smain (noun 0) (verb 1) (inf (verb 5) (inf (verb 6) '
-    ...     '(conj (inf (pp (prep 2) (np (det 3) (noun 4))) (verb 7)) (inf (verb 9)) '
-    ...     '(vg 10) (inf (verb 11)))))) (punct 12))', read_leaf=int)
-    >>> sentence = ('Ze had met haar moeder kunnen gaan winkelen ,'
-    ...             ' zwemmen of terrassen .'.split())
-    >>> tpp = TreePrettyPrinter(tree, sentence)
-    >>> print(tpp.text())
-                                          top                                                
-                                      _____|______________________________________________    
-                                   smain                      |                           |  
-      _______________________________|_____                   |                           |   
-     |    |                               inf                 |                           |  
-     |    |                           _____|____              |                           |   
-     |    |                          |         inf            |                           |  
-     |    |                          |      ____|_____        |                           |   
-     |    |                          |     |         conj     |                           |  
-     |    |                    _____ | ___ | _________|______ | __________________        |   
-     |    |                  inf     |     |                  |      |     |      |       |  
-     |    |          _________|_____ | ___ | _________        |      |     |      |       |   
-     |    |         pp               |     |          |       |      |     |      |       |  
-     |    |     ____|____            |     |          |       |      |     |      |       |   
-     |    |    |         np          |     |          |       |     inf    |     inf      |  
-     |    |    |     ____|____       |     |          |       |      |     |      |       |   
-    noun verb prep det       noun   verb  verb       verb   punct   verb   vg    verb   punct
-     |    |    |    |         |      |     |          |       |      |     |      |       |   
-     Ze  had  met  haar     moeder kunnen gaan     winkelen   ,   zwemmen  of terrassen   .  
-
-    >>> print(tpp.text(unicodelines=True))
-                                          top                                                
-                                     ┌─────┴──────────────────┬───────────────────────────┐   
-                                   smain                      │                           │  
-     ┌────┬──────────────────────────┴─────┐                  │                           │   
-     │    │                               inf                 │                           │  
-     │    │                          ┌─────┴────┐             │                           │   
-     │    │                          │         inf            │                           │  
-     │    │                          │     ┌────┴─────┐       │                           │   
-     │    │                          │     │         conj     │                           │  
-     │    │                   ┌───── │ ─── │ ─────────┴────── │ ─────┬─────┬──────┐       │   
-     │    │                  inf     │     │                  │      │     │      │       │  
-     │    │         ┌─────────┴───── │ ─── │ ─────────┐       │      │     │      │       │   
-     │    │         pp               │     │          │       │      │     │      │       │  
-     │    │    ┌────┴────┐           │     │          │       │      │     │      │       │   
-     │    │    │         np          │     │          │       │     inf    │     inf      │  
-     │    │    │    ┌────┴────┐      │     │          │       │      │     │      │       │   
-    noun verb prep det       noun   verb  verb       verb   punct   verb   vg    verb   punct
-     │    │    │    │         │      │     │          │       │      │     │      │       │   
-     Ze  had  met  haar     moeder kunnen gaan     winkelen   ,   zwemmen  of terrassen   .  
-
diff --git a/nlp_resource_data/nltk/test/treetransforms.doctest b/nlp_resource_data/nltk/test/treetransforms.doctest

deleted file mode 100644 (file)

index e44e504..0000000
--- a/nlp_resource_data/nltk/test/treetransforms.doctest
+++ /dev/null
@@ -1,156 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
--------------------------------------------
-Unit tests for the TreeTransformation class
--------------------------------------------
-
-    >>> from copy import deepcopy
-    >>> from nltk.tree import *
-    >>> from nltk.treetransforms import *
-
-    >>> tree_string = "(TOP (S (S (VP (VBN Turned) (ADVP (RB loose)) (PP (IN in) (NP (NP (NNP Shane) (NNP Longman) (POS 's)) (NN trading) (NN room))))) (, ,) (NP (DT the) (NN yuppie) (NNS dealers)) (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right)))) (. .)))"
-
-    >>> tree = Tree.fromstring(tree_string)
-    >>> print(tree)
-    (TOP
-      (S
-        (S
-          (VP
-            (VBN Turned)
-            (ADVP (RB loose))
-            (PP
-              (IN in)
-              (NP
-                (NP (NNP Shane) (NNP Longman) (POS 's))
-                (NN trading)
-                (NN room)))))
-        (, ,)
-        (NP (DT the) (NN yuppie) (NNS dealers))
-        (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right))))
-        (. .)))
-
-Make a copy of the original tree and collapse the subtrees with only one child
-
-    >>> collapsedTree = deepcopy(tree)
-    >>> collapse_unary(collapsedTree)
-    >>> print(collapsedTree)
-    (TOP
-      (S
-        (S+VP
-          (VBN Turned)
-          (ADVP (RB loose))
-          (PP
-            (IN in)
-            (NP
-              (NP (NNP Shane) (NNP Longman) (POS 's))
-              (NN trading)
-              (NN room))))
-        (, ,)
-        (NP (DT the) (NN yuppie) (NNS dealers))
-        (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right))))
-        (. .)))
-
-    >>> collapsedTree2 = deepcopy(tree)
-    >>> collapse_unary(collapsedTree2, collapsePOS=True, collapseRoot=True)
-    >>> print(collapsedTree2)
-    (TOP+S
-      (S+VP
-        (VBN Turned)
-        (ADVP+RB loose)
-        (PP
-          (IN in)
-          (NP
-            (NP (NNP Shane) (NNP Longman) (POS 's))
-            (NN trading)
-            (NN room))))
-      (, ,)
-      (NP (DT the) (NN yuppie) (NNS dealers))
-      (VP (AUX do) (NP (NP+RB little) (ADJP+RB right)))
-      (. .))
-
-Convert the tree to Chomsky Normal Form i.e. each subtree has either two
-subtree children or a single leaf value. This conversion can be performed
-using either left- or right-factoring.
-
-    >>> cnfTree = deepcopy(collapsedTree)
-    >>> chomsky_normal_form(cnfTree, factor='left')
-    >>> print(cnfTree)
-    (TOP
-      (S
-        (S|<S+VP-,-NP-VP>
-          (S|<S+VP-,-NP>
-            (S|<S+VP-,>
-              (S+VP
-                (S+VP|<VBN-ADVP> (VBN Turned) (ADVP (RB loose)))
-                (PP
-                  (IN in)
-                  (NP
-                    (NP|<NP-NN>
-                      (NP
-                        (NP|<NNP-NNP> (NNP Shane) (NNP Longman))
-                        (POS 's))
-                      (NN trading))
-                    (NN room))))
-              (, ,))
-            (NP (NP|<DT-NN> (DT the) (NN yuppie)) (NNS dealers)))
-          (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right)))))
-        (. .)))
-
-    >>> cnfTree = deepcopy(collapsedTree)
-    >>> chomsky_normal_form(cnfTree, factor='right')
-    >>> print(cnfTree)
-    (TOP
-      (S
-        (S+VP
-          (VBN Turned)
-          (S+VP|<ADVP-PP>
-            (ADVP (RB loose))
-            (PP
-              (IN in)
-              (NP
-                (NP (NNP Shane) (NP|<NNP-POS> (NNP Longman) (POS 's)))
-                (NP|<NN-NN> (NN trading) (NN room))))))
-        (S|<,-NP-VP-.>
-          (, ,)
-          (S|<NP-VP-.>
-            (NP (DT the) (NP|<NN-NNS> (NN yuppie) (NNS dealers)))
-            (S|<VP-.>
-              (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right))))
-              (. .))))))
-
-Employ some Markov smoothing to make the artificial node labels a bit more
-readable. See the treetransforms.py documentation for more details.
-
-    >>> markovTree = deepcopy(collapsedTree)
-    >>> chomsky_normal_form(markovTree, horzMarkov=2, vertMarkov=1)
-    >>> print(markovTree)
-    (TOP
-      (S^<TOP>
-        (S+VP^<S>
-          (VBN Turned)
-          (S+VP|<ADVP-PP>^<S>
-            (ADVP^<S+VP> (RB loose))
-            (PP^<S+VP>
-              (IN in)
-              (NP^<PP>
-                (NP^<NP>
-                  (NNP Shane)
-                  (NP|<NNP-POS>^<NP> (NNP Longman) (POS 's)))
-                (NP|<NN-NN>^<PP> (NN trading) (NN room))))))
-        (S|<,-NP>^<TOP>
-          (, ,)
-          (S|<NP-VP>^<TOP>
-            (NP^<S> (DT the) (NP|<NN-NNS>^<S> (NN yuppie) (NNS dealers)))
-            (S|<VP-.>^<TOP>
-              (VP^<S>
-                (AUX do)
-                (NP^<VP> (NP^<NP> (RB little)) (ADJP^<NP> (RB right))))
-              (. .))))))
-
-Convert the transformed tree back to its original form
-
-    >>> un_chomsky_normal_form(markovTree)
-    >>> tree == markovTree
-    True
-
diff --git a/nlp_resource_data/nltk/test/unit/__init__.py b/nlp_resource_data/nltk/test/unit/__init__.py

deleted file mode 100644 (file)

index e69de29..0000000
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/__init__.cpython-37.pyc

deleted file mode 100644 (file)

index 2b18bf3..0000000

Binary files a/nlp_resource_data/nltk/test/unit/__pycache__/__init__.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_2x_compat.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_2x_compat.cpython-37.pyc

deleted file mode 100644 (file)

index e912498..0000000

Binary files a/nlp_resource_data/nltk/test/unit/__pycache__/test_2x_compat.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_aline.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_aline.cpython-37.pyc

deleted file mode 100644 (file)

index 8b09c95..0000000

Binary files a/nlp_resource_data/nltk/test/unit/__pycache__/test_aline.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_brill.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_brill.cpython-37.pyc

deleted file mode 100644 (file)

index 4770fe9..0000000

Binary files a/nlp_resource_data/nltk/test/unit/__pycache__/test_brill.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_chunk.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_chunk.cpython-37.pyc

deleted file mode 100644 (file)

index b9ab911..0000000

Binary files a/nlp_resource_data/nltk/test/unit/__pycache__/test_chunk.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_classify.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_classify.cpython-37.pyc

deleted file mode 100644 (file)

index 3f89e36..0000000

Binary files a/nlp_resource_data/nltk/test/unit/__pycache__/test_classify.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_collocations.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_collocations.cpython-37.pyc

deleted file mode 100644 (file)

index 957a3ce..0000000

Binary files a/nlp_resource_data/nltk/test/unit/__pycache__/test_collocations.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_concordance.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_concordance.cpython-37.pyc

deleted file mode 100644 (file)

index a8179e8..0000000

Binary files a/nlp_resource_data/nltk/test/unit/__pycache__/test_concordance.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_corenlp.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_corenlp.cpython-37.pyc

deleted file mode 100644 (file)

index 06741c6..0000000

Binary files a/nlp_resource_data/nltk/test/unit/__pycache__/test_corenlp.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_corpora.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_corpora.cpython-37.pyc

deleted file mode 100644 (file)

index 26a4114..0000000

Binary files a/nlp_resource_data/nltk/test/unit/__pycache__/test_corpora.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_corpus_views.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_corpus_views.cpython-37.pyc

deleted file mode 100644 (file)

index c44070a..0000000

Binary files a/nlp_resource_data/nltk/test/unit/__pycache__/test_corpus_views.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_data.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_data.cpython-37.pyc

deleted file mode 100644 (file)

index 4aefd5c..0000000

Binary files a/nlp_resource_data/nltk/test/unit/__pycache__/test_data.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_disagreement.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_disagreement.cpython-37.pyc

deleted file mode 100644 (file)

index 86a7e6c..0000000

Binary files a/nlp_resource_data/nltk/test/unit/__pycache__/test_disagreement.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_hmm.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_hmm.cpython-37.pyc

deleted file mode 100644 (file)

index e0810a3..0000000

Binary files a/nlp_resource_data/nltk/test/unit/__pycache__/test_hmm.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_json2csv_corpus.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_json2csv_corpus.cpython-37.pyc

deleted file mode 100644 (file)

index 91c9a9d..0000000

Binary files a/nlp_resource_data/nltk/test/unit/__pycache__/test_json2csv_corpus.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_naivebayes.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_naivebayes.cpython-37.pyc

deleted file mode 100644 (file)

index 34f3af4..0000000

Binary files a/nlp_resource_data/nltk/test/unit/__pycache__/test_naivebayes.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_pos_tag.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_pos_tag.cpython-37.pyc

deleted file mode 100644 (file)

index e8a2725..0000000

Binary files a/nlp_resource_data/nltk/test/unit/__pycache__/test_pos_tag.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_rte_classify.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_rte_classify.cpython-37.pyc

deleted file mode 100644 (file)

index c984006..0000000

Binary files a/nlp_resource_data/nltk/test/unit/__pycache__/test_rte_classify.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_seekable_unicode_stream_reader.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_seekable_unicode_stream_reader.cpython-37.pyc

deleted file mode 100644 (file)

index a0c09b1..0000000

Binary files a/nlp_resource_data/nltk/test/unit/__pycache__/test_seekable_unicode_stream_reader.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_senna.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_senna.cpython-37.pyc

deleted file mode 100644 (file)

index 2d04fb9..0000000

Binary files a/nlp_resource_data/nltk/test/unit/__pycache__/test_senna.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_stem.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_stem.cpython-37.pyc

deleted file mode 100644 (file)

index dc55d3b..0000000

Binary files a/nlp_resource_data/nltk/test/unit/__pycache__/test_stem.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_tag.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_tag.cpython-37.pyc

deleted file mode 100644 (file)

index 9be0186..0000000

Binary files a/nlp_resource_data/nltk/test/unit/__pycache__/test_tag.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_tgrep.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_tgrep.cpython-37.pyc

deleted file mode 100644 (file)

index 9f6d092..0000000

Binary files a/nlp_resource_data/nltk/test/unit/__pycache__/test_tgrep.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_tokenize.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_tokenize.cpython-37.pyc

deleted file mode 100644 (file)

index 4a2306a..0000000

Binary files a/nlp_resource_data/nltk/test/unit/__pycache__/test_tokenize.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_twitter_auth.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_twitter_auth.cpython-37.pyc

deleted file mode 100644 (file)

index 15975a5..0000000

Binary files a/nlp_resource_data/nltk/test/unit/__pycache__/test_twitter_auth.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/test_wordnet.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/test_wordnet.cpython-37.pyc

deleted file mode 100644 (file)

index 1122826..0000000

Binary files a/nlp_resource_data/nltk/test/unit/__pycache__/test_wordnet.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/__pycache__/utils.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/__pycache__/utils.cpython-37.pyc

deleted file mode 100644 (file)

index 8f2a1f1..0000000

Binary files a/nlp_resource_data/nltk/test/unit/__pycache__/utils.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/lm/__init__.py b/nlp_resource_data/nltk/test/unit/lm/__init__.py

deleted file mode 100644 (file)

index e69de29..0000000
diff --git a/nlp_resource_data/nltk/test/unit/lm/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/lm/__pycache__/__init__.cpython-37.pyc

deleted file mode 100644 (file)

index 130b497..0000000

Binary files a/nlp_resource_data/nltk/test/unit/lm/__pycache__/__init__.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/lm/__pycache__/test_counter.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/lm/__pycache__/test_counter.cpython-37.pyc

deleted file mode 100644 (file)

index 1cb5393..0000000

Binary files a/nlp_resource_data/nltk/test/unit/lm/__pycache__/test_counter.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/lm/__pycache__/test_models.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/lm/__pycache__/test_models.cpython-37.pyc

deleted file mode 100644 (file)

index d32131e..0000000

Binary files a/nlp_resource_data/nltk/test/unit/lm/__pycache__/test_models.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/lm/__pycache__/test_preprocessing.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/lm/__pycache__/test_preprocessing.cpython-37.pyc

deleted file mode 100644 (file)

index e81feda..0000000

Binary files a/nlp_resource_data/nltk/test/unit/lm/__pycache__/test_preprocessing.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/lm/__pycache__/test_vocabulary.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/lm/__pycache__/test_vocabulary.cpython-37.pyc

deleted file mode 100644 (file)

index 08fdd71..0000000

Binary files a/nlp_resource_data/nltk/test/unit/lm/__pycache__/test_vocabulary.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/lm/test_counter.py b/nlp_resource_data/nltk/test/unit/lm/test_counter.py

deleted file mode 100644 (file)

index 31fab79..0000000
--- a/nlp_resource_data/nltk/test/unit/lm/test_counter.py
+++ /dev/null
@@ -1,135 +0,0 @@
-# -*- coding: utf-8 -*-
-# Natural Language Toolkit: Language Model Unit Tests
-#
-# Copyright (C) 2001-2019 NLTK Project
-# Author: Ilia Kurenkov <ilia.kurenkov@gmail.com>
-# URL: <http://nltk.org/>
-# For license information, see LICENSE.TXT
-
-import unittest
-
-import six
-
-from nltk import FreqDist
-from nltk.lm import NgramCounter
-from nltk.util import everygrams
-
-
-class NgramCounterTests(unittest.TestCase):
-    """Tests for NgramCounter that only involve lookup, no modification."""
-
-    @classmethod
-    def setUpClass(cls):
-
-        text = [list("abcd"), list("egdbe")]
-        cls.trigram_counter = NgramCounter(
-            (everygrams(sent, max_len=3) for sent in text)
-        )
-        cls.bigram_counter = NgramCounter(
-            (everygrams(sent, max_len=2) for sent in text)
-        )
-
-    def test_N(self):
-        self.assertEqual(self.bigram_counter.N(), 16)
-        self.assertEqual(self.trigram_counter.N(), 21)
-
-    def test_counter_len_changes_with_lookup(self):
-        self.assertEqual(len(self.bigram_counter), 2)
-        _ = self.bigram_counter[50]
-        self.assertEqual(len(self.bigram_counter), 3)
-
-    def test_ngram_order_access_unigrams(self):
-        self.assertEqual(self.bigram_counter[1], self.bigram_counter.unigrams)
-
-    def test_ngram_conditional_freqdist(self):
-        expected_trigram_contexts = [
-            ("a", "b"),
-            ("b", "c"),
-            ("e", "g"),
-            ("g", "d"),
-            ("d", "b"),
-        ]
-        expected_bigram_contexts = [("a",), ("b",), ("d",), ("e",), ("c",), ("g",)]
-
-        bigrams = self.trigram_counter[2]
-        trigrams = self.trigram_counter[3]
-
-        six.assertCountEqual(self, expected_bigram_contexts, bigrams.conditions())
-        six.assertCountEqual(self, expected_trigram_contexts, trigrams.conditions())
-
-    def test_bigram_counts_seen_ngrams(self):
-        b_given_a_count = 1
-        unk_given_b_count = 1
-
-        self.assertEqual(b_given_a_count, self.bigram_counter[["a"]]["b"])
-        self.assertEqual(unk_given_b_count, self.bigram_counter[["b"]]["c"])
-
-    def test_bigram_counts_unseen_ngrams(self):
-        z_given_b_count = 0
-
-        self.assertEqual(z_given_b_count, self.bigram_counter[["b"]]["z"])
-
-    def test_unigram_counts_seen_words(self):
-        expected_count_b = 2
-
-        self.assertEqual(expected_count_b, self.bigram_counter["b"])
-
-    def test_unigram_counts_completely_unseen_words(self):
-        unseen_count = 0
-
-        self.assertEqual(unseen_count, self.bigram_counter["z"])
-
-
-class NgramCounterTrainingTests(unittest.TestCase):
-    def setUp(self):
-        self.counter = NgramCounter()
-
-    def test_empty_string(self):
-        test = NgramCounter("")
-        self.assertNotIn(2, test)
-        self.assertEqual(test[1], FreqDist())
-
-    def test_empty_list(self):
-        test = NgramCounter([])
-        self.assertNotIn(2, test)
-        self.assertEqual(test[1], FreqDist())
-
-    def test_None(self):
-        test = NgramCounter(None)
-        self.assertNotIn(2, test)
-        self.assertEqual(test[1], FreqDist())
-
-    def test_train_on_unigrams(self):
-        words = list("abcd")
-        counter = NgramCounter([[(w,) for w in words]])
-
-        self.assertFalse(bool(counter[3]))
-        self.assertFalse(bool(counter[2]))
-        six.assertCountEqual(self, words, counter[1].keys())
-
-    def test_train_on_illegal_sentences(self):
-        str_sent = ["Check", "this", "out", "!"]
-        list_sent = [["Check", "this"], ["this", "out"], ["out", "!"]]
-
-        with self.assertRaises(TypeError):
-            NgramCounter([str_sent])
-
-        with self.assertRaises(TypeError):
-            NgramCounter([list_sent])
-
-    def test_train_on_bigrams(self):
-        bigram_sent = [("a", "b"), ("c", "d")]
-        counter = NgramCounter([bigram_sent])
-
-        self.assertFalse(bool(counter[3]))
-
-    def test_train_on_mix(self):
-        mixed_sent = [("a", "b"), ("c", "d"), ("e", "f", "g"), ("h",)]
-        counter = NgramCounter([mixed_sent])
-        unigrams = ["h"]
-        bigram_contexts = [("a",), ("c",)]
-        trigram_contexts = [("e", "f")]
-
-        six.assertCountEqual(self, unigrams, counter[1].keys())
-        six.assertCountEqual(self, bigram_contexts, counter[2].keys())
-        six.assertCountEqual(self, trigram_contexts, counter[3].keys())
diff --git a/nlp_resource_data/nltk/test/unit/lm/test_models.py b/nlp_resource_data/nltk/test/unit/lm/test_models.py

deleted file mode 100644 (file)

index f19edd4..0000000
--- a/nlp_resource_data/nltk/test/unit/lm/test_models.py
+++ /dev/null
@@ -1,442 +0,0 @@
-# -*- coding: utf-8 -*-
-# Natural Language Toolkit: Language Model Unit Tests
-#
-# Copyright (C) 2001-2019 NLTK Project
-# Author: Ilia Kurenkov <ilia.kurenkov@gmail.com>
-# URL: <http://nltk.org/>
-# For license information, see LICENSE.TXT
-
-from __future__ import division
-
-import math
-import sys
-import unittest
-
-from six import add_metaclass
-
-from nltk.lm import (
-    Vocabulary,
-    MLE,
-    Lidstone,
-    Laplace,
-    WittenBellInterpolated,
-    KneserNeyInterpolated,
-)
-from nltk.lm.preprocessing import padded_everygrams
-
-
-def _prepare_test_data(ngram_order):
-    return (
-        Vocabulary(["a", "b", "c", "d", "z", "<s>", "</s>"], unk_cutoff=1),
-        [
-            list(padded_everygrams(ngram_order, sent))
-            for sent in (list("abcd"), list("egadbe"))
-        ],
-    )
-
-
-class ParametrizeTestsMeta(type):
-    """Metaclass for generating parametrized tests."""
-
-    def __new__(cls, name, bases, dct):
-        contexts = (
-            ("a",),
-            ("c",),
-            (u"<s>",),
-            ("b",),
-            (u"<UNK>",),
-            ("d",),
-            ("e",),
-            ("r",),
-            ("w",),
-        )
-        for i, c in enumerate(contexts):
-            dct["test_sumto1_{0}".format(i)] = cls.add_sum_to_1_test(c)
-        scores = dct.get("score_tests", [])
-        for i, (word, context, expected_score) in enumerate(scores):
-            dct["test_score_{0}".format(i)] = cls.add_score_test(
-                word, context, expected_score
-            )
-        return super(ParametrizeTestsMeta, cls).__new__(cls, name, bases, dct)
-
-    @classmethod
-    def add_score_test(cls, word, context, expected_score):
-        if sys.version_info > (3, 5):
-            message = "word='{word}', context={context}"
-        else:
-            # Python 2 doesn't report the mismatched values if we pass a custom
-            # message, so we have to report them manually.
-            message = (
-                "{score} != {expected_score} within 4 places, "
-                "word='{word}', context={context}"
-            )
-
-        def test_method(self):
-            score = self.model.score(word, context)
-            self.assertAlmostEqual(
-                score, expected_score, msg=message.format(**locals()), places=4
-            )
-
-        return test_method
-
-    @classmethod
-    def add_sum_to_1_test(cls, context):
-        def test(self):
-            s = sum(self.model.score(w, context) for w in self.model.vocab)
-            self.assertAlmostEqual(s, 1.0, msg="The context is {}".format(context))
-
-        return test
-
-
-@add_metaclass(ParametrizeTestsMeta)
-class MleBigramTests(unittest.TestCase):
-    """unit tests for MLENgramModel class"""
-
-    score_tests = [
-        ("d", ["c"], 1),
-        # Unseen ngrams should yield 0
-        ("d", ["e"], 0),
-        # Unigrams should also be 0
-        ("z", None, 0),
-        # N unigrams = 14
-        # count('a') = 2
-        ("a", None, 2.0 / 14),
-        # count('y') = 3
-        ("y", None, 3.0 / 14),
-    ]
-
-    def setUp(self):
-        vocab, training_text = _prepare_test_data(2)
-        self.model = MLE(2, vocabulary=vocab)
-        self.model.fit(training_text)
-
-    def test_logscore_zero_score(self):
-        # logscore of unseen ngrams should be -inf
-        logscore = self.model.logscore("d", ["e"])
-
-        self.assertTrue(math.isinf(logscore))
-
-    def test_entropy_perplexity_seen(self):
-        # ngrams seen during training
-        trained = [
-            ("<s>", "a"),
-            ("a", "b"),
-            ("b", "<UNK>"),
-            ("<UNK>", "a"),
-            ("a", "d"),
-            ("d", "</s>"),
-        ]
-        # Ngram = Log score
-        # <s>, a    = -1
-        # a, b      = -1
-        # b, UNK    = -1
-        # UNK, a    = -1.585
-        # a, d      = -1
-        # d, </s>   = -1
-        # TOTAL logscores   = -6.585
-        # - AVG logscores   = 1.0975
-        H = 1.0975
-        perplexity = 2.1398
-
-        self.assertAlmostEqual(H, self.model.entropy(trained), places=4)
-        self.assertAlmostEqual(perplexity, self.model.perplexity(trained), places=4)
-
-    def test_entropy_perplexity_unseen(self):
-        # In MLE, even one unseen ngram should make entropy and perplexity infinite
-        untrained = [("<s>", "a"), ("a", "c"), ("c", "d"), ("d", "</s>")]
-
-        self.assertTrue(math.isinf(self.model.entropy(untrained)))
-        self.assertTrue(math.isinf(self.model.perplexity(untrained)))
-
-    def test_entropy_perplexity_unigrams(self):
-        # word = score, log score
-        # <s>   = 0.1429, -2.8074
-        # a     = 0.1429, -2.8074
-        # c     = 0.0714, -3.8073
-        # UNK   = 0.2143, -2.2224
-        # d     = 0.1429, -2.8074
-        # c     = 0.0714, -3.8073
-        # </s>  = 0.1429, -2.8074
-        # TOTAL logscores = -21.6243
-        # - AVG logscores = 3.0095
-        H = 3.0095
-        perplexity = 8.0529
-
-        text = [("<s>",), ("a",), ("c",), ("-",), ("d",), ("c",), ("</s>",)]
-
-        self.assertAlmostEqual(H, self.model.entropy(text), places=4)
-        self.assertAlmostEqual(perplexity, self.model.perplexity(text), places=4)
-
-
-@add_metaclass(ParametrizeTestsMeta)
-class MleTrigramTests(unittest.TestCase):
-    """MLE trigram model tests"""
-
-    score_tests = [
-        # count(d | b, c) = 1
-        # count(b, c) = 1
-        ("d", ("b", "c"), 1),
-        # count(d | c) = 1
-        # count(c) = 1
-        ("d", ["c"], 1),
-        # total number of tokens is 18, of which "a" occured 2 times
-        ("a", None, 2.0 / 18),
-        # in vocabulary but unseen
-        ("z", None, 0),
-        # out of vocabulary should use "UNK" score
-        ("y", None, 3.0 / 18),
-    ]
-
-    def setUp(self):
-        vocab, training_text = _prepare_test_data(3)
-        self.model = MLE(3, vocabulary=vocab)
-        self.model.fit(training_text)
-
-
-@add_metaclass(ParametrizeTestsMeta)
-class LidstoneBigramTests(unittest.TestCase):
-    """unit tests for Lidstone class"""
-
-    score_tests = [
-        # count(d | c) = 1
-        # *count(d | c) = 1.1
-        # Count(w | c for w in vocab) = 1
-        # *Count(w | c for w in vocab) = 1.8
-        ("d", ["c"], 1.1 / 1.8),
-        # Total unigrams: 14
-        # Vocab size: 8
-        # Denominator: 14 + 0.8 = 14.8
-        # count("a") = 2
-        # *count("a") = 2.1
-        ("a", None, 2.1 / 14.8),
-        # in vocabulary but unseen
-        # count("z") = 0
-        # *count("z") = 0.1
-        ("z", None, 0.1 / 14.8),
-        # out of vocabulary should use "UNK" score
-        # count("<UNK>") = 3
-        # *count("<UNK>") = 3.1
-        ("y", None, 3.1 / 14.8),
-    ]
-
-    def setUp(self):
-        vocab, training_text = _prepare_test_data(2)
-        self.model = Lidstone(0.1, 2, vocabulary=vocab)
-        self.model.fit(training_text)
-
-    def test_gamma(self):
-        self.assertEqual(0.1, self.model.gamma)
-
-    def test_entropy_perplexity(self):
-        text = [
-            ("<s>", "a"),
-            ("a", "c"),
-            ("c", "<UNK>"),
-            ("<UNK>", "d"),
-            ("d", "c"),
-            ("c", "</s>"),
-        ]
-        # Unlike MLE this should be able to handle completely novel ngrams
-        # Ngram = score, log score
-        # <s>, a    = 0.3929, -1.3479
-        # a, c      = 0.0357, -4.8074
-        # c, UNK    = 0.0(5), -4.1699
-        # UNK, d    = 0.0263,  -5.2479
-        # d, c      = 0.0357, -4.8074
-        # c, </s>   = 0.0(5), -4.1699
-        # TOTAL logscore: −24.5504
-        # - AVG logscore: 4.0917
-        H = 4.0917
-        perplexity = 17.0504
-        self.assertAlmostEqual(H, self.model.entropy(text), places=4)
-        self.assertAlmostEqual(perplexity, self.model.perplexity(text), places=4)
-
-
-@add_metaclass(ParametrizeTestsMeta)
-class LidstoneTrigramTests(unittest.TestCase):
-    score_tests = [
-        # Logic behind this is the same as for bigram model
-        ("d", ["c"], 1.1 / 1.8),
-        # if we choose a word that hasn't appeared after (b, c)
-        ("e", ["c"], 0.1 / 1.8),
-        # Trigram score now
-        ("d", ["b", "c"], 1.1 / 1.8),
-        ("e", ["b", "c"], 0.1 / 1.8),
-    ]
-
-    def setUp(self):
-        vocab, training_text = _prepare_test_data(3)
-        self.model = Lidstone(0.1, 3, vocabulary=vocab)
-        self.model.fit(training_text)
-
-
-@add_metaclass(ParametrizeTestsMeta)
-class LaplaceBigramTests(unittest.TestCase):
-    """unit tests for Laplace class"""
-
-    score_tests = [
-        # basic sanity-check:
-        # count(d | c) = 1
-        # *count(d | c) = 2
-        # Count(w | c for w in vocab) = 1
-        # *Count(w | c for w in vocab) = 9
-        ("d", ["c"], 2.0 / 9),
-        # Total unigrams: 14
-        # Vocab size: 8
-        # Denominator: 14 + 8 = 22
-        # count("a") = 2
-        # *count("a") = 3
-        ("a", None, 3.0 / 22),
-        # in vocabulary but unseen
-        # count("z") = 0
-        # *count("z") = 1
-        ("z", None, 1.0 / 22),
-        # out of vocabulary should use "UNK" score
-        # count("<UNK>") = 3
-        # *count("<UNK>") = 4
-        ("y", None, 4.0 / 22),
-    ]
-
-    def setUp(self):
-        vocab, training_text = _prepare_test_data(2)
-        self.model = Laplace(2, vocabulary=vocab)
-        self.model.fit(training_text)
-
-    def test_gamma(self):
-        # Make sure the gamma is set to 1
-        self.assertEqual(1, self.model.gamma)
-
-    def test_entropy_perplexity(self):
-        text = [
-            ("<s>", "a"),
-            ("a", "c"),
-            ("c", "<UNK>"),
-            ("<UNK>", "d"),
-            ("d", "c"),
-            ("c", "</s>"),
-        ]
-        # Unlike MLE this should be able to handle completely novel ngrams
-        # Ngram = score, log score
-        # <s>, a    = 0.2, -2.3219
-        # a, c      = 0.1, -3.3219
-        # c, UNK    = 0.(1), -3.1699
-        # UNK, d    = 0.(09), 3.4594
-        # d, c      = 0.1 -3.3219
-        # c, </s>   = 0.(1), -3.1699
-        # Total logscores: −18.7651
-        # - AVG logscores: 3.1275
-        H = 3.1275
-        perplexity = 8.7393
-        self.assertAlmostEqual(H, self.model.entropy(text), places=4)
-        self.assertAlmostEqual(perplexity, self.model.perplexity(text), places=4)
-
-
-@add_metaclass(ParametrizeTestsMeta)
-class WittenBellInterpolatedTrigramTests(unittest.TestCase):
-    def setUp(self):
-        vocab, training_text = _prepare_test_data(3)
-        self.model = WittenBellInterpolated(3, vocabulary=vocab)
-        self.model.fit(training_text)
-
-    score_tests = [
-        # For unigram scores by default revert to MLE
-        # Total unigrams: 18
-        # count('c'): 1
-        ("c", None, 1.0 / 18),
-        # in vocabulary but unseen
-        # count("z") = 0
-        ("z", None, 0.0 / 18),
-        # out of vocabulary should use "UNK" score
-        # count("<UNK>") = 3
-        ("y", None, 3.0 / 18),
-        # gamma(['b']) = 0.1111
-        # mle.score('c', ['b']) = 0.5
-        # (1 - gamma) * mle + gamma * mle('c') ~= 0.45 + .3 / 18
-        ("c", ["b"], (1 - 0.1111) * 0.5 + 0.1111 * 1 / 18),
-        # building on that, let's try 'a b c' as the trigram
-        # gamma(['a', 'b']) = 0.0667
-        # mle("c", ["a", "b"]) = 1
-        ("c", ["a", "b"], (1 - 0.0667) + 0.0667 * ((1 - 0.1111) * 0.5 + 0.1111 / 18)),
-    ]
-
-
-@add_metaclass(ParametrizeTestsMeta)
-class KneserNeyInterpolatedTrigramTests(unittest.TestCase):
-    def setUp(self):
-        vocab, training_text = _prepare_test_data(3)
-        self.model = KneserNeyInterpolated(3, vocabulary=vocab)
-        self.model.fit(training_text)
-
-    score_tests = [
-        # For unigram scores revert to uniform
-        # Vocab size: 8
-        # count('c'): 1
-        ("c", None, 1.0 / 8),
-        # in vocabulary but unseen, still uses uniform
-        ("z", None, 1 / 8),
-        # out of vocabulary should use "UNK" score, i.e. again uniform
-        ("y", None, 1.0 / 8),
-        # alpha = count('bc') - discount = 1 - 0.1 = 0.9
-        # gamma(['b']) = discount * number of unique words that follow ['b'] = 0.1 * 2
-        # normalizer = total number of bigrams with this context = 2
-        # the final should be: (alpha + gamma * unigram_score("c"))
-        ("c", ["b"], (0.9 + 0.2 * (1 / 8)) / 2),
-        # building on that, let's try 'a b c' as the trigram
-        # alpha = count('abc') - discount = 1 - 0.1 = 0.9
-        # gamma(['a', 'b']) = 0.1 * 1
-        # normalizer = total number of trigrams with prefix "ab" = 1 => we can ignore it!
-        ("c", ["a", "b"], 0.9 + 0.1 * ((0.9 + 0.2 * (1 / 8)) / 2)),
-    ]
-
-
-class NgramModelTextGenerationTests(unittest.TestCase):
-    """Using MLE estimator, generate some text."""
-
-    def setUp(self):
-        vocab, training_text = _prepare_test_data(3)
-        self.model = MLE(3, vocabulary=vocab)
-        self.model.fit(training_text)
-
-    def test_generate_one_no_context(self):
-        self.assertEqual(self.model.generate(random_seed=3), "<UNK>")
-
-    def test_generate_one_limiting_context(self):
-        # We don't need random_seed for contexts with only one continuation
-        self.assertEqual(self.model.generate(text_seed=["c"]), "d")
-        self.assertEqual(self.model.generate(text_seed=["b", "c"]), "d")
-        self.assertEqual(self.model.generate(text_seed=["a", "c"]), "d")
-
-    def test_generate_one_varied_context(self):
-        # When context doesn't limit our options enough, seed the random choice
-        self.assertEqual(
-            self.model.generate(text_seed=("a", "<s>"), random_seed=2), "a"
-        )
-
-    def test_generate_no_seed_unigrams(self):
-        self.assertEqual(
-            self.model.generate(5, random_seed=3),
-            ["<UNK>", "</s>", "</s>", "</s>", "</s>"],
-        )
-
-    def test_generate_with_text_seed(self):
-        self.assertEqual(
-            self.model.generate(5, text_seed=("<s>", "e"), random_seed=3),
-            ["<UNK>", "a", "d", "b", "<UNK>"],
-        )
-
-    def test_generate_oov_text_seed(self):
-        self.assertEqual(
-            self.model.generate(text_seed=("aliens",), random_seed=3),
-            self.model.generate(text_seed=("<UNK>",), random_seed=3),
-        )
-
-    def test_generate_None_text_seed(self):
-        # should crash with type error when we try to look it up in vocabulary
-        with self.assertRaises(TypeError):
-            self.model.generate(text_seed=(None,))
-
-        # This will work
-        self.assertEqual(
-            self.model.generate(text_seed=None, random_seed=3),
-            self.model.generate(random_seed=3),
-        )
diff --git a/nlp_resource_data/nltk/test/unit/lm/test_preprocessing.py b/nlp_resource_data/nltk/test/unit/lm/test_preprocessing.py

deleted file mode 100644 (file)

index 02a8af5..0000000
--- a/nlp_resource_data/nltk/test/unit/lm/test_preprocessing.py
+++ /dev/null
@@ -1,31 +0,0 @@
-# -*- coding: utf-8 -*-
-# Natural Language Toolkit: Language Model Unit Tests
-#
-# Copyright (C) 2001-2019 NLTK Project
-# Author: Ilia Kurenkov <ilia.kurenkov@gmail.com>
-# URL: <http://nltk.org/>
-# For license information, see LICENSE.TXT
-import unittest
-
-from nltk.lm.preprocessing import padded_everygram_pipeline
-
-
-class TestPreprocessing(unittest.TestCase):
-    def test_padded_everygram_pipeline(self):
-        expected_train = [
-            [
-                ("<s>",),
-                ("a",),
-                ("b",),
-                ("c",),
-                ("</s>",),
-                ("<s>", "a"),
-                ("a", "b"),
-                ("b", "c"),
-                ("c", "</s>"),
-            ]
-        ]
-        expected_vocab = ["<s>", "a", "b", "c", "</s>"]
-        train_data, vocab_data = padded_everygram_pipeline(2, [["a", "b", "c"]])
-        self.assertEqual([list(sent) for sent in train_data], expected_train)
-        self.assertEqual(list(vocab_data), expected_vocab)
diff --git a/nlp_resource_data/nltk/test/unit/lm/test_vocabulary.py b/nlp_resource_data/nltk/test/unit/lm/test_vocabulary.py

deleted file mode 100644 (file)

index dd78b42..0000000
--- a/nlp_resource_data/nltk/test/unit/lm/test_vocabulary.py
+++ /dev/null
@@ -1,141 +0,0 @@
-# -*- coding: utf-8 -*-
-# Natural Language Toolkit: Language Model Unit Tests
-#
-# Copyright (C) 2001-2019 NLTK Project
-# Author: Ilia Kurenkov <ilia.kurenkov@gmail.com>
-# URL: <http://nltk.org/>
-# For license information, see LICENSE.TXT
-
-import unittest
-from collections import Counter
-
-import six
-from nltk.lm import Vocabulary
-
-
-class NgramModelVocabularyTests(unittest.TestCase):
-    """tests Vocabulary Class"""
-
-    @classmethod
-    def setUpClass(cls):
-        cls.vocab = Vocabulary(
-            ["z", "a", "b", "c", "f", "d", "e", "g", "a", "d", "b", "e", "w"],
-            unk_cutoff=2,
-        )
-
-    def test_truthiness(self):
-        self.assertTrue(self.vocab)
-
-    def test_cutoff_value_set_correctly(self):
-        self.assertEqual(self.vocab.cutoff, 2)
-
-    def test_unable_to_change_cutoff(self):
-        with self.assertRaises(AttributeError):
-            self.vocab.cutoff = 3
-
-    def test_cutoff_setter_checks_value(self):
-        with self.assertRaises(ValueError) as exc_info:
-            Vocabulary("abc", unk_cutoff=0)
-        expected_error_msg = "Cutoff value cannot be less than 1. Got: 0"
-        self.assertEqual(expected_error_msg, str(exc_info.exception))
-
-    def test_counts_set_correctly(self):
-        self.assertEqual(self.vocab.counts["a"], 2)
-        self.assertEqual(self.vocab.counts["b"], 2)
-        self.assertEqual(self.vocab.counts["c"], 1)
-
-    def test_membership_check_respects_cutoff(self):
-        # a was seen 2 times, so it should be considered part of the vocabulary
-        self.assertTrue("a" in self.vocab)
-        # "c" was seen once, it shouldn't be considered part of the vocab
-        self.assertFalse("c" in self.vocab)
-        # "z" was never seen at all, also shouldn't be considered in the vocab
-        self.assertFalse("z" in self.vocab)
-
-    def test_vocab_len_respects_cutoff(self):
-        # Vocab size is the number of unique tokens that occur at least as often
-        # as the cutoff value, plus 1 to account for unknown words.
-        self.assertEqual(5, len(self.vocab))
-
-    def test_vocab_iter_respects_cutoff(self):
-        vocab_counts = ["a", "b", "c", "d", "e", "f", "g", "w", "z"]
-        vocab_items = ["a", "b", "d", "e", "<UNK>"]
-
-        six.assertCountEqual(self, vocab_counts, list(self.vocab.counts.keys()))
-        six.assertCountEqual(self, vocab_items, list(self.vocab))
-
-    def test_update_empty_vocab(self):
-        empty = Vocabulary(unk_cutoff=2)
-        self.assertEqual(len(empty), 0)
-        self.assertFalse(empty)
-        self.assertIn(empty.unk_label, empty)
-
-        empty.update(list("abcde"))
-        self.assertIn(empty.unk_label, empty)
-
-    def test_lookup(self):
-        self.assertEqual(self.vocab.lookup("a"), "a")
-        self.assertEqual(self.vocab.lookup("c"), "<UNK>")
-
-    def test_lookup_iterables(self):
-        self.assertEqual(self.vocab.lookup(["a", "b"]), ("a", "b"))
-        self.assertEqual(self.vocab.lookup(("a", "b")), ("a", "b"))
-        self.assertEqual(self.vocab.lookup(("a", "c")), ("a", "<UNK>"))
-        self.assertEqual(
-            self.vocab.lookup(map(str, range(3))), ("<UNK>", "<UNK>", "<UNK>")
-        )
-
-    def test_lookup_empty_iterables(self):
-        self.assertEqual(self.vocab.lookup(()), ())
-        self.assertEqual(self.vocab.lookup([]), ())
-        self.assertEqual(self.vocab.lookup(iter([])), ())
-        self.assertEqual(self.vocab.lookup(n for n in range(0, 0)), ())
-
-    def test_lookup_recursive(self):
-        self.assertEqual(
-            self.vocab.lookup([["a", "b"], ["a", "c"]]), (("a", "b"), ("a", "<UNK>"))
-        )
-        self.assertEqual(self.vocab.lookup([["a", "b"], "c"]), (("a", "b"), "<UNK>"))
-        self.assertEqual(self.vocab.lookup([[[[["a", "b"]]]]]), ((((("a", "b"),),),),))
-
-    def test_lookup_None(self):
-        with self.assertRaises(TypeError):
-            self.vocab.lookup(None)
-        with self.assertRaises(TypeError):
-            list(self.vocab.lookup([None, None]))
-
-    def test_lookup_int(self):
-        with self.assertRaises(TypeError):
-            self.vocab.lookup(1)
-        with self.assertRaises(TypeError):
-            list(self.vocab.lookup([1, 2]))
-
-    def test_lookup_empty_str(self):
-        self.assertEqual(self.vocab.lookup(""), "<UNK>")
-
-    def test_eqality(self):
-        v1 = Vocabulary(["a", "b", "c"], unk_cutoff=1)
-        v2 = Vocabulary(["a", "b", "c"], unk_cutoff=1)
-        v3 = Vocabulary(["a", "b", "c"], unk_cutoff=1, unk_label="blah")
-        v4 = Vocabulary(["a", "b"], unk_cutoff=1)
-
-        self.assertEqual(v1, v2)
-        self.assertNotEqual(v1, v3)
-        self.assertNotEqual(v1, v4)
-
-    def test_str(self):
-        self.assertEqual(
-            str(self.vocab),
-            ("<Vocabulary with cutoff=2 " "unk_label='<UNK>' and 5 items>"),
-        )
-
-    def test_creation_with_counter(self):
-        self.assertEqual(
-            self.vocab,
-            Vocabulary(
-                Counter(
-                    ["z", "a", "b", "c", "f", "d", "e", "g", "a", "d", "b", "e", "w"]
-                ),
-                unk_cutoff=2,
-            ),
-        )
diff --git a/nlp_resource_data/nltk/test/unit/test_2x_compat.py b/nlp_resource_data/nltk/test/unit/test_2x_compat.py

deleted file mode 100644 (file)

index f078373..0000000
--- a/nlp_resource_data/nltk/test/unit/test_2x_compat.py
+++ /dev/null
@@ -1,65 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Unit tests for nltk.compat.
-See also nltk/test/compat.doctest.
-"""
-from __future__ import absolute_import, unicode_literals
-import unittest
-
-from nltk.text import Text
-from nltk.compat import PY3, python_2_unicode_compatible
-
-
-def setup_module(module):
-    from nose import SkipTest
-
-    if PY3:
-        raise SkipTest("test_2x_compat is for testing nltk.compat under Python 2.x")
-
-
-class TestTextTransliteration(unittest.TestCase):
-    txt = Text(["São", "Tomé", "and", "Príncipe"])
-
-    def test_repr(self):
-        self.assertEqual(repr(self.txt), br"<Text: S\xe3o Tom\xe9 and Pr\xedncipe...>")
-
-    def test_str(self):
-        self.assertEqual(str(self.txt), b"<Text: Sao Tome and Principe...>")
-
-
-class TestFraction(unittest.TestCase):
-    def test_unnoramlize_fraction(self):
-        from fractions import Fraction as NativePythonFraction
-        from nltk.compat import Fraction as NLTKFraction
-
-        # The native fraction should throw a TypeError in Python < 3.5
-        with self.assertRaises(TypeError):
-            NativePythonFraction(0, 1000, _normalize=False)
-
-        # Using nltk.compat.Fraction in Python < 3.5
-        compat_frac = NLTKFraction(0, 1000, _normalize=False)
-        # The numerator and denominator does not change.
-        assert compat_frac.numerator == 0
-        assert compat_frac.denominator == 1000
-        # The floating point value remains normalized.
-        assert float(compat_frac) == 0.0
-
-        # Checks that the division is not divided by
-        # # by greatest common divisor (gcd).
-        six_twelve = NLTKFraction(6, 12, _normalize=False)
-        assert six_twelve.numerator == 6
-        assert six_twelve.denominator == 12
-
-        one_two = NLTKFraction(1, 2, _normalize=False)
-        assert one_two.numerator == 1
-        assert one_two.denominator == 2
-
-        # Checks against the native fraction.
-        six_twelve_original = NativePythonFraction(6, 12)
-        # Checks that rational values of one_two and six_twelve is the same.
-        assert float(one_two) == float(six_twelve) == float(six_twelve_original)
-
-        # Checks that the fraction does get normalized, even when
-        # _normalize == False when numerator is using native
-        # fractions.Fraction.from_float
-        assert NLTKFraction(3.142, _normalize=False) == NativePythonFraction(3.142)
diff --git a/nlp_resource_data/nltk/test/unit/test_aline.py b/nlp_resource_data/nltk/test/unit/test_aline.py

deleted file mode 100644 (file)

index 72b92c7..0000000
--- a/nlp_resource_data/nltk/test/unit/test_aline.py
+++ /dev/null
@@ -1,78 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Unit tests for nltk.metrics.aline
-"""
-
-from __future__ import unicode_literals
-
-import unittest
-
-from nltk.metrics import aline
-
-
-class TestAline(unittest.TestCase):
-    """
-    Test Aline algorithm for aligning phonetic sequences
-    """
-
-    def test_aline(self):
-        result = aline.align('θin', 'tenwis')
-        expected = [
-            [('θ', 't'), ('i', 'e'), ('n', 'n'), ('-', 'w'), ('-', 'i'), ('-', 's')]
-        ]
-
-        self.assertEqual(result, expected)
-
-        result = aline.align('jo', 'ʒə')
-        expected = [[('j', 'ʒ'), ('o', 'ə')]]
-
-        self.assertEqual(result, expected)
-
-        result = aline.align('pematesiweni', 'pematesewen')
-        expected = [
-            [
-                ('p', 'p'),
-                ('e', 'e'),
-                ('m', 'm'),
-                ('a', 'a'),
-                ('t', 't'),
-                ('e', 'e'),
-                ('s', 's'),
-                ('i', 'e'),
-                ('w', 'w'),
-                ('e', 'e'),
-                ('n', 'n'),
-                ('i', '-'),
-            ]
-        ]
-
-        self.assertEqual(result, expected)
-
-        result = aline.align('tuwθ', 'dentis')
-        expected = [
-            [
-                ('t', 'd'),
-                ('u', 'e'),
-                ('w', '-'),
-                ('-', 'n'),
-                ('-', 't'),
-                ('-', 'i'),
-                ('θ', 's'),
-            ]
-        ]
-
-        self.assertEqual(result, expected)
-
-    def test_aline_delta(self):
-        """
-        Test aline for computing the difference between two segments
-        """
-        result = aline.delta('p', 'q')
-        expected = 20.0
-
-        self.assertEqual(result, expected)
-
-        result = aline.delta('a', 'A')
-        expected = 0.0
-
-        self.assertEqual(result, expected)
diff --git a/nlp_resource_data/nltk/test/unit/test_brill.py b/nlp_resource_data/nltk/test/unit/test_brill.py

deleted file mode 100644 (file)

index 5297fe1..0000000
--- a/nlp_resource_data/nltk/test/unit/test_brill.py
+++ /dev/null
@@ -1,37 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Tests for Brill tagger.
-"""
-
-import unittest
-
-from nltk.tag import UnigramTagger, brill, brill_trainer
-from nltk.tbl import Template
-from nltk.corpus import treebank
-
-from nltk.tbl import demo
-
-
-class TestBrill(unittest.TestCase):
-    def test_pos_template(self):
-        train_sents = treebank.tagged_sents()[:1000]
-        tagger = UnigramTagger(train_sents)
-        trainer = brill_trainer.BrillTaggerTrainer(
-            tagger, [brill.Template(brill.Pos([-1]))]
-        )
-        brill_tagger = trainer.train(train_sents)
-        # Example from https://github.com/nltk/nltk/issues/769
-        result = brill_tagger.tag('This is a foo bar sentence'.split())
-        expected = [
-            ('This', 'DT'),
-            ('is', 'VBZ'),
-            ('a', 'DT'),
-            ('foo', None),
-            ('bar', 'NN'),
-            ('sentence', None),
-        ]
-        self.assertEqual(result, expected)
-
-    @unittest.skip("Should be tested in __main__ of nltk.tbl.demo")
-    def test_brill_demo(self):
-        demo()
diff --git a/nlp_resource_data/nltk/test/unit/test_chunk.py b/nlp_resource_data/nltk/test/unit/test_chunk.py

deleted file mode 100644 (file)

index 8c40dfc..0000000
--- a/nlp_resource_data/nltk/test/unit/test_chunk.py
+++ /dev/null
@@ -1,87 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import, unicode_literals
-import unittest
-
-from nltk import RegexpParser
-
-
-class TestChunkRule(unittest.TestCase):
-    def test_tag_pattern2re_pattern_quantifier(self):
-        """Test for bug https://github.com/nltk/nltk/issues/1597
-
-        Ensures that curly bracket quantifiers can be used inside a chunk rule.
-        This type of quantifier has been used for the supplementary example
-        in http://www.nltk.org/book/ch07.html#exploring-text-corpora.
-        """
-        sent = [
-            ('The', 'AT'),
-            ('September-October', 'NP'),
-            ('term', 'NN'),
-            ('jury', 'NN'),
-            ('had', 'HVD'),
-            ('been', 'BEN'),
-            ('charged', 'VBN'),
-            ('by', 'IN'),
-            ('Fulton', 'NP-TL'),
-            ('Superior', 'JJ-TL'),
-            ('Court', 'NN-TL'),
-            ('Judge', 'NN-TL'),
-            ('Durwood', 'NP'),
-            ('Pye', 'NP'),
-            ('to', 'TO'),
-            ('investigate', 'VB'),
-            ('reports', 'NNS'),
-            ('of', 'IN'),
-            ('possible', 'JJ'),
-            ('``', '``'),
-            ('irregularities', 'NNS'),
-            ("''", "''"),
-            ('in', 'IN'),
-            ('the', 'AT'),
-            ('hard-fought', 'JJ'),
-            ('primary', 'NN'),
-            ('which', 'WDT'),
-            ('was', 'BEDZ'),
-            ('won', 'VBN'),
-            ('by', 'IN'),
-            ('Mayor-nominate', 'NN-TL'),
-            ('Ivan', 'NP'),
-            ('Allen', 'NP'),
-            ('Jr.', 'NP'),
-            ('.', '.'),
-        ]  # source: brown corpus
-        cp = RegexpParser('CHUNK: {<N.*>{4,}}')
-        tree = cp.parse(sent)
-        assert (
-            tree.pformat()
-            == """(S
-  The/AT
-  September-October/NP
-  term/NN
-  jury/NN
-  had/HVD
-  been/BEN
-  charged/VBN
-  by/IN
-  Fulton/NP-TL
-  Superior/JJ-TL
-  (CHUNK Court/NN-TL Judge/NN-TL Durwood/NP Pye/NP)
-  to/TO
-  investigate/VB
-  reports/NNS
-  of/IN
-  possible/JJ
-  ``/``
-  irregularities/NNS
-  ''/''
-  in/IN
-  the/AT
-  hard-fought/JJ
-  primary/NN
-  which/WDT
-  was/BEDZ
-  won/VBN
-  by/IN
-  (CHUNK Mayor-nominate/NN-TL Ivan/NP Allen/NP Jr./NP)
-  ./.)"""
-        )
diff --git a/nlp_resource_data/nltk/test/unit/test_classify.py b/nlp_resource_data/nltk/test/unit/test_classify.py

deleted file mode 100644 (file)

index e9128d2..0000000
--- a/nlp_resource_data/nltk/test/unit/test_classify.py
+++ /dev/null
@@ -1,50 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Unit tests for nltk.classify. See also: nltk/test/classify.doctest
-"""
-from __future__ import absolute_import
-from nose import SkipTest
-from nltk import classify
-
-TRAIN = [
-    (dict(a=1, b=1, c=1), 'y'),
-    (dict(a=1, b=1, c=1), 'x'),
-    (dict(a=1, b=1, c=0), 'y'),
-    (dict(a=0, b=1, c=1), 'x'),
-    (dict(a=0, b=1, c=1), 'y'),
-    (dict(a=0, b=0, c=1), 'y'),
-    (dict(a=0, b=1, c=0), 'x'),
-    (dict(a=0, b=0, c=0), 'x'),
-    (dict(a=0, b=1, c=1), 'y'),
-]
-
-TEST = [
-    (dict(a=1, b=0, c=1)),  # unseen
-    (dict(a=1, b=0, c=0)),  # unseen
-    (dict(a=0, b=1, c=1)),  # seen 3 times, labels=y,y,x
-    (dict(a=0, b=1, c=0)),  # seen 1 time, label=x
-]
-
-RESULTS = [(0.16, 0.84), (0.46, 0.54), (0.41, 0.59), (0.76, 0.24)]
-
-
-def assert_classifier_correct(algorithm):
-    try:
-        classifier = classify.MaxentClassifier.train(
-            TRAIN, algorithm, trace=0, max_iter=1000
-        )
-    except (LookupError, AttributeError) as e:
-        raise SkipTest(str(e))
-
-    for (px, py), featureset in zip(RESULTS, TEST):
-        pdist = classifier.prob_classify(featureset)
-        assert abs(pdist.prob('x') - px) < 1e-2, (pdist.prob('x'), px)
-        assert abs(pdist.prob('y') - py) < 1e-2, (pdist.prob('y'), py)
-
-
-def test_megam():
-    assert_classifier_correct('MEGAM')
-
-
-def test_tadm():
-    assert_classifier_correct('TADM')
diff --git a/nlp_resource_data/nltk/test/unit/test_collocations.py b/nlp_resource_data/nltk/test/unit/test_collocations.py

deleted file mode 100644 (file)

index 8e3535f..0000000
--- a/nlp_resource_data/nltk/test/unit/test_collocations.py
+++ /dev/null
@@ -1,159 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import, unicode_literals
-import unittest
-
-from nltk.collocations import BigramCollocationFinder
-from nltk.metrics import BigramAssocMeasures
-
-## Test bigram counters with discontinuous bigrams and repeated words
-
-_EPSILON = 1e-8
-
-
-def close_enough(x, y):
-    """Verify that two sequences of n-gram association values are within
-       _EPSILON of each other.
-    """
-
-    for (x1, y1) in zip(x, y):
-        if x1[0] != y1[0] or abs(x1[1] - y1[1]) > _EPSILON:
-            return False
-    return True
-
-
-class TestBigram(unittest.TestCase):
-    def test_bigram2(self):
-        sent = 'this this is is a a test test'.split()
-
-        b = BigramCollocationFinder.from_words(sent)
-
-        # python 2.6 does not have assertItemsEqual or assertListEqual
-        self.assertEqual(
-            sorted(b.ngram_fd.items()),
-            sorted(
-                [
-                    (('a', 'a'), 1),
-                    (('a', 'test'), 1),
-                    (('is', 'a'), 1),
-                    (('is', 'is'), 1),
-                    (('test', 'test'), 1),
-                    (('this', 'is'), 1),
-                    (('this', 'this'), 1),
-                ]
-            ),
-        )
-        self.assertEqual(
-            sorted(b.word_fd.items()),
-            sorted([('a', 2), ('is', 2), ('test', 2), ('this', 2)]),
-        )
-        self.assertTrue(
-            len(sent) == sum(b.word_fd.values()) == sum(b.ngram_fd.values()) + 1
-        )
-        self.assertTrue(
-            close_enough(
-                sorted(b.score_ngrams(BigramAssocMeasures.pmi)),
-                sorted(
-                    [
-                        (('a', 'a'), 1.0),
-                        (('a', 'test'), 1.0),
-                        (('is', 'a'), 1.0),
-                        (('is', 'is'), 1.0),
-                        (('test', 'test'), 1.0),
-                        (('this', 'is'), 1.0),
-                        (('this', 'this'), 1.0),
-                    ]
-                ),
-            )
-        )
-
-    def test_bigram3(self):
-        sent = 'this this is is a a test test'.split()
-
-        b = BigramCollocationFinder.from_words(sent, window_size=3)
-        self.assertEqual(
-            sorted(b.ngram_fd.items()),
-            sorted(
-                [
-                    (('a', 'test'), 3),
-                    (('is', 'a'), 3),
-                    (('this', 'is'), 3),
-                    (('a', 'a'), 1),
-                    (('is', 'is'), 1),
-                    (('test', 'test'), 1),
-                    (('this', 'this'), 1),
-                ]
-            ),
-        )
-        self.assertEqual(
-            sorted(b.word_fd.items()),
-            sorted([('a', 2), ('is', 2), ('test', 2), ('this', 2)]),
-        )
-        self.assertTrue(
-            len(sent)
-            == sum(b.word_fd.values())
-            == (sum(b.ngram_fd.values()) + 2 + 1) / 2.0
-        )
-        self.assertTrue(
-            close_enough(
-                sorted(b.score_ngrams(BigramAssocMeasures.pmi)),
-                sorted(
-                    [
-                        (('a', 'test'), 1.584962500721156),
-                        (('is', 'a'), 1.584962500721156),
-                        (('this', 'is'), 1.584962500721156),
-                        (('a', 'a'), 0.0),
-                        (('is', 'is'), 0.0),
-                        (('test', 'test'), 0.0),
-                        (('this', 'this'), 0.0),
-                    ]
-                ),
-            )
-        )
-
-    def test_bigram5(self):
-        sent = 'this this is is a a test test'.split()
-
-        b = BigramCollocationFinder.from_words(sent, window_size=5)
-        self.assertEqual(
-            sorted(b.ngram_fd.items()),
-            sorted(
-                [
-                    (('a', 'test'), 4),
-                    (('is', 'a'), 4),
-                    (('this', 'is'), 4),
-                    (('is', 'test'), 3),
-                    (('this', 'a'), 3),
-                    (('a', 'a'), 1),
-                    (('is', 'is'), 1),
-                    (('test', 'test'), 1),
-                    (('this', 'this'), 1),
-                ]
-            ),
-        )
-        self.assertEqual(
-            sorted(b.word_fd.items()),
-            sorted([('a', 2), ('is', 2), ('test', 2), ('this', 2)]),
-        )
-        self.assertTrue(
-            len(sent)
-            == sum(b.word_fd.values())
-            == (sum(b.ngram_fd.values()) + 4 + 3 + 2 + 1) / 4.0
-        )
-        self.assertTrue(
-            close_enough(
-                sorted(b.score_ngrams(BigramAssocMeasures.pmi)),
-                sorted(
-                    [
-                        (('a', 'test'), 1.0),
-                        (('is', 'a'), 1.0),
-                        (('this', 'is'), 1.0),
-                        (('is', 'test'), 0.5849625007211562),
-                        (('this', 'a'), 0.5849625007211562),
-                        (('a', 'a'), -1.0),
-                        (('is', 'is'), -1.0),
-                        (('test', 'test'), -1.0),
-                        (('this', 'this'), -1.0),
-                    ]
-                ),
-            )
-        )
diff --git a/nlp_resource_data/nltk/test/unit/test_concordance.py b/nlp_resource_data/nltk/test/unit/test_concordance.py

deleted file mode 100644 (file)

index 81ac47b..0000000
--- a/nlp_resource_data/nltk/test/unit/test_concordance.py
+++ /dev/null
@@ -1,107 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import, unicode_literals
-
-import unittest
-import contextlib
-import sys
-
-from nose import with_setup
-
-from nltk.corpus import gutenberg
-from nltk.text import Text
-
-try:
-    from StringIO import StringIO
-except ImportError as e:
-    from io import StringIO
-
-
-@contextlib.contextmanager
-def stdout_redirect(where):
-    sys.stdout = where
-    try:
-        yield where
-    finally:
-        sys.stdout = sys.__stdout__
-
-
-class TestConcordance(unittest.TestCase):
-    """Text constructed using: http://www.nltk.org/book/ch01.html"""
-
-    @classmethod
-    def setup_class(cls):
-        cls.corpus = gutenberg.words('melville-moby_dick.txt')
-
-    @classmethod
-    def teardown_class(cls):
-        pass
-
-    def setUp(self):
-        self.text = Text(TestConcordance.corpus)
-        self.query = "monstrous"
-        self.maxDiff = None
-        self.list_out = [
-            'ong the former , one was of a most monstrous size . ... This came towards us , ',
-            'ON OF THE PSALMS . " Touching that monstrous bulk of the whale or ork we have r',
-            'll over with a heathenish array of monstrous clubs and spears . Some were thick',
-            'd as you gazed , and wondered what monstrous cannibal and savage could ever hav',
-            'that has survived the flood ; most monstrous and most mountainous ! That Himmal',
-            'they might scout at Moby Dick as a monstrous fable , or still worse and more de',
-            'th of Radney .\'" CHAPTER 55 Of the Monstrous Pictures of Whales . I shall ere l',
-            'ing Scenes . In connexion with the monstrous pictures of whales , I am strongly',
-            'ere to enter upon those still more monstrous stories of them which are to be fo',
-            'ght have been rummaged out of this monstrous cabinet there is no telling . But ',
-            'of Whale - Bones ; for Whales of a monstrous size are oftentimes cast up dead u',
-        ]
-
-    def tearDown(self):
-        pass
-
-    def test_concordance_list(self):
-        concordance_out = self.text.concordance_list(self.query)
-        self.assertEqual(self.list_out, [c.line for c in concordance_out])
-
-    def test_concordance_width(self):
-        list_out = [
-            "monstrous",
-            "monstrous",
-            "monstrous",
-            "monstrous",
-            "monstrous",
-            "monstrous",
-            "Monstrous",
-            "monstrous",
-            "monstrous",
-            "monstrous",
-            "monstrous",
-        ]
-
-        concordance_out = self.text.concordance_list(self.query, width=0)
-        self.assertEqual(list_out, [c.query for c in concordance_out])
-
-    def test_concordance_lines(self):
-        concordance_out = self.text.concordance_list(self.query, lines=3)
-        self.assertEqual(self.list_out[:3], [c.line for c in concordance_out])
-
-    def test_concordance_print(self):
-        print_out = """Displaying 11 of 11 matches:
-        ong the former , one was of a most monstrous size . ... This came towards us ,
-        ON OF THE PSALMS . " Touching that monstrous bulk of the whale or ork we have r
-        ll over with a heathenish array of monstrous clubs and spears . Some were thick
-        d as you gazed , and wondered what monstrous cannibal and savage could ever hav
-        that has survived the flood ; most monstrous and most mountainous ! That Himmal
-        they might scout at Moby Dick as a monstrous fable , or still worse and more de
-        th of Radney .'" CHAPTER 55 Of the Monstrous Pictures of Whales . I shall ere l
-        ing Scenes . In connexion with the monstrous pictures of whales , I am strongly
-        ere to enter upon those still more monstrous stories of them which are to be fo
-        ght have been rummaged out of this monstrous cabinet there is no telling . But
-        of Whale - Bones ; for Whales of a monstrous size are oftentimes cast up dead u
-        """
-
-        with stdout_redirect(StringIO()) as stdout:
-            self.text.concordance(self.query)
-
-        def strip_space(raw_str):
-            return raw_str.replace(" ", "")
-
-        self.assertEqual(strip_space(print_out), strip_space(stdout.getvalue()))
diff --git a/nlp_resource_data/nltk/test/unit/test_corenlp.py b/nlp_resource_data/nltk/test/unit/test_corenlp.py

deleted file mode 100644 (file)

index fed13e3..0000000
--- a/nlp_resource_data/nltk/test/unit/test_corenlp.py
+++ /dev/null
@@ -1,1419 +0,0 @@
-# -*- coding: utf-8 -*-
-
-"""
-Mock test for Stanford CoreNLP wrappers.
-"""
-
-import sys
-from itertools import chain
-from unittest import TestCase, SkipTest
-
-try:
-    from unittest.mock import MagicMock
-except ImportError:
-    raise SkipTest('unittest.mock no supported in Python2')
-from nltk.tree import Tree
-from nltk.parse import corenlp
-
-
-class TestTokenizerAPI(TestCase):
-    def test_tokenize(self):
-        corenlp_tokenizer = corenlp.CoreNLPParser()
-
-        api_return_value = {
-            u'sentences': [
-                {
-                    u'index': 0,
-                    u'tokens': [
-                        {
-                            u'after': u' ',
-                            u'before': u'',
-                            u'characterOffsetBegin': 0,
-                            u'characterOffsetEnd': 4,
-                            u'index': 1,
-                            u'originalText': u'Good',
-                            u'word': u'Good',
-                        },
-                        {
-                            u'after': u' ',
-                            u'before': u' ',
-                            u'characterOffsetBegin': 5,
-                            u'characterOffsetEnd': 12,
-                            u'index': 2,
-                            u'originalText': u'muffins',
-                            u'word': u'muffins',
-                        },
-                        {
-                            u'after': u' ',
-                            u'before': u' ',
-                            u'characterOffsetBegin': 13,
-                            u'characterOffsetEnd': 17,
-                            u'index': 3,
-                            u'originalText': u'cost',
-                            u'word': u'cost',
-                        },
-                        {
-                            u'after': u'',
-                            u'before': u' ',
-                            u'characterOffsetBegin': 18,
-                            u'characterOffsetEnd': 19,
-                            u'index': 4,
-                            u'originalText': u'$',
-                            u'word': u'$',
-                        },
-                        {
-                            u'after': u'\n',
-                            u'before': u'',
-                            u'characterOffsetBegin': 19,
-                            u'characterOffsetEnd': 23,
-                            u'index': 5,
-                            u'originalText': u'3.88',
-                            u'word': u'3.88',
-                        },
-                        {
-                            u'after': u' ',
-                            u'before': u'\n',
-                            u'characterOffsetBegin': 24,
-                            u'characterOffsetEnd': 26,
-                            u'index': 6,
-                            u'originalText': u'in',
-                            u'word': u'in',
-                        },
-                        {
-                            u'after': u' ',
-                            u'before': u' ',
-                            u'characterOffsetBegin': 27,
-                            u'characterOffsetEnd': 30,
-                            u'index': 7,
-                            u'originalText': u'New',
-                            u'word': u'New',
-                        },
-                        {
-                            u'after': u'',
-                            u'before': u' ',
-                            u'characterOffsetBegin': 31,
-                            u'characterOffsetEnd': 35,
-                            u'index': 8,
-                            u'originalText': u'York',
-                            u'word': u'York',
-                        },
-                        {
-                            u'after': u'  ',
-                            u'before': u'',
-                            u'characterOffsetBegin': 35,
-                            u'characterOffsetEnd': 36,
-                            u'index': 9,
-                            u'originalText': u'.',
-                            u'word': u'.',
-                        },
-                    ],
-                },
-                {
-                    u'index': 1,
-                    u'tokens': [
-                        {
-                            u'after': u' ',
-                            u'before': u'  ',
-                            u'characterOffsetBegin': 38,
-                            u'characterOffsetEnd': 44,
-                            u'index': 1,
-                            u'originalText': u'Please',
-                            u'word': u'Please',
-                        },
-                        {
-                            u'after': u' ',
-                            u'before': u' ',
-                            u'characterOffsetBegin': 45,
-                            u'characterOffsetEnd': 48,
-                            u'index': 2,
-                            u'originalText': u'buy',
-                            u'word': u'buy',
-                        },
-                        {
-                            u'after': u'\n',
-                            u'before': u' ',
-                            u'characterOffsetBegin': 49,
-                            u'characterOffsetEnd': 51,
-                            u'index': 3,
-                            u'originalText': u'me',
-                            u'word': u'me',
-                        },
-                        {
-                            u'after': u' ',
-                            u'before': u'\n',
-                            u'characterOffsetBegin': 52,
-                            u'characterOffsetEnd': 55,
-                            u'index': 4,
-                            u'originalText': u'two',
-                            u'word': u'two',
-                        },
-                        {
-                            u'after': u' ',
-                            u'before': u' ',
-                            u'characterOffsetBegin': 56,
-                            u'characterOffsetEnd': 58,
-                            u'index': 5,
-                            u'originalText': u'of',
-                            u'word': u'of',
-                        },
-                        {
-                            u'after': u'',
-                            u'before': u' ',
-                            u'characterOffsetBegin': 59,
-                            u'characterOffsetEnd': 63,
-                            u'index': 6,
-                            u'originalText': u'them',
-                            u'word': u'them',
-                        },
-                        {
-                            u'after': u'\n',
-                            u'before': u'',
-                            u'characterOffsetBegin': 63,
-                            u'characterOffsetEnd': 64,
-                            u'index': 7,
-                            u'originalText': u'.',
-                            u'word': u'.',
-                        },
-                    ],
-                },
-                {
-                    u'index': 2,
-                    u'tokens': [
-                        {
-                            u'after': u'',
-                            u'before': u'\n',
-                            u'characterOffsetBegin': 65,
-                            u'characterOffsetEnd': 71,
-                            u'index': 1,
-                            u'originalText': u'Thanks',
-                            u'word': u'Thanks',
-                        },
-                        {
-                            u'after': u'',
-                            u'before': u'',
-                            u'characterOffsetBegin': 71,
-                            u'characterOffsetEnd': 72,
-                            u'index': 2,
-                            u'originalText': u'.',
-                            u'word': u'.',
-                        },
-                    ],
-                },
-            ]
-        }
-        corenlp_tokenizer.api_call = MagicMock(return_value=api_return_value)
-
-        input_string = "Good muffins cost $3.88\nin New York.  Please buy me\ntwo of them.\nThanks."
-
-        expected_output = [
-            u'Good',
-            u'muffins',
-            u'cost',
-            u'$',
-            u'3.88',
-            u'in',
-            u'New',
-            u'York',
-            u'.',
-            u'Please',
-            u'buy',
-            u'me',
-            u'two',
-            u'of',
-            u'them',
-            u'.',
-            u'Thanks',
-            u'.',
-        ]
-
-        tokenized_output = list(corenlp_tokenizer.tokenize(input_string))
-
-        corenlp_tokenizer.api_call.assert_called_once_with(
-            'Good muffins cost $3.88\nin New York.  Please buy me\ntwo of them.\nThanks.',
-            properties={'annotators': 'tokenize,ssplit'},
-        )
-        self.assertEqual(expected_output, tokenized_output)
-
-
-class TestTaggerAPI(TestCase):
-    def test_pos_tagger(self):
-        corenlp_tagger = corenlp.CoreNLPParser(tagtype='pos')
-
-        api_return_value = {
-            u'sentences': [
-                {
-                    u'basicDependencies': [
-                        {
-                            u'dep': u'ROOT',
-                            u'dependent': 1,
-                            u'dependentGloss': u'What',
-                            u'governor': 0,
-                            u'governorGloss': u'ROOT',
-                        },
-                        {
-                            u'dep': u'cop',
-                            u'dependent': 2,
-                            u'dependentGloss': u'is',
-                            u'governor': 1,
-                            u'governorGloss': u'What',
-                        },
-                        {
-                            u'dep': u'det',
-                            u'dependent': 3,
-                            u'dependentGloss': u'the',
-                            u'governor': 4,
-                            u'governorGloss': u'airspeed',
-                        },
-                        {
-                            u'dep': u'nsubj',
-                            u'dependent': 4,
-                            u'dependentGloss': u'airspeed',
-                            u'governor': 1,
-                            u'governorGloss': u'What',
-                        },
-                        {
-                            u'dep': u'case',
-                            u'dependent': 5,
-                            u'dependentGloss': u'of',
-                            u'governor': 8,
-                            u'governorGloss': u'swallow',
-                        },
-                        {
-                            u'dep': u'det',
-                            u'dependent': 6,
-                            u'dependentGloss': u'an',
-                            u'governor': 8,
-                            u'governorGloss': u'swallow',
-                        },
-                        {
-                            u'dep': u'compound',
-                            u'dependent': 7,
-                            u'dependentGloss': u'unladen',
-                            u'governor': 8,
-                            u'governorGloss': u'swallow',
-                        },
-                        {
-                            u'dep': u'nmod',
-                            u'dependent': 8,
-                            u'dependentGloss': u'swallow',
-                            u'governor': 4,
-                            u'governorGloss': u'airspeed',
-                        },
-                        {
-                            u'dep': u'punct',
-                            u'dependent': 9,
-                            u'dependentGloss': u'?',
-                            u'governor': 1,
-                            u'governorGloss': u'What',
-                        },
-                    ],
-                    u'enhancedDependencies': [
-                        {
-                            u'dep': u'ROOT',
-                            u'dependent': 1,
-                            u'dependentGloss': u'What',
-                            u'governor': 0,
-                            u'governorGloss': u'ROOT',
-                        },
-                        {
-                            u'dep': u'cop',
-                            u'dependent': 2,
-                            u'dependentGloss': u'is',
-                            u'governor': 1,
-                            u'governorGloss': u'What',
-                        },
-                        {
-                            u'dep': u'det',
-                            u'dependent': 3,
-                            u'dependentGloss': u'the',
-                            u'governor': 4,
-                            u'governorGloss': u'airspeed',
-                        },
-                        {
-                            u'dep': u'nsubj',
-                            u'dependent': 4,
-                            u'dependentGloss': u'airspeed',
-                            u'governor': 1,
-                            u'governorGloss': u'What',
-                        },
-                        {
-                            u'dep': u'case',
-                            u'dependent': 5,
-                            u'dependentGloss': u'of',
-                            u'governor': 8,
-                            u'governorGloss': u'swallow',
-                        },
-                        {
-                            u'dep': u'det',
-                            u'dependent': 6,
-                            u'dependentGloss': u'an',
-                            u'governor': 8,
-                            u'governorGloss': u'swallow',
-                        },
-                        {
-                            u'dep': u'compound',
-                            u'dependent': 7,
-                            u'dependentGloss': u'unladen',
-                            u'governor': 8,
-                            u'governorGloss': u'swallow',
-                        },
-                        {
-                            u'dep': u'nmod:of',
-                            u'dependent': 8,
-                            u'dependentGloss': u'swallow',
-                            u'governor': 4,
-                            u'governorGloss': u'airspeed',
-                        },
-                        {
-                            u'dep': u'punct',
-                            u'dependent': 9,
-                            u'dependentGloss': u'?',
-                            u'governor': 1,
-                            u'governorGloss': u'What',
-                        },
-                    ],
-                    u'enhancedPlusPlusDependencies': [
-                        {
-                            u'dep': u'ROOT',
-                            u'dependent': 1,
-                            u'dependentGloss': u'What',
-                            u'governor': 0,
-                            u'governorGloss': u'ROOT',
-                        },
-                        {
-                            u'dep': u'cop',
-                            u'dependent': 2,
-                            u'dependentGloss': u'is',
-                            u'governor': 1,
-                            u'governorGloss': u'What',
-                        },
-                        {
-                            u'dep': u'det',
-                            u'dependent': 3,
-                            u'dependentGloss': u'the',
-                            u'governor': 4,
-                            u'governorGloss': u'airspeed',
-                        },
-                        {
-                            u'dep': u'nsubj',
-                            u'dependent': 4,
-                            u'dependentGloss': u'airspeed',
-                            u'governor': 1,
-                            u'governorGloss': u'What',
-                        },
-                        {
-                            u'dep': u'case',
-                            u'dependent': 5,
-                            u'dependentGloss': u'of',
-                            u'governor': 8,
-                            u'governorGloss': u'swallow',
-                        },
-                        {
-                            u'dep': u'det',
-                            u'dependent': 6,
-                            u'dependentGloss': u'an',
-                            u'governor': 8,
-                            u'governorGloss': u'swallow',
-                        },
-                        {
-                            u'dep': u'compound',
-                            u'dependent': 7,
-                            u'dependentGloss': u'unladen',
-                            u'governor': 8,
-                            u'governorGloss': u'swallow',
-                        },
-                        {
-                            u'dep': u'nmod:of',
-                            u'dependent': 8,
-                            u'dependentGloss': u'swallow',
-                            u'governor': 4,
-                            u'governorGloss': u'airspeed',
-                        },
-                        {
-                            u'dep': u'punct',
-                            u'dependent': 9,
-                            u'dependentGloss': u'?',
-                            u'governor': 1,
-                            u'governorGloss': u'What',
-                        },
-                    ],
-                    u'index': 0,
-                    u'parse': u'(ROOT\n  (SBARQ\n    (WHNP (WP What))\n    (SQ (VBZ is)\n      (NP\n        (NP (DT the) (NN airspeed))\n        (PP (IN of)\n          (NP (DT an) (NN unladen) (NN swallow)))))\n    (. ?)))',
-                    u'tokens': [
-                        {
-                            u'after': u' ',
-                            u'before': u'',
-                            u'characterOffsetBegin': 0,
-                            u'characterOffsetEnd': 4,
-                            u'index': 1,
-                            u'lemma': u'what',
-                            u'originalText': u'What',
-                            u'pos': u'WP',
-                            u'word': u'What',
-                        },
-                        {
-                            u'after': u' ',
-                            u'before': u' ',
-                            u'characterOffsetBegin': 5,
-                            u'characterOffsetEnd': 7,
-                            u'index': 2,
-                            u'lemma': u'be',
-                            u'originalText': u'is',
-                            u'pos': u'VBZ',
-                            u'word': u'is',
-                        },
-                        {
-                            u'after': u' ',
-                            u'before': u' ',
-                            u'characterOffsetBegin': 8,
-                            u'characterOffsetEnd': 11,
-                            u'index': 3,
-                            u'lemma': u'the',
-                            u'originalText': u'the',
-                            u'pos': u'DT',
-                            u'word': u'the',
-                        },
-                        {
-                            u'after': u' ',
-                            u'before': u' ',
-                            u'characterOffsetBegin': 12,
-                            u'characterOffsetEnd': 20,
-                            u'index': 4,
-                            u'lemma': u'airspeed',
-                            u'originalText': u'airspeed',
-                            u'pos': u'NN',
-                            u'word': u'airspeed',
-                        },
-                        {
-                            u'after': u' ',
-                            u'before': u' ',
-                            u'characterOffsetBegin': 21,
-                            u'characterOffsetEnd': 23,
-                            u'index': 5,
-                            u'lemma': u'of',
-                            u'originalText': u'of',
-                            u'pos': u'IN',
-                            u'word': u'of',
-                        },
-                        {
-                            u'after': u' ',
-                            u'before': u' ',
-                            u'characterOffsetBegin': 24,
-                            u'characterOffsetEnd': 26,
-                            u'index': 6,
-                            u'lemma': u'a',
-                            u'originalText': u'an',
-                            u'pos': u'DT',
-                            u'word': u'an',
-                        },
-                        {
-                            u'after': u' ',
-                            u'before': u' ',
-                            u'characterOffsetBegin': 27,
-                            u'characterOffsetEnd': 34,
-                            u'index': 7,
-                            u'lemma': u'unladen',
-                            u'originalText': u'unladen',
-                            u'pos': u'JJ',
-                            u'word': u'unladen',
-                        },
-                        {
-                            u'after': u' ',
-                            u'before': u' ',
-                            u'characterOffsetBegin': 35,
-                            u'characterOffsetEnd': 42,
-                            u'index': 8,
-                            u'lemma': u'swallow',
-                            u'originalText': u'swallow',
-                            u'pos': u'VB',
-                            u'word': u'swallow',
-                        },
-                        {
-                            u'after': u'',
-                            u'before': u' ',
-                            u'characterOffsetBegin': 43,
-                            u'characterOffsetEnd': 44,
-                            u'index': 9,
-                            u'lemma': u'?',
-                            u'originalText': u'?',
-                            u'pos': u'.',
-                            u'word': u'?',
-                        },
-                    ],
-                }
-            ]
-        }
-        corenlp_tagger.api_call = MagicMock(return_value=api_return_value)
-
-        input_tokens = 'What is the airspeed of an unladen swallow ?'.split()
-        expected_output = [
-            ('What', 'WP'),
-            ('is', 'VBZ'),
-            ('the', 'DT'),
-            ('airspeed', 'NN'),
-            ('of', 'IN'),
-            ('an', 'DT'),
-            ('unladen', 'JJ'),
-            ('swallow', 'VB'),
-            ('?', '.'),
-        ]
-        tagged_output = corenlp_tagger.tag(input_tokens)
-
-        corenlp_tagger.api_call.assert_called_once_with(
-            'What is the airspeed of an unladen swallow ?',
-            properties={
-                'ssplit.isOneSentence': 'true',
-                'annotators': 'tokenize,ssplit,pos',
-            },
-        )
-        self.assertEqual(expected_output, tagged_output)
-
-    def test_ner_tagger(self):
-        corenlp_tagger = corenlp.CoreNLPParser(tagtype='ner')
-
-        api_return_value = {
-            'sentences': [
-                {
-                    'index': 0,
-                    'tokens': [
-                        {
-                            'after': ' ',
-                            'before': '',
-                            'characterOffsetBegin': 0,
-                            'characterOffsetEnd': 4,
-                            'index': 1,
-                            'lemma': 'Rami',
-                            'ner': 'PERSON',
-                            'originalText': 'Rami',
-                            'pos': 'NNP',
-                            'word': 'Rami',
-                        },
-                        {
-                            'after': ' ',
-                            'before': ' ',
-                            'characterOffsetBegin': 5,
-                            'characterOffsetEnd': 8,
-                            'index': 2,
-                            'lemma': 'Eid',
-                            'ner': 'PERSON',
-                            'originalText': 'Eid',
-                            'pos': 'NNP',
-                            'word': 'Eid',
-                        },
-                        {
-                            'after': ' ',
-                            'before': ' ',
-                            'characterOffsetBegin': 9,
-                            'characterOffsetEnd': 11,
-                            'index': 3,
-                            'lemma': 'be',
-                            'ner': 'O',
-                            'originalText': 'is',
-                            'pos': 'VBZ',
-                            'word': 'is',
-                        },
-                        {
-                            'after': ' ',
-                            'before': ' ',
-                            'characterOffsetBegin': 12,
-                            'characterOffsetEnd': 20,
-                            'index': 4,
-                            'lemma': 'study',
-                            'ner': 'O',
-                            'originalText': 'studying',
-                            'pos': 'VBG',
-                            'word': 'studying',
-                        },
-                        {
-                            'after': ' ',
-                            'before': ' ',
-                            'characterOffsetBegin': 21,
-                            'characterOffsetEnd': 23,
-                            'index': 5,
-                            'lemma': 'at',
-                            'ner': 'O',
-                            'originalText': 'at',
-                            'pos': 'IN',
-                            'word': 'at',
-                        },
-                        {
-                            'after': ' ',
-                            'before': ' ',
-                            'characterOffsetBegin': 24,
-                            'characterOffsetEnd': 29,
-                            'index': 6,
-                            'lemma': 'Stony',
-                            'ner': 'ORGANIZATION',
-                            'originalText': 'Stony',
-                            'pos': 'NNP',
-                            'word': 'Stony',
-                        },
-                        {
-                            'after': ' ',
-                            'before': ' ',
-                            'characterOffsetBegin': 30,
-                            'characterOffsetEnd': 35,
-                            'index': 7,
-                            'lemma': 'Brook',
-                            'ner': 'ORGANIZATION',
-                            'originalText': 'Brook',
-                            'pos': 'NNP',
-                            'word': 'Brook',
-                        },
-                        {
-                            'after': ' ',
-                            'before': ' ',
-                            'characterOffsetBegin': 36,
-                            'characterOffsetEnd': 46,
-                            'index': 8,
-                            'lemma': 'University',
-                            'ner': 'ORGANIZATION',
-                            'originalText': 'University',
-                            'pos': 'NNP',
-                            'word': 'University',
-                        },
-                        {
-                            'after': ' ',
-                            'before': ' ',
-                            'characterOffsetBegin': 47,
-                            'characterOffsetEnd': 49,
-                            'index': 9,
-                            'lemma': 'in',
-                            'ner': 'O',
-                            'originalText': 'in',
-                            'pos': 'IN',
-                            'word': 'in',
-                        },
-                        {
-                            'after': '',
-                            'before': ' ',
-                            'characterOffsetBegin': 50,
-                            'characterOffsetEnd': 52,
-                            'index': 10,
-                            'lemma': 'NY',
-                            'ner': 'O',
-                            'originalText': 'NY',
-                            'pos': 'NNP',
-                            'word': 'NY',
-                        },
-                    ],
-                }
-            ]
-        }
-
-        corenlp_tagger.api_call = MagicMock(return_value=api_return_value)
-
-        input_tokens = 'Rami Eid is studying at Stony Brook University in NY'.split()
-        expected_output = [
-            ('Rami', 'PERSON'),
-            ('Eid', 'PERSON'),
-            ('is', 'O'),
-            ('studying', 'O'),
-            ('at', 'O'),
-            ('Stony', 'ORGANIZATION'),
-            ('Brook', 'ORGANIZATION'),
-            ('University', 'ORGANIZATION'),
-            ('in', 'O'),
-            ('NY', 'O'),
-        ]
-        tagged_output = corenlp_tagger.tag(input_tokens)
-
-        corenlp_tagger.api_call.assert_called_once_with(
-            'Rami Eid is studying at Stony Brook University in NY',
-            properties={
-                'ssplit.isOneSentence': 'true',
-                'annotators': 'tokenize,ssplit,ner',
-            },
-        )
-        self.assertEqual(expected_output, tagged_output)
-
-    def test_unexpected_tagtype(self):
-        with self.assertRaises(ValueError):
-            corenlp_tagger = corenlp.CoreNLPParser(tagtype='test')
-
-
-class TestParserAPI(TestCase):
-    def test_parse(self):
-        corenlp_parser = corenlp.CoreNLPParser()
-
-        api_return_value = {
-            'sentences': [
-                {
-                    'basicDependencies': [
-                        {
-                            'dep': 'ROOT',
-                            'dependent': 4,
-                            'dependentGloss': 'fox',
-                            'governor': 0,
-                            'governorGloss': 'ROOT',
-                        },
-                        {
-                            'dep': 'det',
-                            'dependent': 1,
-                            'dependentGloss': 'The',
-                            'governor': 4,
-                            'governorGloss': 'fox',
-                        },
-                        {
-                            'dep': 'amod',
-                            'dependent': 2,
-                            'dependentGloss': 'quick',
-                            'governor': 4,
-                            'governorGloss': 'fox',
-                        },
-                        {
-                            'dep': 'amod',
-                            'dependent': 3,
-                            'dependentGloss': 'brown',
-                            'governor': 4,
-                            'governorGloss': 'fox',
-                        },
-                        {
-                            'dep': 'dep',
-                            'dependent': 5,
-                            'dependentGloss': 'jumps',
-                            'governor': 4,
-                            'governorGloss': 'fox',
-                        },
-                        {
-                            'dep': 'case',
-                            'dependent': 6,
-                            'dependentGloss': 'over',
-                            'governor': 9,
-                            'governorGloss': 'dog',
-                        },
-                        {
-                            'dep': 'det',
-                            'dependent': 7,
-                            'dependentGloss': 'the',
-                            'governor': 9,
-                            'governorGloss': 'dog',
-                        },
-                        {
-                            'dep': 'amod',
-                            'dependent': 8,
-                            'dependentGloss': 'lazy',
-                            'governor': 9,
-                            'governorGloss': 'dog',
-                        },
-                        {
-                            'dep': 'nmod',
-                            'dependent': 9,
-                            'dependentGloss': 'dog',
-                            'governor': 5,
-                            'governorGloss': 'jumps',
-                        },
-                    ],
-                    'enhancedDependencies': [
-                        {
-                            'dep': 'ROOT',
-                            'dependent': 4,
-                            'dependentGloss': 'fox',
-                            'governor': 0,
-                            'governorGloss': 'ROOT',
-                        },
-                        {
-                            'dep': 'det',
-                            'dependent': 1,
-                            'dependentGloss': 'The',
-                            'governor': 4,
-                            'governorGloss': 'fox',
-                        },
-                        {
-                            'dep': 'amod',
-                            'dependent': 2,
-                            'dependentGloss': 'quick',
-                            'governor': 4,
-                            'governorGloss': 'fox',
-                        },
-                        {
-                            'dep': 'amod',
-                            'dependent': 3,
-                            'dependentGloss': 'brown',
-                            'governor': 4,
-                            'governorGloss': 'fox',
-                        },
-                        {
-                            'dep': 'dep',
-                            'dependent': 5,
-                            'dependentGloss': 'jumps',
-                            'governor': 4,
-                            'governorGloss': 'fox',
-                        },
-                        {
-                            'dep': 'case',
-                            'dependent': 6,
-                            'dependentGloss': 'over',
-                            'governor': 9,
-                            'governorGloss': 'dog',
-                        },
-                        {
-                            'dep': 'det',
-                            'dependent': 7,
-                            'dependentGloss': 'the',
-                            'governor': 9,
-                            'governorGloss': 'dog',
-                        },
-                        {
-                            'dep': 'amod',
-                            'dependent': 8,
-                            'dependentGloss': 'lazy',
-                            'governor': 9,
-                            'governorGloss': 'dog',
-                        },
-                        {
-                            'dep': 'nmod:over',
-                            'dependent': 9,
-                            'dependentGloss': 'dog',
-                            'governor': 5,
-                            'governorGloss': 'jumps',
-                        },
-                    ],
-                    'enhancedPlusPlusDependencies': [
-                        {
-                            'dep': 'ROOT',
-                            'dependent': 4,
-                            'dependentGloss': 'fox',
-                            'governor': 0,
-                            'governorGloss': 'ROOT',
-                        },
-                        {
-                            'dep': 'det',
-                            'dependent': 1,
-                            'dependentGloss': 'The',
-                            'governor': 4,
-                            'governorGloss': 'fox',
-                        },
-                        {
-                            'dep': 'amod',
-                            'dependent': 2,
-                            'dependentGloss': 'quick',
-                            'governor': 4,
-                            'governorGloss': 'fox',
-                        },
-                        {
-                            'dep': 'amod',
-                            'dependent': 3,
-                            'dependentGloss': 'brown',
-                            'governor': 4,
-                            'governorGloss': 'fox',
-                        },
-                        {
-                            'dep': 'dep',
-                            'dependent': 5,
-                            'dependentGloss': 'jumps',
-                            'governor': 4,
-                            'governorGloss': 'fox',
-                        },
-                        {
-                            'dep': 'case',
-                            'dependent': 6,
-                            'dependentGloss': 'over',
-                            'governor': 9,
-                            'governorGloss': 'dog',
-                        },
-                        {
-                            'dep': 'det',
-                            'dependent': 7,
-                            'dependentGloss': 'the',
-                            'governor': 9,
-                            'governorGloss': 'dog',
-                        },
-                        {
-                            'dep': 'amod',
-                            'dependent': 8,
-                            'dependentGloss': 'lazy',
-                            'governor': 9,
-                            'governorGloss': 'dog',
-                        },
-                        {
-                            'dep': 'nmod:over',
-                            'dependent': 9,
-                            'dependentGloss': 'dog',
-                            'governor': 5,
-                            'governorGloss': 'jumps',
-                        },
-                    ],
-                    'index': 0,
-                    'parse': '(ROOT\n  (NP\n    (NP (DT The) (JJ quick) (JJ brown) (NN fox))\n    (NP\n      (NP (NNS jumps))\n      (PP (IN over)\n        (NP (DT the) (JJ lazy) (NN dog))))))',
-                    'tokens': [
-                        {
-                            'after': ' ',
-                            'before': '',
-                            'characterOffsetBegin': 0,
-                            'characterOffsetEnd': 3,
-                            'index': 1,
-                            'lemma': 'the',
-                            'originalText': 'The',
-                            'pos': 'DT',
-                            'word': 'The',
-                        },
-                        {
-                            'after': ' ',
-                            'before': ' ',
-                            'characterOffsetBegin': 4,
-                            'characterOffsetEnd': 9,
-                            'index': 2,
-                            'lemma': 'quick',
-                            'originalText': 'quick',
-                            'pos': 'JJ',
-                            'word': 'quick',
-                        },
-                        {
-                            'after': ' ',
-                            'before': ' ',
-                            'characterOffsetBegin': 10,
-                            'characterOffsetEnd': 15,
-                            'index': 3,
-                            'lemma': 'brown',
-                            'originalText': 'brown',
-                            'pos': 'JJ',
-                            'word': 'brown',
-                        },
-                        {
-                            'after': ' ',
-                            'before': ' ',
-                            'characterOffsetBegin': 16,
-                            'characterOffsetEnd': 19,
-                            'index': 4,
-                            'lemma': 'fox',
-                            'originalText': 'fox',
-                            'pos': 'NN',
-                            'word': 'fox',
-                        },
-                        {
-                            'after': ' ',
-                            'before': ' ',
-                            'characterOffsetBegin': 20,
-                            'characterOffsetEnd': 25,
-                            'index': 5,
-                            'lemma': 'jump',
-                            'originalText': 'jumps',
-                            'pos': 'VBZ',
-                            'word': 'jumps',
-                        },
-                        {
-                            'after': ' ',
-                            'before': ' ',
-                            'characterOffsetBegin': 26,
-                            'characterOffsetEnd': 30,
-                            'index': 6,
-                            'lemma': 'over',
-                            'originalText': 'over',
-                            'pos': 'IN',
-                            'word': 'over',
-                        },
-                        {
-                            'after': ' ',
-                            'before': ' ',
-                            'characterOffsetBegin': 31,
-                            'characterOffsetEnd': 34,
-                            'index': 7,
-                            'lemma': 'the',
-                            'originalText': 'the',
-                            'pos': 'DT',
-                            'word': 'the',
-                        },
-                        {
-                            'after': ' ',
-                            'before': ' ',
-                            'characterOffsetBegin': 35,
-                            'characterOffsetEnd': 39,
-                            'index': 8,
-                            'lemma': 'lazy',
-                            'originalText': 'lazy',
-                            'pos': 'JJ',
-                            'word': 'lazy',
-                        },
-                        {
-                            'after': '',
-                            'before': ' ',
-                            'characterOffsetBegin': 40,
-                            'characterOffsetEnd': 43,
-                            'index': 9,
-                            'lemma': 'dog',
-                            'originalText': 'dog',
-                            'pos': 'NN',
-                            'word': 'dog',
-                        },
-                    ],
-                }
-            ]
-        }
-
-        corenlp_parser.api_call = MagicMock(return_value=api_return_value)
-
-        input_string = "The quick brown fox jumps over the lazy dog".split()
-        expected_output = Tree(
-            'ROOT',
-            [
-                Tree(
-                    'NP',
-                    [
-                        Tree(
-                            'NP',
-                            [
-                                Tree('DT', ['The']),
-                                Tree('JJ', ['quick']),
-                                Tree('JJ', ['brown']),
-                                Tree('NN', ['fox']),
-                            ],
-                        ),
-                        Tree(
-                            'NP',
-                            [
-                                Tree('NP', [Tree('NNS', ['jumps'])]),
-                                Tree(
-                                    'PP',
-                                    [
-                                        Tree('IN', ['over']),
-                                        Tree(
-                                            'NP',
-                                            [
-                                                Tree('DT', ['the']),
-                                                Tree('JJ', ['lazy']),
-                                                Tree('NN', ['dog']),
-                                            ],
-                                        ),
-                                    ],
-                                ),
-                            ],
-                        ),
-                    ],
-                )
-            ],
-        )
-
-        parsed_data = next(corenlp_parser.parse(input_string))
-
-        corenlp_parser.api_call.assert_called_once_with(
-            "The quick brown fox jumps over the lazy dog",
-            properties={'ssplit.ssplit.eolonly': 'true'},
-        )
-        self.assertEqual(expected_output, parsed_data)
-
-    def test_dependency_parser(self):
-        corenlp_parser = corenlp.CoreNLPDependencyParser()
-
-        api_return_value = {
-            'sentences': [
-                {
-                    'basicDependencies': [
-                        {
-                            'dep': 'ROOT',
-                            'dependent': 5,
-                            'dependentGloss': 'jumps',
-                            'governor': 0,
-                            'governorGloss': 'ROOT',
-                        },
-                        {
-                            'dep': 'det',
-                            'dependent': 1,
-                            'dependentGloss': 'The',
-                            'governor': 4,
-                            'governorGloss': 'fox',
-                        },
-                        {
-                            'dep': 'amod',
-                            'dependent': 2,
-                            'dependentGloss': 'quick',
-                            'governor': 4,
-                            'governorGloss': 'fox',
-                        },
-                        {
-                            'dep': 'amod',
-                            'dependent': 3,
-                            'dependentGloss': 'brown',
-                            'governor': 4,
-                            'governorGloss': 'fox',
-                        },
-                        {
-                            'dep': 'nsubj',
-                            'dependent': 4,
-                            'dependentGloss': 'fox',
-                            'governor': 5,
-                            'governorGloss': 'jumps',
-                        },
-                        {
-                            'dep': 'case',
-                            'dependent': 6,
-                            'dependentGloss': 'over',
-                            'governor': 9,
-                            'governorGloss': 'dog',
-                        },
-                        {
-                            'dep': 'det',
-                            'dependent': 7,
-                            'dependentGloss': 'the',
-                            'governor': 9,
-                            'governorGloss': 'dog',
-                        },
-                        {
-                            'dep': 'amod',
-                            'dependent': 8,
-                            'dependentGloss': 'lazy',
-                            'governor': 9,
-                            'governorGloss': 'dog',
-                        },
-                        {
-                            'dep': 'nmod',
-                            'dependent': 9,
-                            'dependentGloss': 'dog',
-                            'governor': 5,
-                            'governorGloss': 'jumps',
-                        },
-                    ],
-                    'enhancedDependencies': [
-                        {
-                            'dep': 'ROOT',
-                            'dependent': 5,
-                            'dependentGloss': 'jumps',
-                            'governor': 0,
-                            'governorGloss': 'ROOT',
-                        },
-                        {
-                            'dep': 'det',
-                            'dependent': 1,
-                            'dependentGloss': 'The',
-                            'governor': 4,
-                            'governorGloss': 'fox',
-                        },
-                        {
-                            'dep': 'amod',
-                            'dependent': 2,
-                            'dependentGloss': 'quick',
-                            'governor': 4,
-                            'governorGloss': 'fox',
-                        },
-                        {
-                            'dep': 'amod',
-                            'dependent': 3,
-                            'dependentGloss': 'brown',
-                            'governor': 4,
-                            'governorGloss': 'fox',
-                        },
-                        {
-                            'dep': 'nsubj',
-                            'dependent': 4,
-                            'dependentGloss': 'fox',
-                            'governor': 5,
-                            'governorGloss': 'jumps',
-                        },
-                        {
-                            'dep': 'case',
-                            'dependent': 6,
-                            'dependentGloss': 'over',
-                            'governor': 9,
-                            'governorGloss': 'dog',
-                        },
-                        {
-                            'dep': 'det',
-                            'dependent': 7,
-                            'dependentGloss': 'the',
-                            'governor': 9,
-                            'governorGloss': 'dog',
-                        },
-                        {
-                            'dep': 'amod',
-                            'dependent': 8,
-                            'dependentGloss': 'lazy',
-                            'governor': 9,
-                            'governorGloss': 'dog',
-                        },
-                        {
-                            'dep': 'nmod:over',
-                            'dependent': 9,
-                            'dependentGloss': 'dog',
-                            'governor': 5,
-                            'governorGloss': 'jumps',
-                        },
-                    ],
-                    'enhancedPlusPlusDependencies': [
-                        {
-                            'dep': 'ROOT',
-                            'dependent': 5,
-                            'dependentGloss': 'jumps',
-                            'governor': 0,
-                            'governorGloss': 'ROOT',
-                        },
-                        {
-                            'dep': 'det',
-                            'dependent': 1,
-                            'dependentGloss': 'The',
-                            'governor': 4,
-                            'governorGloss': 'fox',
-                        },
-                        {
-                            'dep': 'amod',
-                            'dependent': 2,
-                            'dependentGloss': 'quick',
-                            'governor': 4,
-                            'governorGloss': 'fox',
-                        },
-                        {
-                            'dep': 'amod',
-                            'dependent': 3,
-                            'dependentGloss': 'brown',
-                            'governor': 4,
-                            'governorGloss': 'fox',
-                        },
-                        {
-                            'dep': 'nsubj',
-                            'dependent': 4,
-                            'dependentGloss': 'fox',
-                            'governor': 5,
-                            'governorGloss': 'jumps',
-                        },
-                        {
-                            'dep': 'case',
-                            'dependent': 6,
-                            'dependentGloss': 'over',
-                            'governor': 9,
-                            'governorGloss': 'dog',
-                        },
-                        {
-                            'dep': 'det',
-                            'dependent': 7,
-                            'dependentGloss': 'the',
-                            'governor': 9,
-                            'governorGloss': 'dog',
-                        },
-                        {
-                            'dep': 'amod',
-                            'dependent': 8,
-                            'dependentGloss': 'lazy',
-                            'governor': 9,
-                            'governorGloss': 'dog',
-                        },
-                        {
-                            'dep': 'nmod:over',
-                            'dependent': 9,
-                            'dependentGloss': 'dog',
-                            'governor': 5,
-                            'governorGloss': 'jumps',
-                        },
-                    ],
-                    'index': 0,
-                    'tokens': [
-                        {
-                            'after': ' ',
-                            'before': '',
-                            'characterOffsetBegin': 0,
-                            'characterOffsetEnd': 3,
-                            'index': 1,
-                            'lemma': 'the',
-                            'originalText': 'The',
-                            'pos': 'DT',
-                            'word': 'The',
-                        },
-                        {
-                            'after': ' ',
-                            'before': ' ',
-                            'characterOffsetBegin': 4,
-                            'characterOffsetEnd': 9,
-                            'index': 2,
-                            'lemma': 'quick',
-                            'originalText': 'quick',
-                            'pos': 'JJ',
-                            'word': 'quick',
-                        },
-                        {
-                            'after': ' ',
-                            'before': ' ',
-                            'characterOffsetBegin': 10,
-                            'characterOffsetEnd': 15,
-                            'index': 3,
-                            'lemma': 'brown',
-                            'originalText': 'brown',
-                            'pos': 'JJ',
-                            'word': 'brown',
-                        },
-                        {
-                            'after': ' ',
-                            'before': ' ',
-                            'characterOffsetBegin': 16,
-                            'characterOffsetEnd': 19,
-                            'index': 4,
-                            'lemma': 'fox',
-                            'originalText': 'fox',
-                            'pos': 'NN',
-                            'word': 'fox',
-                        },
-                        {
-                            'after': ' ',
-                            'before': ' ',
-                            'characterOffsetBegin': 20,
-                            'characterOffsetEnd': 25,
-                            'index': 5,
-                            'lemma': 'jump',
-                            'originalText': 'jumps',
-                            'pos': 'VBZ',
-                            'word': 'jumps',
-                        },
-                        {
-                            'after': ' ',
-                            'before': ' ',
-                            'characterOffsetBegin': 26,
-                            'characterOffsetEnd': 30,
-                            'index': 6,
-                            'lemma': 'over',
-                            'originalText': 'over',
-                            'pos': 'IN',
-                            'word': 'over',
-                        },
-                        {
-                            'after': ' ',
-                            'before': ' ',
-                            'characterOffsetBegin': 31,
-                            'characterOffsetEnd': 34,
-                            'index': 7,
-                            'lemma': 'the',
-                            'originalText': 'the',
-                            'pos': 'DT',
-                            'word': 'the',
-                        },
-                        {
-                            'after': ' ',
-                            'before': ' ',
-                            'characterOffsetBegin': 35,
-                            'characterOffsetEnd': 39,
-                            'index': 8,
-                            'lemma': 'lazy',
-                            'originalText': 'lazy',
-                            'pos': 'JJ',
-                            'word': 'lazy',
-                        },
-                        {
-                            'after': '',
-                            'before': ' ',
-                            'characterOffsetBegin': 40,
-                            'characterOffsetEnd': 43,
-                            'index': 9,
-                            'lemma': 'dog',
-                            'originalText': 'dog',
-                            'pos': 'NN',
-                            'word': 'dog',
-                        },
-                    ],
-                }
-            ]
-        }
-
-        corenlp_parser.api_call = MagicMock(return_value=api_return_value)
-
-        input_string = "The quick brown fox jumps over the lazy dog".split()
-        expected_output = Tree(
-            'jumps',
-            [
-                Tree('fox', ['The', 'quick', 'brown']),
-                Tree('dog', ['over', 'the', 'lazy']),
-            ],
-        )
-
-        parsed_data = next(corenlp_parser.parse(input_string))
-
-        corenlp_parser.api_call.assert_called_once_with(
-            "The quick brown fox jumps over the lazy dog",
-            properties={'ssplit.ssplit.eolonly': 'true'},
-        )
-        self.assertEqual(expected_output, parsed_data.tree())
diff --git a/nlp_resource_data/nltk/test/unit/test_corpora.py b/nlp_resource_data/nltk/test/unit/test_corpora.py

deleted file mode 100644 (file)

index bce083b..0000000
--- a/nlp_resource_data/nltk/test/unit/test_corpora.py
+++ /dev/null
@@ -1,272 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import, unicode_literals
-import unittest
-
-from nltk.corpus import (
-    sinica_treebank,
-    conll2007,
-    indian,
-    cess_cat,
-    cess_esp,
-    floresta,
-    ptb,
-    udhr,
-)  # mwa_ppdb
-
-from nltk.compat import python_2_unicode_compatible
-from nltk.tree import Tree
-from nltk.test.unit.utils import skipIf
-
-
-class TestUdhr(unittest.TestCase):
-    def test_words(self):
-        for name in udhr.fileids():
-            try:
-                words = list(udhr.words(name))
-            except AssertionError:
-                print(name)
-                raise
-            self.assertTrue(words)
-
-    def test_raw_unicode(self):
-        for name in udhr.fileids():
-            txt = udhr.raw(name)
-            assert not isinstance(txt, bytes), name
-
-
-class TestIndian(unittest.TestCase):
-    def test_words(self):
-        words = indian.words()[:3]
-        self.assertEqual(words, ['মহিষের', 'সন্তান', ':'])
-
-    def test_tagged_words(self):
-        tagged_words = indian.tagged_words()[:3]
-        self.assertEqual(
-            tagged_words, [('মহিষের', 'NN'), ('সন্তান', 'NN'), (':', 'SYM')]
-        )
-
-
-class TestCess(unittest.TestCase):
-    def test_catalan(self):
-        words = cess_cat.words()[:15]
-        txt = "El Tribunal_Suprem -Fpa- TS -Fpt- ha confirmat la condemna a quatre anys d' inhabilitació especial"
-        self.assertEqual(words, txt.split())
-        self.assertEqual(cess_cat.tagged_sents()[0][34][0], "càrrecs")
-
-    def test_esp(self):
-        words = cess_esp.words()[:15]
-        txt = "El grupo estatal Electricité_de_France -Fpa- EDF -Fpt- anunció hoy , jueves , la compra del"
-        self.assertEqual(words, txt.split())
-        self.assertEqual(cess_esp.words()[115], "años")
-
-
-class TestFloresta(unittest.TestCase):
-    def test_words(self):
-        words = floresta.words()[:10]
-        txt = "Um revivalismo refrescante O 7_e_Meio é um ex-libris de a"
-        self.assertEqual(words, txt.split())
-
-
-class TestSinicaTreebank(unittest.TestCase):
-    def test_sents(self):
-        first_3_sents = sinica_treebank.sents()[:3]
-        self.assertEqual(
-            first_3_sents, [['一'], ['友情'], ['嘉珍', '和', '我', '住在', '同一條', '巷子']]
-        )
-
-    def test_parsed_sents(self):
-        parsed_sents = sinica_treebank.parsed_sents()[25]
-        self.assertEqual(
-            parsed_sents,
-            Tree(
-                'S',
-                [
-                    Tree('NP', [Tree('Nba', ['嘉珍'])]),
-                    Tree('V‧地', [Tree('VA11', ['不停']), Tree('DE', ['的'])]),
-                    Tree('VA4', ['哭泣']),
-                ],
-            ),
-        )
-
-
-class TestCoNLL2007(unittest.TestCase):
-    # Reading the CoNLL 2007 Dependency Treebanks
-
-    def test_sents(self):
-        sents = conll2007.sents('esp.train')[0]
-        self.assertEqual(
-            sents[:6], ['El', 'aumento', 'del', 'índice', 'de', 'desempleo']
-        )
-
-    def test_parsed_sents(self):
-
-        parsed_sents = conll2007.parsed_sents('esp.train')[0]
-
-        self.assertEqual(
-            parsed_sents.tree(),
-            Tree(
-                'fortaleció',
-                [
-                    Tree(
-                        'aumento',
-                        [
-                            'El',
-                            Tree(
-                                'del',
-                                [
-                                    Tree(
-                                        'índice',
-                                        [
-                                            Tree(
-                                                'de',
-                                                [Tree('desempleo', ['estadounidense'])],
-                                            )
-                                        ],
-                                    )
-                                ],
-                            ),
-                        ],
-                    ),
-                    'hoy',
-                    'considerablemente',
-                    Tree(
-                        'al',
-                        [
-                            Tree(
-                                'euro',
-                                [
-                                    Tree(
-                                        'cotizaba',
-                                        [
-                                            ',',
-                                            'que',
-                                            Tree('a', [Tree('15.35', ['las', 'GMT'])]),
-                                            'se',
-                                            Tree(
-                                                'en',
-                                                [
-                                                    Tree(
-                                                        'mercado',
-                                                        [
-                                                            'el',
-                                                            Tree('de', ['divisas']),
-                                                            Tree('de', ['Fráncfort']),
-                                                        ],
-                                                    )
-                                                ],
-                                            ),
-                                            Tree('a', ['0,9452_dólares']),
-                                            Tree(
-                                                'frente_a',
-                                                [
-                                                    ',',
-                                                    Tree(
-                                                        '0,9349_dólares',
-                                                        [
-                                                            'los',
-                                                            Tree(
-                                                                'de',
-                                                                [
-                                                                    Tree(
-                                                                        'mañana',
-                                                                        ['esta'],
-                                                                    )
-                                                                ],
-                                                            ),
-                                                        ],
-                                                    ),
-                                                ],
-                                            ),
-                                        ],
-                                    )
-                                ],
-                            )
-                        ],
-                    ),
-                    '.',
-                ],
-            ),
-        )
-
-
-@skipIf(not ptb.fileids(), "A full installation of the Penn Treebank is not available")
-class TestPTB(unittest.TestCase):
-    def test_fileids(self):
-        self.assertEqual(
-            ptb.fileids()[:4],
-            [
-                'BROWN/CF/CF01.MRG',
-                'BROWN/CF/CF02.MRG',
-                'BROWN/CF/CF03.MRG',
-                'BROWN/CF/CF04.MRG',
-            ],
-        )
-
-    def test_words(self):
-        self.assertEqual(
-            ptb.words('WSJ/00/WSJ_0003.MRG')[:7],
-            ['A', 'form', 'of', 'asbestos', 'once', 'used', '*'],
-        )
-
-    def test_tagged_words(self):
-        self.assertEqual(
-            ptb.tagged_words('WSJ/00/WSJ_0003.MRG')[:3],
-            [('A', 'DT'), ('form', 'NN'), ('of', 'IN')],
-        )
-
-    def test_categories(self):
-        self.assertEqual(
-            ptb.categories(),
-            [
-                'adventure',
-                'belles_lettres',
-                'fiction',
-                'humor',
-                'lore',
-                'mystery',
-                'news',
-                'romance',
-                'science_fiction',
-            ],
-        )
-
-    def test_news_fileids(self):
-        self.assertEqual(
-            ptb.fileids('news')[:3],
-            ['WSJ/00/WSJ_0001.MRG', 'WSJ/00/WSJ_0002.MRG', 'WSJ/00/WSJ_0003.MRG'],
-        )
-
-    def test_category_words(self):
-        self.assertEqual(
-            ptb.words(categories=['humor', 'fiction'])[:6],
-            ['Thirty-three', 'Scotty', 'did', 'not', 'go', 'back'],
-        )
-
-
-@unittest.skip("Skipping test for mwa_ppdb.")
-class TestMWAPPDB(unittest.TestCase):
-    def test_fileids(self):
-        self.assertEqual(
-            mwa_ppdb.fileids(), ['ppdb-1.0-xxxl-lexical.extended.synonyms.uniquepairs']
-        )
-
-    def test_entries(self):
-        self.assertEqual(
-            mwa_ppdb.entries()[:10],
-            [
-                ('10/17/01', '17/10/2001'),
-                ('102,70', '102.70'),
-                ('13,53', '13.53'),
-                ('3.2.5.3.2.1', '3.2.5.3.2.1.'),
-                ('53,76', '53.76'),
-                ('6.9.5', '6.9.5.'),
-                ('7.7.6.3', '7.7.6.3.'),
-                ('76,20', '76.20'),
-                ('79,85', '79.85'),
-                ('93,65', '93.65'),
-            ],
-        )
-
-
-# unload corpora
-from nltk.corpus import teardown_module
diff --git a/nlp_resource_data/nltk/test/unit/test_corpus_views.py b/nlp_resource_data/nltk/test/unit/test_corpus_views.py

deleted file mode 100644 (file)

index 222385a..0000000
--- a/nlp_resource_data/nltk/test/unit/test_corpus_views.py
+++ /dev/null
@@ -1,49 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Corpus View Regression Tests
-"""
-from __future__ import absolute_import, unicode_literals
-import unittest
-import nltk.data
-from nltk.corpus.reader.util import (
-    StreamBackedCorpusView,
-    read_whitespace_block,
-    read_line_block,
-)
-
-
-class TestCorpusViews(unittest.TestCase):
-
-    linetok = nltk.LineTokenizer(blanklines='keep')
-    names = [
-        'corpora/inaugural/README',  # A very short file (160 chars)
-        'corpora/inaugural/1793-Washington.txt',  # A relatively short file (791 chars)
-        'corpora/inaugural/1909-Taft.txt',  # A longer file (32k chars)
-    ]
-
-    def data(self):
-        for name in self.names:
-            f = nltk.data.find(name)
-            with f.open() as fp:
-                file_data = fp.read().decode('utf8')
-            yield f, file_data
-
-    def test_correct_values(self):
-        # Check that corpus views produce the correct sequence of values.
-
-        for f, file_data in self.data():
-            v = StreamBackedCorpusView(f, read_whitespace_block)
-            self.assertEqual(list(v), file_data.split())
-
-            v = StreamBackedCorpusView(f, read_line_block)
-            self.assertEqual(list(v), self.linetok.tokenize(file_data))
-
-    def test_correct_length(self):
-        # Check that the corpus views report the correct lengths:
-
-        for f, file_data in self.data():
-            v = StreamBackedCorpusView(f, read_whitespace_block)
-            self.assertEqual(len(v), len(file_data.split()))
-
-            v = StreamBackedCorpusView(f, read_line_block)
-            self.assertEqual(len(v), len(self.linetok.tokenize(file_data)))
diff --git a/nlp_resource_data/nltk/test/unit/test_data.py b/nlp_resource_data/nltk/test/unit/test_data.py

deleted file mode 100644 (file)

index b586155..0000000
--- a/nlp_resource_data/nltk/test/unit/test_data.py
+++ /dev/null
@@ -1,22 +0,0 @@
-import unittest
-import nltk.data
-from nose.tools import assert_raises
-
-
-class TestData(unittest.TestCase):
-    def test_find_raises_exception(self):
-
-        with assert_raises(LookupError) as context:
-            nltk.data.find('no_such_resource/foo')
-
-        assert type(context.exception) == LookupError, 'Unexpected exception raised'
-
-    def test_find_raises_exception_with_full_resource_name(self):
-        no_such_thing = 'no_such_thing/bar'
-
-        with assert_raises(LookupError) as context:
-            nltk.data.find(no_such_thing)
-
-        assert no_such_thing in str(
-            context.exception
-        ), 'Exception message does not include full resource name'
diff --git a/nlp_resource_data/nltk/test/unit/test_disagreement.py b/nlp_resource_data/nltk/test/unit/test_disagreement.py

deleted file mode 100644 (file)

index 3054868..0000000
--- a/nlp_resource_data/nltk/test/unit/test_disagreement.py
+++ /dev/null
@@ -1,142 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import, unicode_literals
-import unittest
-
-from nltk.metrics.agreement import AnnotationTask
-
-class TestDisagreement(unittest.TestCase):
-
-    '''
-    Class containing unit tests for nltk.metrics.agreement.Disagreement.
-    '''
-
-    def test_easy(self):
-        '''
-        Simple test, based on
-        https://github.com/foolswood/krippendorffs_alpha/raw/master/krippendorff.pdf.
-        '''
-        data = [('coder1', 'dress1', 'YES'),
-                ('coder2', 'dress1', 'NO'),
-                ('coder3', 'dress1', 'NO'),
-                ('coder1', 'dress2', 'YES'),
-                ('coder2', 'dress2', 'NO'),
-                ('coder3', 'dress3', 'NO'),
-                ]
-        annotation_task = AnnotationTask(data)
-        self.assertAlmostEqual(annotation_task.alpha(), -0.3333333)
-
-    def test_easy2(self):
-        '''
-        Same simple test with 1 rating removed.
-        Removal of that rating should not matter: K-Apha ignores items with
-        only 1 rating.
-        '''
-        data = [('coder1', 'dress1', 'YES'),
-                ('coder2', 'dress1', 'NO'),
-                ('coder3', 'dress1', 'NO'),
-                ('coder1', 'dress2', 'YES'),
-                ('coder2', 'dress2', 'NO'),
-                ]
-        annotation_task = AnnotationTask(data)
-        self.assertAlmostEqual(annotation_task.alpha(), -0.3333333)
-
-    def test_advanced(self):
-        '''
-        More advanced test, based on 
-        http://www.agreestat.com/research_papers/onkrippendorffalpha.pdf
-        '''
-        data = [('A', '1', '1'),
-                ('B', '1', '1'),
-                ('D', '1', '1'),
-                ('A', '2', '2'),
-                ('B', '2', '2'),
-                ('C', '2', '3'),
-                ('D', '2', '2'),
-                ('A', '3', '3'),
-                ('B', '3', '3'),
-                ('C', '3', '3'),
-                ('D', '3', '3'),
-                ('A', '4', '3'),
-                ('B', '4', '3'),
-                ('C', '4', '3'),
-                ('D', '4', '3'),
-                ('A', '5', '2'),
-                ('B', '5', '2'),
-                ('C', '5', '2'),
-                ('D', '5', '2'),
-                ('A', '6', '1'),
-                ('B', '6', '2'),
-                ('C', '6', '3'),
-                ('D', '6', '4'),
-                ('A', '7', '4'),
-                ('B', '7', '4'),
-                ('C', '7', '4'),
-                ('D', '7', '4'),
-                ('A', '8', '1'),
-                ('B', '8', '1'),
-                ('C', '8', '2'),
-                ('D', '8', '1'),
-                ('A', '9', '2'),
-                ('B', '9', '2'),
-                ('C', '9', '2'),
-                ('D', '9', '2'),
-                ('B', '10', '5'),
-                ('C', '10', '5'),
-                ('D', '10', '5'),
-                ('C', '11', '1'),
-                ('D', '11', '1'),
-                ('C', '12', '3'),
-                ]
-        annotation_task = AnnotationTask(data)
-        self.assertAlmostEqual(annotation_task.alpha(), 0.743421052632)
-
-    def test_advanced2(self):
-        '''
-        Same more advanced example, but with 1 rating removed.
-        Again, removal of that 1 rating shoudl not matter.
-        '''
-        data = [('A', '1', '1'),
-                ('B', '1', '1'),
-                ('D', '1', '1'),
-                ('A', '2', '2'),
-                ('B', '2', '2'),
-                ('C', '2', '3'),
-                ('D', '2', '2'),
-                ('A', '3', '3'),
-                ('B', '3', '3'),
-                ('C', '3', '3'),
-                ('D', '3', '3'),
-                ('A', '4', '3'),
-                ('B', '4', '3'),
-                ('C', '4', '3'),
-                ('D', '4', '3'),
-                ('A', '5', '2'),
-                ('B', '5', '2'),
-                ('C', '5', '2'),
-                ('D', '5', '2'),
-                ('A', '6', '1'),
-                ('B', '6', '2'),
-                ('C', '6', '3'),
-                ('D', '6', '4'),
-                ('A', '7', '4'),
-                ('B', '7', '4'),
-                ('C', '7', '4'),
-                ('D', '7', '4'),
-                ('A', '8', '1'),
-                ('B', '8', '1'),
-                ('C', '8', '2'),
-                ('D', '8', '1'),
-                ('A', '9', '2'),
-                ('B', '9', '2'),
-                ('C', '9', '2'),
-                ('D', '9', '2'),
-                ('B', '10', '5'),
-                ('C', '10', '5'),
-                ('D', '10', '5'),
-                ('C', '11', '1'),
-                ('D', '11', '1'),
-                ('C', '12', '3'),
-                ]
-        annotation_task = AnnotationTask(data)
-        self.assertAlmostEqual(annotation_task.alpha(), 0.743421052632)
-
diff --git a/nlp_resource_data/nltk/test/unit/test_hmm.py b/nlp_resource_data/nltk/test/unit/test_hmm.py

deleted file mode 100644 (file)

index d211bc2..0000000
--- a/nlp_resource_data/nltk/test/unit/test_hmm.py
+++ /dev/null
@@ -1,87 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import, unicode_literals
-from nltk.tag import hmm
-
-
-def _wikipedia_example_hmm():
-    # Example from wikipedia
-    # (http://en.wikipedia.org/wiki/Forward%E2%80%93backward_algorithm)
-
-    states = ['rain', 'no rain']
-    symbols = ['umbrella', 'no umbrella']
-
-    A = [[0.7, 0.3], [0.3, 0.7]]  # transition probabilities
-    B = [[0.9, 0.1], [0.2, 0.8]]  # emission probabilities
-    pi = [0.5, 0.5]  # initial probabilities
-
-    seq = ['umbrella', 'umbrella', 'no umbrella', 'umbrella', 'umbrella']
-    seq = list(zip(seq, [None] * len(seq)))
-
-    model = hmm._create_hmm_tagger(states, symbols, A, B, pi)
-    return model, states, symbols, seq
-
-
-def test_forward_probability():
-    from numpy.testing import assert_array_almost_equal
-
-    # example from p. 385, Huang et al
-    model, states, symbols = hmm._market_hmm_example()
-    seq = [('up', None), ('up', None)]
-    expected = [[0.35, 0.02, 0.09], [0.1792, 0.0085, 0.0357]]
-
-    fp = 2 ** model._forward_probability(seq)
-
-    assert_array_almost_equal(fp, expected)
-
-
-def test_forward_probability2():
-    from numpy.testing import assert_array_almost_equal
-
-    model, states, symbols, seq = _wikipedia_example_hmm()
-    fp = 2 ** model._forward_probability(seq)
-
-    # examples in wikipedia are normalized
-    fp = (fp.T / fp.sum(axis=1)).T
-
-    wikipedia_results = [
-        [0.8182, 0.1818],
-        [0.8834, 0.1166],
-        [0.1907, 0.8093],
-        [0.7308, 0.2692],
-        [0.8673, 0.1327],
-    ]
-
-    assert_array_almost_equal(wikipedia_results, fp, 4)
-
-
-def test_backward_probability():
-    from numpy.testing import assert_array_almost_equal
-
-    model, states, symbols, seq = _wikipedia_example_hmm()
-
-    bp = 2 ** model._backward_probability(seq)
-    # examples in wikipedia are normalized
-
-    bp = (bp.T / bp.sum(axis=1)).T
-
-    wikipedia_results = [
-        # Forward-backward algorithm doesn't need b0_5,
-        # so .backward_probability doesn't compute it.
-        # [0.6469, 0.3531],
-        [0.5923, 0.4077],
-        [0.3763, 0.6237],
-        [0.6533, 0.3467],
-        [0.6273, 0.3727],
-        [0.5, 0.5],
-    ]
-
-    assert_array_almost_equal(wikipedia_results, bp, 4)
-
-
-def setup_module(module):
-    from nose import SkipTest
-
-    try:
-        import numpy
-    except ImportError:
-        raise SkipTest("numpy is required for nltk.test.test_hmm")
diff --git a/nlp_resource_data/nltk/test/unit/test_json2csv_corpus.py b/nlp_resource_data/nltk/test/unit/test_json2csv_corpus.py

deleted file mode 100644 (file)

index ac61a65..0000000
--- a/nlp_resource_data/nltk/test/unit/test_json2csv_corpus.py
+++ /dev/null
@@ -1,237 +0,0 @@
-# -*- coding: utf-8 -*-
-# Natural Language Toolkit: Twitter client
-#
-# Copyright (C) 2001-2019 NLTK Project
-# Author: Lorenzo Rubio <lrnzcig@gmail.com>
-# URL: <http://nltk.org/>
-# For license information, see LICENSE.TXT
-
-"""
-Regression tests for `json2csv()` and `json2csv_entities()` in Twitter
-package.
-
-"""
-
-import os
-import unittest
-
-from six.moves import zip
-
-from nltk.compat import TemporaryDirectory
-from nltk.corpus import twitter_samples
-from nltk.twitter.common import json2csv, json2csv_entities
-
-
-def are_files_identical(filename1, filename2, debug=False):
-    """
-    Compare two files, ignoring carriage returns.
-    """
-    with open(filename1, "rb") as fileA:
-        with open(filename2, "rb") as fileB:
-            result = True
-            for lineA, lineB in zip(
-                sorted(fileA.readlines()), sorted(fileB.readlines())
-            ):
-                if lineA.strip() != lineB.strip():
-                    if debug:
-                        print(
-                            "Error while comparing files. "
-                            + "First difference at line below."
-                        )
-                        print("=> Output file line: {0}".format(lineA))
-                        print("=> Refer. file line: {0}".format(lineB))
-                    result = False
-                    break
-            return result
-
-
-class TestJSON2CSV(unittest.TestCase):
-    def setUp(self):
-        with open(twitter_samples.abspath("tweets.20150430-223406.json")) as infile:
-            self.infile = [next(infile) for x in range(100)]
-        infile.close()
-        self.msg = "Test and reference files are not the same"
-        self.subdir = os.path.join(os.path.dirname(__file__), 'files')
-
-    def tearDown(self):
-        return
-
-    def test_textoutput(self):
-        ref_fn = os.path.join(self.subdir, 'tweets.20150430-223406.text.csv.ref')
-        with TemporaryDirectory() as tempdir:
-            outfn = os.path.join(tempdir, 'tweets.20150430-223406.text.csv')
-            json2csv(self.infile, outfn, ['text'], gzip_compress=False)
-            self.assertTrue(are_files_identical(outfn, ref_fn), msg=self.msg)
-
-    def test_tweet_metadata(self):
-        ref_fn = os.path.join(self.subdir, 'tweets.20150430-223406.tweet.csv.ref')
-        fields = [
-            'created_at',
-            'favorite_count',
-            'id',
-            'in_reply_to_status_id',
-            'in_reply_to_user_id',
-            'retweet_count',
-            'retweeted',
-            'text',
-            'truncated',
-            'user.id',
-        ]
-
-        with TemporaryDirectory() as tempdir:
-            outfn = os.path.join(tempdir, 'tweets.20150430-223406.tweet.csv')
-            json2csv(self.infile, outfn, fields, gzip_compress=False)
-            self.assertTrue(are_files_identical(outfn, ref_fn), msg=self.msg)
-
-    def test_user_metadata(self):
-        ref_fn = os.path.join(self.subdir, 'tweets.20150430-223406.user.csv.ref')
-        fields = ['id', 'text', 'user.id', 'user.followers_count', 'user.friends_count']
-
-        with TemporaryDirectory() as tempdir:
-            outfn = os.path.join(tempdir, 'tweets.20150430-223406.user.csv')
-            json2csv(self.infile, outfn, fields, gzip_compress=False)
-            self.assertTrue(are_files_identical(outfn, ref_fn), msg=self.msg)
-
-    def test_tweet_hashtag(self):
-        ref_fn = os.path.join(self.subdir, 'tweets.20150430-223406.hashtag.csv.ref')
-        with TemporaryDirectory() as tempdir:
-            outfn = os.path.join(tempdir, 'tweets.20150430-223406.hashtag.csv')
-            json2csv_entities(
-                self.infile,
-                outfn,
-                ['id', 'text'],
-                'hashtags',
-                ['text'],
-                gzip_compress=False,
-            )
-            self.assertTrue(are_files_identical(outfn, ref_fn), msg=self.msg)
-
-    def test_tweet_usermention(self):
-        ref_fn = os.path.join(self.subdir, 'tweets.20150430-223406.usermention.csv.ref')
-        with TemporaryDirectory() as tempdir:
-            outfn = os.path.join(tempdir, 'tweets.20150430-223406.usermention.csv')
-            json2csv_entities(
-                self.infile,
-                outfn,
-                ['id', 'text'],
-                'user_mentions',
-                ['id', 'screen_name'],
-                gzip_compress=False,
-            )
-            self.assertTrue(are_files_identical(outfn, ref_fn), msg=self.msg)
-
-    def test_tweet_media(self):
-        ref_fn = os.path.join(self.subdir, 'tweets.20150430-223406.media.csv.ref')
-        with TemporaryDirectory() as tempdir:
-            outfn = os.path.join(tempdir, 'tweets.20150430-223406.media.csv')
-            json2csv_entities(
-                self.infile,
-                outfn,
-                ['id'],
-                'media',
-                ['media_url', 'url'],
-                gzip_compress=False,
-            )
-
-            self.assertTrue(are_files_identical(outfn, ref_fn), msg=self.msg)
-
-    def test_tweet_url(self):
-        ref_fn = os.path.join(self.subdir, 'tweets.20150430-223406.url.csv.ref')
-        with TemporaryDirectory() as tempdir:
-            outfn = os.path.join(tempdir, 'tweets.20150430-223406.url.csv')
-            json2csv_entities(
-                self.infile,
-                outfn,
-                ['id'],
-                'urls',
-                ['url', 'expanded_url'],
-                gzip_compress=False,
-            )
-
-            self.assertTrue(are_files_identical(outfn, ref_fn), msg=self.msg)
-
-    def test_userurl(self):
-        ref_fn = os.path.join(self.subdir, 'tweets.20150430-223406.userurl.csv.ref')
-        with TemporaryDirectory() as tempdir:
-            outfn = os.path.join(tempdir, 'tweets.20150430-223406.userurl.csv')
-            json2csv_entities(
-                self.infile,
-                outfn,
-                ['id', 'screen_name'],
-                'user.urls',
-                ['url', 'expanded_url'],
-                gzip_compress=False,
-            )
-
-            self.assertTrue(are_files_identical(outfn, ref_fn), msg=self.msg)
-
-    def test_tweet_place(self):
-        ref_fn = os.path.join(self.subdir, 'tweets.20150430-223406.place.csv.ref')
-        with TemporaryDirectory() as tempdir:
-            outfn = os.path.join(tempdir, 'tweets.20150430-223406.place.csv')
-            json2csv_entities(
-                self.infile,
-                outfn,
-                ['id', 'text'],
-                'place',
-                ['name', 'country'],
-                gzip_compress=False,
-            )
-
-            self.assertTrue(are_files_identical(outfn, ref_fn), msg=self.msg)
-
-    def test_tweet_place_boundingbox(self):
-        ref_fn = os.path.join(
-            self.subdir, 'tweets.20150430-223406.placeboundingbox.csv.ref'
-        )
-        with TemporaryDirectory() as tempdir:
-            outfn = os.path.join(tempdir, 'tweets.20150430-223406.placeboundingbox.csv')
-            json2csv_entities(
-                self.infile,
-                outfn,
-                ['id', 'name'],
-                'place.bounding_box',
-                ['coordinates'],
-                gzip_compress=False,
-            )
-
-            self.assertTrue(are_files_identical(outfn, ref_fn), msg=self.msg)
-
-    def test_retweet_original_tweet(self):
-        ref_fn = os.path.join(self.subdir, 'tweets.20150430-223406.retweet.csv.ref')
-        with TemporaryDirectory() as tempdir:
-            outfn = os.path.join(tempdir, 'tweets.20150430-223406.retweet.csv')
-            json2csv_entities(
-                self.infile,
-                outfn,
-                ['id'],
-                'retweeted_status',
-                [
-                    'created_at',
-                    'favorite_count',
-                    'id',
-                    'in_reply_to_status_id',
-                    'in_reply_to_user_id',
-                    'retweet_count',
-                    'text',
-                    'truncated',
-                    'user.id',
-                ],
-                gzip_compress=False,
-            )
-
-            self.assertTrue(are_files_identical(outfn, ref_fn), msg=self.msg)
-
-    def test_file_is_wrong(self):
-        """
-        Sanity check that file comparison is not giving false positives.
-        """
-        ref_fn = os.path.join(self.subdir, 'tweets.20150430-223406.retweet.csv.ref')
-        with TemporaryDirectory() as tempdir:
-            outfn = os.path.join(tempdir, 'tweets.20150430-223406.text.csv')
-            json2csv(self.infile, outfn, ['text'], gzip_compress=False)
-            self.assertFalse(are_files_identical(outfn, ref_fn), msg=self.msg)
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/nlp_resource_data/nltk/test/unit/test_naivebayes.py b/nlp_resource_data/nltk/test/unit/test_naivebayes.py

deleted file mode 100644 (file)

index 37e4411..0000000
--- a/nlp_resource_data/nltk/test/unit/test_naivebayes.py
+++ /dev/null
@@ -1,24 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import print_function, unicode_literals
-
-
-import unittest
-from nltk.classify.naivebayes import NaiveBayesClassifier
-
-
-class NaiveBayesClassifierTest(unittest.TestCase):
-    def test_simple(self):
-        training_features = [
-            ({'nice': True, 'good': True}, 'positive'),
-            ({'bad': True, 'mean': True}, 'negative'),
-        ]
-
-        classifier = NaiveBayesClassifier.train(training_features)
-
-        result = classifier.prob_classify({'nice': True})
-        self.assertTrue(result.prob('positive') > result.prob('negative'))
-        self.assertEqual(result.max(), 'positive')
-
-        result = classifier.prob_classify({'bad': True})
-        self.assertTrue(result.prob('positive') < result.prob('negative'))
-        self.assertEqual(result.max(), 'negative')
diff --git a/nlp_resource_data/nltk/test/unit/test_pos_tag.py b/nlp_resource_data/nltk/test/unit/test_pos_tag.py

deleted file mode 100644 (file)

index a0aa1d0..0000000
--- a/nlp_resource_data/nltk/test/unit/test_pos_tag.py
+++ /dev/null
@@ -1,85 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Tests for nltk.pos_tag
-"""
-
-from __future__ import unicode_literals
-
-import unittest
-
-from nltk import word_tokenize, pos_tag
-
-
-class TestPosTag(unittest.TestCase):
-    def test_pos_tag_eng(self):
-        text = "John's big idea isn't all that bad."
-        expected_tagged = [
-            ('John', 'NNP'),
-            ("'s", 'POS'),
-            ('big', 'JJ'),
-            ('idea', 'NN'),
-            ('is', 'VBZ'),
-            ("n't", 'RB'),
-            ('all', 'PDT'),
-            ('that', 'DT'),
-            ('bad', 'JJ'),
-            ('.', '.'),
-        ]
-        assert pos_tag(word_tokenize(text)) == expected_tagged
-
-    def test_pos_tag_eng_universal(self):
-        text = "John's big idea isn't all that bad."
-        expected_tagged = [
-            ('John', 'NOUN'),
-            ("'s", 'PRT'),
-            ('big', 'ADJ'),
-            ('idea', 'NOUN'),
-            ('is', 'VERB'),
-            ("n't", 'ADV'),
-            ('all', 'DET'),
-            ('that', 'DET'),
-            ('bad', 'ADJ'),
-            ('.', '.'),
-        ]
-        assert pos_tag(word_tokenize(text), tagset='universal') == expected_tagged
-
-    def test_pos_tag_rus(self):
-        text = u"Илья оторопел и дважды перечитал бумажку."
-        expected_tagged = [
-            ('Илья', 'S'),
-            ('оторопел', 'V'),
-            ('и', 'CONJ'),
-            ('дважды', 'ADV'),
-            ('перечитал', 'V'),
-            ('бумажку', 'S'),
-            ('.', 'NONLEX'),
-        ]
-        assert pos_tag(word_tokenize(text), lang='rus') == expected_tagged
-
-    def test_pos_tag_rus_universal(self):
-        text = u"Илья оторопел и дважды перечитал бумажку."
-        expected_tagged = [
-            ('Илья', 'NOUN'),
-            ('оторопел', 'VERB'),
-            ('и', 'CONJ'),
-            ('дважды', 'ADV'),
-            ('перечитал', 'VERB'),
-            ('бумажку', 'NOUN'),
-            ('.', '.'),
-        ]
-        assert (
-            pos_tag(word_tokenize(text), tagset='universal', lang='rus')
-            == expected_tagged
-        )
-
-    def test_pos_tag_unknown_lang(self):
-        text = u"모르겠 습니 다"
-        self.assertRaises(NotImplementedError, pos_tag, word_tokenize(text), lang='kor')
-        # Test for default kwarg, `lang=None`
-        self.assertRaises(NotImplementedError, pos_tag, word_tokenize(text), lang=None)
-
-    def test_unspecified_lang(self):
-        # Tries to force the lang='eng' option.
-        text = u"모르겠 습니 다"
-        expected_but_wrong = [('모르겠', 'JJ'), ('습니', 'NNP'), ('다', 'NN')]
-        assert pos_tag(word_tokenize(text)) == expected_but_wrong
diff --git a/nlp_resource_data/nltk/test/unit/test_rte_classify.py b/nlp_resource_data/nltk/test/unit/test_rte_classify.py

deleted file mode 100644 (file)

index b26298c..0000000
--- a/nlp_resource_data/nltk/test/unit/test_rte_classify.py
+++ /dev/null
@@ -1,92 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import print_function, unicode_literals
-
-import unittest
-
-from nltk.corpus import rte as rte_corpus
-from nltk.classify.rte_classify import RTEFeatureExtractor, rte_features, rte_classifier
-
-expected_from_rte_feature_extration = """
-alwayson        => True
-ne_hyp_extra    => 0
-ne_overlap      => 1
-neg_hyp         => 0
-neg_txt         => 0
-word_hyp_extra  => 3
-word_overlap    => 3
-
-alwayson        => True
-ne_hyp_extra    => 0
-ne_overlap      => 1
-neg_hyp         => 0
-neg_txt         => 0
-word_hyp_extra  => 2
-word_overlap    => 1
-
-alwayson        => True
-ne_hyp_extra    => 1
-ne_overlap      => 1
-neg_hyp         => 0
-neg_txt         => 0
-word_hyp_extra  => 1
-word_overlap    => 2
-
-alwayson        => True
-ne_hyp_extra    => 1
-ne_overlap      => 0
-neg_hyp         => 0
-neg_txt         => 0
-word_hyp_extra  => 6
-word_overlap    => 2
-
-alwayson        => True
-ne_hyp_extra    => 1
-ne_overlap      => 0
-neg_hyp         => 0
-neg_txt         => 0
-word_hyp_extra  => 4
-word_overlap    => 0
-
-alwayson        => True
-ne_hyp_extra    => 1
-ne_overlap      => 0
-neg_hyp         => 0
-neg_txt         => 0
-word_hyp_extra  => 3
-word_overlap    => 1
-"""
-
-
-class RTEClassifierTest(unittest.TestCase):
-    # Test the feature extraction method.
-    def test_rte_feature_extraction(self):
-        pairs = rte_corpus.pairs(['rte1_dev.xml'])[:6]
-        test_output = [
-            "%-15s => %s" % (key, rte_features(pair)[key])
-            for pair in pairs
-            for key in sorted(rte_features(pair))
-        ]
-        expected_output = expected_from_rte_feature_extration.strip().split('\n')
-        # Remove null strings.
-        expected_output = list(filter(None, expected_output))
-        self.assertEqual(test_output, expected_output)
-
-    # Test the RTEFeatureExtractor object.
-    def test_feature_extractor_object(self):
-        rtepair = rte_corpus.pairs(['rte3_dev.xml'])[33]
-        extractor = RTEFeatureExtractor(rtepair)
-        self.assertEqual(extractor.hyp_words, {'member', 'China', 'SCO.'})
-        self.assertEqual(extractor.overlap('word'), set())
-        self.assertEqual(extractor.overlap('ne'), {'China'})
-        self.assertEqual(extractor.hyp_extra('word'), {'member'})
-
-    # Test the RTE classifier training.
-    def test_rte_classification_without_megam(self):
-        clf = rte_classifier('IIS')
-        clf = rte_classifier('GIS')
-
-    @unittest.skip("Skipping tests with dependencies on MEGAM")
-    def test_rte_classification_with_megam(self):
-        nltk.config_megam('/usr/local/bin/megam')
-        clf = rte_classifier('megam')
-        clf = rte_classifier('BFGS')
diff --git a/nlp_resource_data/nltk/test/unit/test_seekable_unicode_stream_reader.py b/nlp_resource_data/nltk/test/unit/test_seekable_unicode_stream_reader.py

deleted file mode 100644 (file)

index a54c559..0000000
--- a/nlp_resource_data/nltk/test/unit/test_seekable_unicode_stream_reader.py
+++ /dev/null
@@ -1,140 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-The following test performs a random series of reads, seeks, and
-tells, and checks that the results are consistent.
-"""
-from __future__ import absolute_import, unicode_literals
-import random
-import functools
-from io import BytesIO
-from nltk.corpus.reader import SeekableUnicodeStreamReader
-
-
-def check_reader(unicode_string, encoding, n=1000):
-    bytestr = unicode_string.encode(encoding)
-    strlen = len(unicode_string)
-    stream = BytesIO(bytestr)
-    reader = SeekableUnicodeStreamReader(stream, encoding)
-    # Find all character positions
-    chars = []
-    while True:
-        pos = reader.tell()
-        chars.append((pos, reader.read(1)))
-        if chars[-1][1] == '':
-            break
-    # Find all strings
-    strings = dict((pos, '') for (pos, c) in chars)
-    for pos1, char in chars:
-        for pos2, _ in chars:
-            if pos2 <= pos1:
-                strings[pos2] += char
-    while True:
-        op = random.choice('tsrr')
-        # Check our position?
-        if op == 't':  # tell
-            reader.tell()
-        # Perform a seek?
-        if op == 's':  # seek
-            new_pos = random.choice([p for (p, c) in chars])
-            reader.seek(new_pos)
-        # Perform a read?
-        if op == 'r':  # read
-            if random.random() < 0.3:
-                pos = reader.tell()
-            else:
-                pos = None
-            if random.random() < 0.2:
-                size = None
-            elif random.random() < 0.8:
-                size = random.randint(0, int(strlen / 6))
-            else:
-                size = random.randint(0, strlen + 20)
-            if random.random() < 0.8:
-                s = reader.read(size)
-            else:
-                s = reader.readline(size)
-            # check that everything's consistent
-            if pos is not None:
-                assert pos in strings
-                assert strings[pos].startswith(s)
-                n -= 1
-                if n == 0:
-                    return 'passed'
-
-
-# Call the randomized test function `check_reader` with a variety of
-# input strings and encodings.
-
-ENCODINGS = ['ascii', 'latin1', 'greek', 'hebrew', 'utf-16', 'utf-8']
-
-STRINGS = [
-    """
-    This is a test file.
-    It is fairly short.
-    """,
-    "This file can be encoded with latin1. \x83",
-    """\
-    This is a test file.
-    Here's a blank line:
-
-    And here's some unicode: \xee \u0123 \uffe3
-    """,
-    """\
-    This is a test file.
-    Unicode characters: \xf3 \u2222 \u3333\u4444 \u5555
-    """,
-]
-
-
-def test_reader():
-    for string in STRINGS:
-        for encoding in ENCODINGS:
-            try:
-                # skip strings that can't be encoded with the current encoding
-                string.encode(encoding)
-                yield check_reader, string, encoding
-            except UnicodeEncodeError:
-                pass
-
-
-# nose shows the whole string arguments in a verbose mode; this is annoying,
-# so large string test is separated.
-
-LARGE_STRING = (
-    """\
-This is a larger file.  It has some lines that are longer \
-than 72 characters.  It's got lots of repetition.  Here's \
-some unicode chars: \xee \u0123 \uffe3 \ueeee \u2345
-
-How fun!  Let's repeat it twenty times.
-"""
-    * 10
-)
-
-
-def test_reader_on_large_string():
-    for encoding in ENCODINGS:
-        try:
-            # skip strings that can't be encoded with the current encoding
-            LARGE_STRING.encode(encoding)
-
-            def _check(encoding, n=1000):
-                check_reader(LARGE_STRING, encoding, n)
-
-            yield _check, encoding
-
-        except UnicodeEncodeError:
-            pass
-
-
-def test_reader_stream_is_closed():
-    reader = SeekableUnicodeStreamReader(BytesIO(b''), 'ascii')
-    assert reader.stream.closed is False
-    reader.__del__()
-    assert reader.stream.closed is True
-
-
-def teardown_module(module=None):
-    import gc
-
-    gc.collect()
diff --git a/nlp_resource_data/nltk/test/unit/test_senna.py b/nlp_resource_data/nltk/test/unit/test_senna.py

deleted file mode 100644 (file)

index 8701225..0000000
--- a/nlp_resource_data/nltk/test/unit/test_senna.py
+++ /dev/null
@@ -1,116 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Unit tests for Senna
-"""
-
-from __future__ import unicode_literals
-from os import environ, path, sep
-
-import logging
-import unittest
-
-from nltk.classify import Senna
-from nltk.tag import SennaTagger, SennaChunkTagger, SennaNERTagger
-
-# Set Senna executable path for tests if it is not specified as an environment variable
-if 'SENNA' in environ:
-    SENNA_EXECUTABLE_PATH = path.normpath(environ['SENNA']) + sep
-else:
-    SENNA_EXECUTABLE_PATH = '/usr/share/senna-v3.0'
-
-senna_is_installed = path.exists(SENNA_EXECUTABLE_PATH)
-
-
-@unittest.skipUnless(senna_is_installed, "Requires Senna executable")
-class TestSennaPipeline(unittest.TestCase):
-    """Unittest for nltk.classify.senna"""
-
-    def test_senna_pipeline(self):
-        """Senna pipeline interface"""
-
-        pipeline = Senna(SENNA_EXECUTABLE_PATH, ['pos', 'chk', 'ner'])
-        sent = 'Dusseldorf is an international business center'.split()
-        result = [
-            (token['word'], token['chk'], token['ner'], token['pos'])
-            for token in pipeline.tag(sent)
-        ]
-        expected = [
-            ('Dusseldorf', 'B-NP', 'B-LOC', 'NNP'),
-            ('is', 'B-VP', 'O', 'VBZ'),
-            ('an', 'B-NP', 'O', 'DT'),
-            ('international', 'I-NP', 'O', 'JJ'),
-            ('business', 'I-NP', 'O', 'NN'),
-            ('center', 'I-NP', 'O', 'NN'),
-        ]
-        self.assertEqual(result, expected)
-
-
-@unittest.skipUnless(senna_is_installed, "Requires Senna executable")
-class TestSennaTagger(unittest.TestCase):
-    """Unittest for nltk.tag.senna"""
-
-    def test_senna_tagger(self):
-        tagger = SennaTagger(SENNA_EXECUTABLE_PATH)
-        result = tagger.tag('What is the airspeed of an unladen swallow ?'.split())
-        expected = [
-            ('What', 'WP'),
-            ('is', 'VBZ'),
-            ('the', 'DT'),
-            ('airspeed', 'NN'),
-            ('of', 'IN'),
-            ('an', 'DT'),
-            ('unladen', 'NN'),
-            ('swallow', 'NN'),
-            ('?', '.'),
-        ]
-        self.assertEqual(result, expected)
-
-    def test_senna_chunk_tagger(self):
-        chktagger = SennaChunkTagger(SENNA_EXECUTABLE_PATH)
-        result_1 = chktagger.tag('What is the airspeed of an unladen swallow ?'.split())
-        expected_1 = [
-            ('What', 'B-NP'),
-            ('is', 'B-VP'),
-            ('the', 'B-NP'),
-            ('airspeed', 'I-NP'),
-            ('of', 'B-PP'),
-            ('an', 'B-NP'),
-            ('unladen', 'I-NP'),
-            ('swallow', 'I-NP'),
-            ('?', 'O'),
-        ]
-
-        result_2 = list(chktagger.bio_to_chunks(result_1, chunk_type='NP'))
-        expected_2 = [
-            ('What', '0'),
-            ('the airspeed', '2-3'),
-            ('an unladen swallow', '5-6-7'),
-        ]
-        self.assertEqual(result_1, expected_1)
-        self.assertEqual(result_2, expected_2)
-
-    def test_senna_ner_tagger(self):
-        nertagger = SennaNERTagger(SENNA_EXECUTABLE_PATH)
-        result_1 = nertagger.tag('Shakespeare theatre was in London .'.split())
-        expected_1 = [
-            ('Shakespeare', 'B-PER'),
-            ('theatre', 'O'),
-            ('was', 'O'),
-            ('in', 'O'),
-            ('London', 'B-LOC'),
-            ('.', 'O'),
-        ]
-
-        result_2 = nertagger.tag('UN headquarters are in NY , USA .'.split())
-        expected_2 = [
-            ('UN', 'B-ORG'),
-            ('headquarters', 'O'),
-            ('are', 'O'),
-            ('in', 'O'),
-            ('NY', 'B-LOC'),
-            (',', 'O'),
-            ('USA', 'B-LOC'),
-            ('.', 'O'),
-        ]
-        self.assertEqual(result_1, expected_1)
-        self.assertEqual(result_2, expected_2)
diff --git a/nlp_resource_data/nltk/test/unit/test_stem.py b/nlp_resource_data/nltk/test/unit/test_stem.py

deleted file mode 100644 (file)

index 67677df..0000000
--- a/nlp_resource_data/nltk/test/unit/test_stem.py
+++ /dev/null
@@ -1,149 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import print_function, unicode_literals
-
-import os
-import unittest
-from contextlib import closing
-
-from nltk import data
-from nltk.stem.snowball import SnowballStemmer
-from nltk.stem.porter import PorterStemmer
-
-
-class SnowballTest(unittest.TestCase):
-    def test_arabic(self):
-        """
-        this unit testing for test the snowball arabic light stemmer
-        this stemmer deals with prefixes and suffixes
-        """
-        # Test where the ignore_stopwords=True.
-        ar_stemmer = SnowballStemmer("arabic", True)
-        assert ar_stemmer.stem('الْعَرَبِــــــيَّة') == "عرب"
-        assert ar_stemmer.stem("العربية") == "عرب"
-        assert ar_stemmer.stem("فقالوا") == "قال"
-        assert ar_stemmer.stem("الطالبات") == "طالب"
-        assert ar_stemmer.stem("فالطالبات") == "طالب"
-        assert ar_stemmer.stem("والطالبات") == "طالب"
-        assert ar_stemmer.stem("الطالبون") == "طالب"
-        assert ar_stemmer.stem("اللذان") == "اللذان"
-        assert ar_stemmer.stem("من") == "من"
-        # Test where the ignore_stopwords=False.
-        ar_stemmer = SnowballStemmer("arabic", False)
-        assert ar_stemmer.stem("اللذان") == "اللذ"  # this is a stop word
-        assert ar_stemmer.stem("الطالبات") == "طالب"
-        assert ar_stemmer.stem("الكلمات") == "كلم"
-        # test where create the arabic stemmer without given init value to ignore_stopwords
-        ar_stemmer = SnowballStemmer("arabic")
-        assert ar_stemmer.stem('الْعَرَبِــــــيَّة') == "عرب"
-        assert ar_stemmer.stem("العربية") == "عرب"
-        assert ar_stemmer.stem("فقالوا") == "قال"
-        assert ar_stemmer.stem("الطالبات") == "طالب"
-        assert ar_stemmer.stem("الكلمات") == "كلم"
-
-    def test_russian(self):
-        # Russian words both consisting of Cyrillic
-        # and Roman letters can be stemmed.
-        stemmer_russian = SnowballStemmer("russian")
-        assert stemmer_russian.stem("авантненькая") == "авантненьк"
-        assert stemmer_russian.stem("avenantnen'kai^a") == "avenantnen'k"
-
-    def test_german(self):
-        stemmer_german = SnowballStemmer("german")
-        stemmer_german2 = SnowballStemmer("german", ignore_stopwords=True)
-
-        assert stemmer_german.stem("Schr\xe4nke") == 'schrank'
-        assert stemmer_german2.stem("Schr\xe4nke") == 'schrank'
-
-        assert stemmer_german.stem("keinen") == 'kein'
-        assert stemmer_german2.stem("keinen") == 'keinen'
-
-    def test_spanish(self):
-        stemmer = SnowballStemmer('spanish')
-
-        assert stemmer.stem("Visionado") == 'vision'
-
-        # The word 'algue' was raising an IndexError
-        assert stemmer.stem("algue") == 'algu'
-
-    def test_short_strings_bug(self):
-        stemmer = SnowballStemmer('english')
-        assert stemmer.stem("y's") == 'y'
-
-
-class PorterTest(unittest.TestCase):
-    def _vocabulary(self):
-        with closing(
-            data.find('stemmers/porter_test/porter_vocabulary.txt').open(
-                encoding='utf-8'
-            )
-        ) as fp:
-            return fp.read().splitlines()
-
-    def _test_against_expected_output(self, stemmer_mode, expected_stems):
-        stemmer = PorterStemmer(mode=stemmer_mode)
-        for word, true_stem in zip(self._vocabulary(), expected_stems):
-            our_stem = stemmer.stem(word)
-            assert our_stem == true_stem, (
-                "%s should stem to %s in %s mode but got %s"
-                % (word, true_stem, stemmer_mode, our_stem)
-            )
-
-    def test_vocabulary_martin_mode(self):
-        """Tests all words from the test vocabulary provided by M Porter
-
-        The sample vocabulary and output were sourced from:
-            http://tartarus.org/martin/PorterStemmer/voc.txt
-            http://tartarus.org/martin/PorterStemmer/output.txt
-        and are linked to from the Porter Stemmer algorithm's homepage
-        at
-            http://tartarus.org/martin/PorterStemmer/
-        """
-        with closing(
-            data.find('stemmers/porter_test/porter_martin_output.txt').open(
-                encoding='utf-8'
-            )
-        ) as fp:
-            self._test_against_expected_output(
-                PorterStemmer.MARTIN_EXTENSIONS, fp.read().splitlines()
-            )
-
-    def test_vocabulary_nltk_mode(self):
-        with closing(
-            data.find('stemmers/porter_test/porter_nltk_output.txt').open(
-                encoding='utf-8'
-            )
-        ) as fp:
-            self._test_against_expected_output(
-                PorterStemmer.NLTK_EXTENSIONS, fp.read().splitlines()
-            )
-
-    def test_vocabulary_original_mode(self):
-        # The list of stems for this test was generated by taking the
-        # Martin-blessed stemmer from
-        # http://tartarus.org/martin/PorterStemmer/c.txt
-        # and removing all the --DEPARTURE-- sections from it and
-        # running it against Martin's test vocabulary.
-
-        with closing(
-            data.find('stemmers/porter_test/porter_original_output.txt').open(
-                encoding='utf-8'
-            )
-        ) as fp:
-            self._test_against_expected_output(
-                PorterStemmer.ORIGINAL_ALGORITHM, fp.read().splitlines()
-            )
-
-        self._test_against_expected_output(
-            PorterStemmer.ORIGINAL_ALGORITHM,
-            data.find('stemmers/porter_test/porter_original_output.txt')
-            .open(encoding='utf-8')
-            .read()
-            .splitlines(),
-        )
-
-    def test_oed_bug(self):
-        """Test for bug https://github.com/nltk/nltk/issues/1581
-
-        Ensures that 'oed' can be stemmed without throwing an error.
-        """
-        assert PorterStemmer().stem('oed') == 'o'
diff --git a/nlp_resource_data/nltk/test/unit/test_tag.py b/nlp_resource_data/nltk/test/unit/test_tag.py

deleted file mode 100644 (file)

index c382074..0000000
--- a/nlp_resource_data/nltk/test/unit/test_tag.py
+++ /dev/null
@@ -1,30 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import, unicode_literals
-
-
-def test_basic():
-    from nltk.tag import pos_tag
-    from nltk.tokenize import word_tokenize
-
-    result = pos_tag(word_tokenize("John's big idea isn't all that bad."))
-    assert result == [
-        ('John', 'NNP'),
-        ("'s", 'POS'),
-        ('big', 'JJ'),
-        ('idea', 'NN'),
-        ('is', 'VBZ'),
-        ("n't", 'RB'),
-        ('all', 'PDT'),
-        ('that', 'DT'),
-        ('bad', 'JJ'),
-        ('.', '.'),
-    ]
-
-
-def setup_module(module):
-    from nose import SkipTest
-
-    try:
-        import numpy
-    except ImportError:
-        raise SkipTest("numpy is required for nltk.test.test_tag")
diff --git a/nlp_resource_data/nltk/test/unit/test_tgrep.py b/nlp_resource_data/nltk/test/unit/test_tgrep.py

deleted file mode 100644 (file)

index 17b2c4a..0000000
--- a/nlp_resource_data/nltk/test/unit/test_tgrep.py
+++ /dev/null
@@ -1,790 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# Natural Language Toolkit: TGrep search
-#
-# Copyright (C) 2001-2019 NLTK Project
-# Author: Will Roberts <wildwilhelm@gmail.com>
-# URL: <http://nltk.org/>
-# For license information, see LICENSE.TXT
-
-'''
-Unit tests for nltk.tgrep.
-'''
-
-from __future__ import absolute_import, print_function, unicode_literals
-
-import unittest
-
-from six import b
-
-from nltk.tree import ParentedTree
-from nltk import tgrep
-
-
-class TestSequenceFunctions(unittest.TestCase):
-
-    '''
-    Class containing unit tests for nltk.tgrep.
-    '''
-
-    def test_tokenize_simple(self):
-        '''
-        Simple test of tokenization.
-        '''
-        tokens = tgrep.tgrep_tokenize('A .. (B !< C . D) | ![<< (E , F) $ G]')
-        self.assertEqual(
-            tokens,
-            [
-                'A',
-                '..',
-                '(',
-                'B',
-                '!',
-                '<',
-                'C',
-                '.',
-                'D',
-                ')',
-                '|',
-                '!',
-                '[',
-                '<<',
-                '(',
-                'E',
-                ',',
-                'F',
-                ')',
-                '$',
-                'G',
-                ']',
-            ],
-        )
-
-    def test_tokenize_encoding(self):
-        '''
-        Test that tokenization handles bytes and strs the same way.
-        '''
-        self.assertEqual(
-            tgrep.tgrep_tokenize(b('A .. (B !< C . D) | ![<< (E , F) $ G]')),
-            tgrep.tgrep_tokenize('A .. (B !< C . D) | ![<< (E , F) $ G]'),
-        )
-
-    def test_tokenize_link_types(self):
-        '''
-        Test tokenization of basic link types.
-        '''
-        self.assertEqual(tgrep.tgrep_tokenize('A<B'), ['A', '<', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A>B'), ['A', '>', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A<3B'), ['A', '<3', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A>3B'), ['A', '>3', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A<,B'), ['A', '<,', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A>,B'), ['A', '>,', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A<-3B'), ['A', '<-3', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A>-3B'), ['A', '>-3', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A<-B'), ['A', '<-', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A>-B'), ['A', '>-', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A<\'B'), ['A', '<\'', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A>\'B'), ['A', '>\'', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A<:B'), ['A', '<:', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A>:B'), ['A', '>:', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A<<B'), ['A', '<<', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A>>B'), ['A', '>>', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A<<,B'), ['A', '<<,', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A>>,B'), ['A', '>>,', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A<<\'B'), ['A', '<<\'', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A>>\'B'), ['A', '>>\'', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A<<:B'), ['A', '<<:', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A>>:B'), ['A', '>>:', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A.B'), ['A', '.', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A,B'), ['A', ',', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A..B'), ['A', '..', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A,,B'), ['A', ',,', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A$B'), ['A', '$', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A$.B'), ['A', '$.', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A$,B'), ['A', '$,', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A$..B'), ['A', '$..', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A$,,B'), ['A', '$,,', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A!<B'), ['A', '!', '<', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A!>B'), ['A', '!', '>', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A!<3B'), ['A', '!', '<3', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A!>3B'), ['A', '!', '>3', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A!<,B'), ['A', '!', '<,', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A!>,B'), ['A', '!', '>,', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A!<-3B'), ['A', '!', '<-3', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A!>-3B'), ['A', '!', '>-3', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A!<-B'), ['A', '!', '<-', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A!>-B'), ['A', '!', '>-', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A!<\'B'), ['A', '!', '<\'', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A!>\'B'), ['A', '!', '>\'', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A!<:B'), ['A', '!', '<:', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A!>:B'), ['A', '!', '>:', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A!<<B'), ['A', '!', '<<', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A!>>B'), ['A', '!', '>>', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A!<<,B'), ['A', '!', '<<,', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A!>>,B'), ['A', '!', '>>,', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A!<<\'B'), ['A', '!', '<<\'', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A!>>\'B'), ['A', '!', '>>\'', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A!<<:B'), ['A', '!', '<<:', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A!>>:B'), ['A', '!', '>>:', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A!.B'), ['A', '!', '.', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A!,B'), ['A', '!', ',', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A!..B'), ['A', '!', '..', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A!,,B'), ['A', '!', ',,', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A!$B'), ['A', '!', '$', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A!$.B'), ['A', '!', '$.', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A!$,B'), ['A', '!', '$,', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A!$..B'), ['A', '!', '$..', 'B'])
-        self.assertEqual(tgrep.tgrep_tokenize('A!$,,B'), ['A', '!', '$,,', 'B'])
-
-    def test_tokenize_examples(self):
-        '''
-        Test tokenization of the TGrep2 manual example patterns.
-        '''
-        self.assertEqual(tgrep.tgrep_tokenize('NP < PP'), ['NP', '<', 'PP'])
-        self.assertEqual(tgrep.tgrep_tokenize('/^NP/'), ['/^NP/'])
-        self.assertEqual(
-            tgrep.tgrep_tokenize('NP << PP . VP'), ['NP', '<<', 'PP', '.', 'VP']
-        )
-        self.assertEqual(
-            tgrep.tgrep_tokenize('NP << PP | . VP'), ['NP', '<<', 'PP', '|', '.', 'VP']
-        )
-        self.assertEqual(
-            tgrep.tgrep_tokenize('NP !<< PP [> NP | >> VP]'),
-            ['NP', '!', '<<', 'PP', '[', '>', 'NP', '|', '>>', 'VP', ']'],
-        )
-        self.assertEqual(
-            tgrep.tgrep_tokenize('NP << (PP . VP)'),
-            ['NP', '<<', '(', 'PP', '.', 'VP', ')'],
-        )
-        self.assertEqual(
-            tgrep.tgrep_tokenize('NP <\' (PP <, (IN < on))'),
-            ['NP', '<\'', '(', 'PP', '<,', '(', 'IN', '<', 'on', ')', ')'],
-        )
-        self.assertEqual(
-            tgrep.tgrep_tokenize('S < (A < B) < C'),
-            ['S', '<', '(', 'A', '<', 'B', ')', '<', 'C'],
-        )
-        self.assertEqual(
-            tgrep.tgrep_tokenize('S < ((A < B) < C)'),
-            ['S', '<', '(', '(', 'A', '<', 'B', ')', '<', 'C', ')'],
-        )
-        self.assertEqual(
-            tgrep.tgrep_tokenize('S < (A < B < C)'),
-            ['S', '<', '(', 'A', '<', 'B', '<', 'C', ')'],
-        )
-        self.assertEqual(tgrep.tgrep_tokenize('A<B&.C'), ['A', '<', 'B', '&', '.', 'C'])
-
-    def test_tokenize_quoting(self):
-        '''
-        Test tokenization of quoting.
-        '''
-        self.assertEqual(
-            tgrep.tgrep_tokenize('"A<<:B"<<:"A $.. B"<"A>3B"<C'),
-            ['"A<<:B"', '<<:', '"A $.. B"', '<', '"A>3B"', '<', 'C'],
-        )
-
-    def test_tokenize_nodenames(self):
-        '''
-        Test tokenization of node names.
-        '''
-        self.assertEqual(tgrep.tgrep_tokenize('Robert'), ['Robert'])
-        self.assertEqual(tgrep.tgrep_tokenize('/^[Bb]ob/'), ['/^[Bb]ob/'])
-        self.assertEqual(tgrep.tgrep_tokenize('*'), ['*'])
-        self.assertEqual(tgrep.tgrep_tokenize('__'), ['__'])
-        # test tokenization of NLTK tree position syntax
-        self.assertEqual(tgrep.tgrep_tokenize('N()'), ['N(', ')'])
-        self.assertEqual(tgrep.tgrep_tokenize('N(0,)'), ['N(', '0', ',', ')'])
-        self.assertEqual(tgrep.tgrep_tokenize('N(0,0)'), ['N(', '0', ',', '0', ')'])
-        self.assertEqual(
-            tgrep.tgrep_tokenize('N(0,0,)'), ['N(', '0', ',', '0', ',', ')']
-        )
-
-    def test_tokenize_macros(self):
-        '''
-        Test tokenization of macro definitions.
-        '''
-        self.assertEqual(
-            tgrep.tgrep_tokenize(
-                '@ NP /^NP/;\n@ NN /^NN/;\n@NP [!< NP | < @NN] !$.. @NN'
-            ),
-            [
-                '@',
-                'NP',
-                '/^NP/',
-                ';',
-                '@',
-                'NN',
-                '/^NN/',
-                ';',
-                '@NP',
-                '[',
-                '!',
-                '<',
-                'NP',
-                '|',
-                '<',
-                '@NN',
-                ']',
-                '!',
-                '$..',
-                '@NN',
-            ],
-        )
-
-    def test_node_simple(self):
-        '''
-        Test a simple use of tgrep for finding nodes matching a given
-        pattern.
-        '''
-        tree = ParentedTree.fromstring(
-            '(S (NP (DT the) (JJ big) (NN dog)) ' '(VP bit) (NP (DT a) (NN cat)))'
-        )
-        self.assertEqual(list(tgrep.tgrep_positions('NN', [tree])), [[(0, 2), (2, 1)]])
-        self.assertEqual(
-            list(tgrep.tgrep_nodes('NN', [tree])), [[tree[0, 2], tree[2, 1]]]
-        )
-        self.assertEqual(
-            list(tgrep.tgrep_positions('NN|JJ', [tree])), [[(0, 1), (0, 2), (2, 1)]]
-        )
-
-    def test_node_printing(self):
-        '''Test that the tgrep print operator ' is properly ignored.'''
-        tree = ParentedTree.fromstring('(S (n x) (N x))')
-        self.assertEqual(
-            list(tgrep.tgrep_positions('N', [tree])),
-            list(tgrep.tgrep_positions('\'N', [tree])),
-        )
-        self.assertEqual(
-            list(tgrep.tgrep_positions('/[Nn]/', [tree])),
-            list(tgrep.tgrep_positions('\'/[Nn]/', [tree])),
-        )
-
-    def test_node_encoding(self):
-        '''
-        Test that tgrep search strings handles bytes and strs the same
-        way.
-        '''
-        tree = ParentedTree.fromstring(
-            '(S (NP (DT the) (JJ big) (NN dog)) ' '(VP bit) (NP (DT a) (NN cat)))'
-        )
-        self.assertEqual(
-            list(tgrep.tgrep_positions(b('NN'), [tree])),
-            list(tgrep.tgrep_positions('NN', [tree])),
-        )
-        self.assertEqual(
-            list(tgrep.tgrep_nodes(b('NN'), [tree])),
-            list(tgrep.tgrep_nodes('NN', [tree])),
-        )
-        self.assertEqual(
-            list(tgrep.tgrep_positions(b('NN|JJ'), [tree])),
-            list(tgrep.tgrep_positions('NN|JJ', [tree])),
-        )
-
-    def test_node_nocase(self):
-        '''
-        Test selecting nodes using case insensitive node names.
-        '''
-        tree = ParentedTree.fromstring('(S (n x) (N x))')
-        self.assertEqual(list(tgrep.tgrep_positions('"N"', [tree])), [[(1,)]])
-        self.assertEqual(list(tgrep.tgrep_positions('i@"N"', [tree])), [[(0,), (1,)]])
-
-    def test_node_quoted(self):
-        '''
-        Test selecting nodes using quoted node names.
-        '''
-        tree = ParentedTree.fromstring('(N ("N" x) (N" x) ("\\" x))')
-        self.assertEqual(list(tgrep.tgrep_positions('"N"', [tree])), [[()]])
-        self.assertEqual(list(tgrep.tgrep_positions('"\\"N\\""', [tree])), [[(0,)]])
-        self.assertEqual(list(tgrep.tgrep_positions('"N\\""', [tree])), [[(1,)]])
-        self.assertEqual(list(tgrep.tgrep_positions('"\\"\\\\\\""', [tree])), [[(2,)]])
-
-    def test_node_regex(self):
-        '''
-        Test regex matching on nodes.
-        '''
-        tree = ParentedTree.fromstring('(S (NP-SBJ x) (NP x) (NNP x) (VP x))')
-        # This is a regular expression that matches any node whose
-        # name starts with NP, including NP-SBJ:
-        self.assertEqual(list(tgrep.tgrep_positions('/^NP/', [tree])), [[(0,), (1,)]])
-
-    def test_node_regex_2(self):
-        '''
-        Test regex matching on nodes.
-        '''
-        tree = ParentedTree.fromstring('(S (SBJ x) (SBJ1 x) (NP-SBJ x))')
-        self.assertEqual(list(tgrep.tgrep_positions('/^SBJ/', [tree])), [[(0,), (1,)]])
-        # This is a regular expression that matches any node whose
-        # name includes SBJ, including NP-SBJ:
-        self.assertEqual(
-            list(tgrep.tgrep_positions('/SBJ/', [tree])), [[(0,), (1,), (2,)]]
-        )
-
-    def test_node_tree_position(self):
-        '''
-        Test matching on nodes based on NLTK tree position.
-        '''
-        tree = ParentedTree.fromstring('(S (NP-SBJ x) (NP x) (NNP x) (VP x))')
-        # test all tree positions that are not leaves
-        leaf_positions = set(
-            tree.leaf_treeposition(x) for x in range(len(tree.leaves()))
-        )
-        tree_positions = [x for x in tree.treepositions() if x not in leaf_positions]
-        for position in tree_positions:
-            node_id = 'N{0}'.format(position)
-            tgrep_positions = list(tgrep.tgrep_positions(node_id, [tree]))
-            self.assertEqual(len(tgrep_positions[0]), 1)
-            self.assertEqual(tgrep_positions[0][0], position)
-
-    def test_node_noleaves(self):
-        '''
-        Test node name matching with the search_leaves flag set to False.
-        '''
-        tree = ParentedTree.fromstring('(S (A (T x)) (B (N x)))')
-        self.assertEqual(
-            list(tgrep.tgrep_positions('x', [tree])), [[(0, 0, 0), (1, 0, 0)]]
-        )
-        self.assertEqual(list(tgrep.tgrep_positions('x', [tree], False)), [[]])
-
-    def tests_rel_dominance(self):
-        '''
-        Test matching nodes based on dominance relations.
-        '''
-        tree = ParentedTree.fromstring('(S (A (T x)) (B (N x)))')
-        self.assertEqual(list(tgrep.tgrep_positions('* < T', [tree])), [[(0,)]])
-        self.assertEqual(list(tgrep.tgrep_positions('* < T > S', [tree])), [[(0,)]])
-        self.assertEqual(
-            list(tgrep.tgrep_positions('* !< T', [tree])),
-            [[(), (0, 0), (0, 0, 0), (1,), (1, 0), (1, 0, 0)]],
-        )
-        self.assertEqual(list(tgrep.tgrep_positions('* !< T > S', [tree])), [[(1,)]])
-        self.assertEqual(list(tgrep.tgrep_positions('* > A', [tree])), [[(0, 0)]])
-        self.assertEqual(list(tgrep.tgrep_positions('* > B', [tree])), [[(1, 0)]])
-        self.assertEqual(
-            list(tgrep.tgrep_positions('* !> B', [tree])),
-            [[(), (0,), (0, 0), (0, 0, 0), (1,), (1, 0, 0)]],
-        )
-        self.assertEqual(
-            list(tgrep.tgrep_positions('* !> B >> S', [tree])), [[(0,), (0, 0), (1,)]]
-        )
-        self.assertEqual(
-            list(tgrep.tgrep_positions('* >> S', [tree])),
-            [[(0,), (0, 0), (1,), (1, 0)]],
-        )
-        self.assertEqual(
-            list(tgrep.tgrep_positions('* >>, S', [tree])), [[(0,), (0, 0)]]
-        )
-        self.assertEqual(
-            list(tgrep.tgrep_positions('* >>\' S', [tree])), [[(1,), (1, 0)]]
-        )
-        # Known issue:
-        # self.assertEqual(list(tgrep.tgrep_positions('* !>> S', [tree])),
-        #                 [[()]])
-        self.assertEqual(list(tgrep.tgrep_positions('* << T', [tree])), [[(), (0,)]])
-        self.assertEqual(list(tgrep.tgrep_positions('* <<\' T', [tree])), [[(0,)]])
-        self.assertEqual(list(tgrep.tgrep_positions('* <<1 N', [tree])), [[(1,)]])
-        self.assertEqual(
-            list(tgrep.tgrep_positions('* !<< T', [tree])),
-            [[(0, 0), (0, 0, 0), (1,), (1, 0), (1, 0, 0)]],
-        )
-        tree = ParentedTree.fromstring('(S (A (T x)) (B (T x) (N x )))')
-        self.assertEqual(list(tgrep.tgrep_positions('* <: T', [tree])), [[(0,)]])
-        self.assertEqual(list(tgrep.tgrep_positions('* < T', [tree])), [[(0,), (1,)]])
-        self.assertEqual(
-            list(tgrep.tgrep_positions('* !<: T', [tree])),
-            [[(), (0, 0), (0, 0, 0), (1,), (1, 0), (1, 0, 0), (1, 1), (1, 1, 0)]],
-        )
-        self.assertEqual(list(tgrep.tgrep_positions('* !<: T > S', [tree])), [[(1,)]])
-        tree = ParentedTree.fromstring('(S (T (A x) (B x)) (T (C x)))')
-        self.assertEqual(list(tgrep.tgrep_positions('* >: T', [tree])), [[(1, 0)]])
-        self.assertEqual(
-            list(tgrep.tgrep_positions('* !>: T', [tree])),
-            [[(), (0,), (0, 0), (0, 0, 0), (0, 1), (0, 1, 0), (1,), (1, 0, 0)]],
-        )
-        tree = ParentedTree.fromstring(
-            '(S (A (B (C (D (E (T x))))))' ' (A (B (C (D (E (T x))) (N x)))))'
-        )
-        self.assertEqual(
-            list(tgrep.tgrep_positions('* <<: T', [tree])),
-            [
-                [
-                    (0,),
-                    (0, 0),
-                    (0, 0, 0),
-                    (0, 0, 0, 0),
-                    (0, 0, 0, 0, 0),
-                    (1, 0, 0, 0),
-                    (1, 0, 0, 0, 0),
-                ]
-            ],
-        )
-        self.assertEqual(
-            list(tgrep.tgrep_positions('* >>: A', [tree])),
-            [
-                [
-                    (0, 0),
-                    (0, 0, 0),
-                    (0, 0, 0, 0),
-                    (0, 0, 0, 0, 0),
-                    (0, 0, 0, 0, 0, 0),
-                    (1, 0),
-                    (1, 0, 0),
-                ]
-            ],
-        )
-
-    def test_bad_operator(self):
-        '''
-        Test error handling of undefined tgrep operators.
-        '''
-        tree = ParentedTree.fromstring('(S (A (T x)) (B (N x)))')
-        self.assertRaises(
-            tgrep.TgrepException, list, tgrep.tgrep_positions('* >>> S', [tree])
-        )
-
-    def test_comments(self):
-        '''
-        Test that comments are correctly filtered out of tgrep search
-        strings.
-        '''
-        tree = ParentedTree.fromstring('(S (NN x) (NP x) (NN x))')
-        search1 = '''
-        @ NP /^NP/;
-        @ NN /^NN/;
-        @NN
-        '''
-        self.assertEqual(list(tgrep.tgrep_positions(search1, [tree])), [[(0,), (2,)]])
-        search2 = '''
-        # macros
-        @ NP /^NP/;
-        @ NN /^NN/;
-
-        # search string
-        @NN
-        '''
-        self.assertEqual(list(tgrep.tgrep_positions(search2, [tree])), [[(0,), (2,)]])
-
-    def test_rel_sister_nodes(self):
-        '''
-        Test matching sister nodes in a tree.
-        '''
-        tree = ParentedTree.fromstring('(S (A x) (B x) (C x))')
-        self.assertEqual(list(tgrep.tgrep_positions('* $. B', [tree])), [[(0,)]])
-        self.assertEqual(list(tgrep.tgrep_positions('* $.. B', [tree])), [[(0,)]])
-        self.assertEqual(list(tgrep.tgrep_positions('* $, B', [tree])), [[(2,)]])
-        self.assertEqual(list(tgrep.tgrep_positions('* $,, B', [tree])), [[(2,)]])
-        self.assertEqual(list(tgrep.tgrep_positions('* $ B', [tree])), [[(0,), (2,)]])
-
-    def tests_rel_indexed_children(self):
-        '''
-        Test matching nodes based on their index in their parent node.
-        '''
-        tree = ParentedTree.fromstring('(S (A x) (B x) (C x))')
-        self.assertEqual(list(tgrep.tgrep_positions('* >, S', [tree])), [[(0,)]])
-        self.assertEqual(list(tgrep.tgrep_positions('* >1 S', [tree])), [[(0,)]])
-        self.assertEqual(list(tgrep.tgrep_positions('* >2 S', [tree])), [[(1,)]])
-        self.assertEqual(list(tgrep.tgrep_positions('* >3 S', [tree])), [[(2,)]])
-        self.assertEqual(list(tgrep.tgrep_positions('* >\' S', [tree])), [[(2,)]])
-        self.assertEqual(list(tgrep.tgrep_positions('* >-1 S', [tree])), [[(2,)]])
-        self.assertEqual(list(tgrep.tgrep_positions('* >-2 S', [tree])), [[(1,)]])
-        self.assertEqual(list(tgrep.tgrep_positions('* >-3 S', [tree])), [[(0,)]])
-        tree = ParentedTree.fromstring(
-            '(S (D (A x) (B x) (C x)) (E (B x) (C x) (A x)) ' '(F (C x) (A x) (B x)))'
-        )
-        self.assertEqual(list(tgrep.tgrep_positions('* <, A', [tree])), [[(0,)]])
-        self.assertEqual(list(tgrep.tgrep_positions('* <1 A', [tree])), [[(0,)]])
-        self.assertEqual(list(tgrep.tgrep_positions('* <2 A', [tree])), [[(2,)]])
-        self.assertEqual(list(tgrep.tgrep_positions('* <3 A', [tree])), [[(1,)]])
-        self.assertEqual(list(tgrep.tgrep_positions('* <\' A', [tree])), [[(1,)]])
-        self.assertEqual(list(tgrep.tgrep_positions('* <-1 A', [tree])), [[(1,)]])
-        self.assertEqual(list(tgrep.tgrep_positions('* <-2 A', [tree])), [[(2,)]])
-        self.assertEqual(list(tgrep.tgrep_positions('* <-3 A', [tree])), [[(0,)]])
-
-    def test_rel_precedence(self):
-        '''
-        Test matching nodes based on precedence relations.
-        '''
-        tree = ParentedTree.fromstring(
-            '(S (NP (NP (PP x)) (NP (AP x)))'
-            ' (VP (AP (X (PP x)) (Y (AP x))))'
-            ' (NP (RC (NP (AP x)))))'
-        )
-        self.assertEqual(
-            list(tgrep.tgrep_positions('* . X', [tree])), [[(0,), (0, 1), (0, 1, 0)]]
-        )
-        self.assertEqual(
-            list(tgrep.tgrep_positions('* . Y', [tree])), [[(1, 0, 0), (1, 0, 0, 0)]]
-        )
-        self.assertEqual(
-            list(tgrep.tgrep_positions('* .. X', [tree])),
-            [[(0,), (0, 0), (0, 0, 0), (0, 1), (0, 1, 0)]],
-        )
-        self.assertEqual(
-            list(tgrep.tgrep_positions('* .. Y', [tree])),
-            [[(0,), (0, 0), (0, 0, 0), (0, 1), (0, 1, 0), (1, 0, 0), (1, 0, 0, 0)]],
-        )
-        self.assertEqual(
-            list(tgrep.tgrep_positions('* , X', [tree])), [[(1, 0, 1), (1, 0, 1, 0)]]
-        )
-        self.assertEqual(
-            list(tgrep.tgrep_positions('* , Y', [tree])),
-            [[(2,), (2, 0), (2, 0, 0), (2, 0, 0, 0)]],
-        )
-        self.assertEqual(
-            list(tgrep.tgrep_positions('* ,, X', [tree])),
-            [[(1, 0, 1), (1, 0, 1, 0), (2,), (2, 0), (2, 0, 0), (2, 0, 0, 0)]],
-        )
-        self.assertEqual(
-            list(tgrep.tgrep_positions('* ,, Y', [tree])),
-            [[(2,), (2, 0), (2, 0, 0), (2, 0, 0, 0)]],
-        )
-
-    def test_examples(self):
-        '''
-        Test the Basic Examples from the TGrep2 manual.
-        '''
-        tree = ParentedTree.fromstring('(S (NP (AP x)) (NP (PP x)))')
-        # This matches any NP node that immediately dominates a PP:
-        self.assertEqual(list(tgrep.tgrep_positions('NP < PP', [tree])), [[(1,)]])
-
-        tree = ParentedTree.fromstring('(S (NP x) (VP x) (NP (PP x)) (VP x))')
-        # This matches an NP that dominates a PP and is immediately
-        # followed by a VP:
-        self.assertEqual(list(tgrep.tgrep_positions('NP << PP . VP', [tree])), [[(2,)]])
-
-        tree = ParentedTree.fromstring(
-            '(S (NP (AP x)) (NP (PP x)) ' '(NP (DET x) (NN x)) (VP x))'
-        )
-        # This matches an NP that dominates a PP or is immediately
-        # followed by a VP:
-        self.assertEqual(
-            list(tgrep.tgrep_positions('NP << PP | . VP', [tree])), [[(1,), (2,)]]
-        )
-
-        tree = ParentedTree.fromstring(
-            '(S (NP (NP (PP x)) (NP (AP x)))'
-            ' (VP (AP (NP (PP x)) (NP (AP x))))'
-            ' (NP (RC (NP (AP x)))))'
-        )
-        # This matches an NP that does not dominate a PP. Also, the NP
-        # must either have a parent that is an NP or be dominated by a
-        # VP:
-        self.assertEqual(
-            list(tgrep.tgrep_positions('NP !<< PP [> NP | >> VP]', [tree])),
-            [[(0, 1), (1, 0, 1)]],
-        )
-
-        tree = ParentedTree.fromstring(
-            '(S (NP (AP (PP x) (VP x))) ' '(NP (AP (PP x) (NP x))) (NP x))'
-        )
-        # This matches an NP that dominates a PP which itself is
-        # immediately followed by a VP. Note the use of parentheses to
-        # group ". VP" with the PP rather than with the NP:
-        self.assertEqual(
-            list(tgrep.tgrep_positions('NP << (PP . VP)', [tree])), [[(0,)]]
-        )
-
-        tree = ParentedTree.fromstring(
-            '(S (NP (DET a) (NN cat) (PP (IN on) (NP x)))'
-            ' (NP (DET a) (NN cat) (PP (IN on) (NP x)) (PP x))'
-            ' (NP x))'
-        )
-        # This matches an NP whose last child is a PP that begins with
-        # the preposition "on":
-        self.assertEqual(
-            list(tgrep.tgrep_positions('NP <\' (PP <, (IN < on))', [tree])), [[(0,)]]
-        )
-
-        tree = ParentedTree.fromstring(
-            '(S (S (C x) (A (B x))) (S (C x) (A x)) ' '(S (D x) (A (B x))))'
-        )
-        # The following pattern matches an S which has a child A and
-        # another child that is a C and that the A has a child B:
-        self.assertEqual(
-            list(tgrep.tgrep_positions('S < (A < B) < C', [tree])), [[(0,)]]
-        )
-
-        tree = ParentedTree.fromstring(
-            '(S (S (A (B x) (C x))) (S (S (C x) (A (B x)))))'
-        )
-        # However, this pattern means that S has child A and that A
-        # has children B and C:
-        self.assertEqual(
-            list(tgrep.tgrep_positions('S < ((A < B) < C)', [tree])), [[(0,)]]
-        )
-
-        # It is equivalent to this:
-        self.assertEqual(
-            list(tgrep.tgrep_positions('S < (A < B < C)', [tree])), [[(0,)]]
-        )
-
-    def test_use_macros(self):
-        '''
-        Test defining and using tgrep2 macros.
-        '''
-        tree = ParentedTree.fromstring(
-            '(VP (VB sold) (NP (DET the) '
-            '(NN heiress)) (NP (NN deed) (PREP to) '
-            '(NP (DET the) (NN school) (NN house))))'
-        )
-        self.assertEqual(
-            list(
-                tgrep.tgrep_positions(
-                    '@ NP /^NP/;\n@ NN /^NN/;\n@NP !< @NP !$.. @NN', [tree]
-                )
-            ),
-            [[(1,), (2, 2)]],
-        )
-        # use undefined macro @CNP
-        self.assertRaises(
-            tgrep.TgrepException,
-            list,
-            tgrep.tgrep_positions(
-                '@ NP /^NP/;\n@ NN /^NN/;\n@CNP !< @NP !$.. @NN', [tree]
-            ),
-        )
-
-    def test_tokenize_node_labels(self):
-        '''Test tokenization of labeled nodes.'''
-        self.assertEqual(
-            tgrep.tgrep_tokenize('S < @SBJ < (@VP < (@VB $.. @OBJ))'),
-            [
-                'S',
-                '<',
-                '@SBJ',
-                '<',
-                '(',
-                '@VP',
-                '<',
-                '(',
-                '@VB',
-                '$..',
-                '@OBJ',
-                ')',
-                ')',
-            ],
-        )
-        self.assertEqual(
-            tgrep.tgrep_tokenize('S < @SBJ=s < (@VP=v < (@VB $.. @OBJ))'),
-            [
-                'S',
-                '<',
-                '@SBJ',
-                '=',
-                's',
-                '<',
-                '(',
-                '@VP',
-                '=',
-                'v',
-                '<',
-                '(',
-                '@VB',
-                '$..',
-                '@OBJ',
-                ')',
-                ')',
-            ],
-        )
-
-    def test_tokenize_segmented_patterns(self):
-        '''Test tokenization of segmented patterns.'''
-        self.assertEqual(
-            tgrep.tgrep_tokenize('S < @SBJ=s < (@VP=v < (@VB $.. @OBJ)) : =s .. =v'),
-            [
-                'S',
-                '<',
-                '@SBJ',
-                '=',
-                's',
-                '<',
-                '(',
-                '@VP',
-                '=',
-                'v',
-                '<',
-                '(',
-                '@VB',
-                '$..',
-                '@OBJ',
-                ')',
-                ')',
-                ':',
-                '=s',
-                '..',
-                '=v',
-            ],
-        )
-
-    def test_labeled_nodes(self):
-        '''
-        Test labeled nodes.
-
-        Test case from Emily M. Bender.
-        '''
-        search = '''
-            # macros
-            @ SBJ /SBJ/;
-            @ VP /VP/;
-            @ VB /VB/;
-            @ VPoB /V[PB]/;
-            @ OBJ /OBJ/;
-
-            # 1 svo
-            S < @SBJ=s < (@VP=v < (@VB $.. @OBJ)) : =s .. =v'''
-        sent1 = ParentedTree.fromstring(
-            '(S (NP-SBJ I) (VP (VB eat) (NP-OBJ (NNS apples))))'
-        )
-        sent2 = ParentedTree.fromstring(
-            '(S (VP (VB eat) (NP-OBJ (NNS apples))) (NP-SBJ I))'
-        )
-        search_firsthalf = search.split('\n\n')[0] + 'S < @SBJ < (@VP < (@VB $.. @OBJ))'
-        search_rewrite = 'S < (/.*SBJ/ $.. (/VP/ < (/VB/ $.. /.*OBJ/)))'
-
-        self.assertTrue(list(tgrep.tgrep_positions(search_firsthalf, [sent1]))[0])
-        self.assertTrue(list(tgrep.tgrep_positions(search, [sent1]))[0])
-        self.assertTrue(list(tgrep.tgrep_positions(search_rewrite, [sent1]))[0])
-        self.assertEqual(
-            list(tgrep.tgrep_positions(search, [sent1])),
-            list(tgrep.tgrep_positions(search_rewrite, [sent1])),
-        )
-        self.assertTrue(list(tgrep.tgrep_positions(search_firsthalf, [sent2]))[0])
-        self.assertFalse(list(tgrep.tgrep_positions(search, [sent2]))[0])
-        self.assertFalse(list(tgrep.tgrep_positions(search_rewrite, [sent2]))[0])
-        self.assertEqual(
-            list(tgrep.tgrep_positions(search, [sent2])),
-            list(tgrep.tgrep_positions(search_rewrite, [sent2])),
-        )
-
-    def test_multiple_conjs(self):
-        '''
-        Test that multiple (3 or more) conjunctions of node relations are
-        handled properly.
-        '''
-        sent = ParentedTree.fromstring('((A (B b) (C c)) (A (B b) (C c) (D d)))')
-        # search = '(A < B < C < D)'
-        # search_tworels = '(A < B < C)'
-        self.assertEqual(
-            list(tgrep.tgrep_positions('(A < B < C < D)', [sent])), [[(1,)]]
-        )
-        self.assertEqual(
-            list(tgrep.tgrep_positions('(A < B < C)', [sent])), [[(0,), (1,)]]
-        )
-
-    def test_trailing_semicolon(self):
-        '''
-        Test that semicolons at the end of a tgrep2 search string won't
-        cause a parse failure.
-        '''
-        tree = ParentedTree.fromstring(
-            '(S (NP (DT the) (JJ big) (NN dog)) ' '(VP bit) (NP (DT a) (NN cat)))'
-        )
-        self.assertEqual(list(tgrep.tgrep_positions('NN', [tree])), [[(0, 2), (2, 1)]])
-        self.assertEqual(list(tgrep.tgrep_positions('NN;', [tree])), [[(0, 2), (2, 1)]])
-        self.assertEqual(
-            list(tgrep.tgrep_positions('NN;;', [tree])), [[(0, 2), (2, 1)]]
-        )
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/nlp_resource_data/nltk/test/unit/test_tokenize.py b/nlp_resource_data/nltk/test/unit/test_tokenize.py

deleted file mode 100644 (file)

index fa0c286..0000000
--- a/nlp_resource_data/nltk/test/unit/test_tokenize.py
+++ /dev/null
@@ -1,362 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Unit tests for nltk.tokenize.
-See also nltk/test/tokenize.doctest
-"""
-
-from __future__ import unicode_literals
-
-import os
-import unittest
-
-from nose import SkipTest
-
-from nltk.tokenize import word_tokenize
-from nltk.tokenize import TweetTokenizer, StanfordSegmenter, TreebankWordTokenizer
-
-
-class TestTokenize(unittest.TestCase):
-    def test_tweet_tokenizer(self):
-        """
-        Test TweetTokenizer using words with special and accented characters.
-        """
-
-        tokenizer = TweetTokenizer(strip_handles=True, reduce_len=True)
-        s9 = "@myke: Let's test these words: resumé España München français"
-        tokens = tokenizer.tokenize(s9)
-        expected = [
-            ':',
-            "Let's",
-            'test',
-            'these',
-            'words',
-            ':',
-            'resumé',
-            'España',
-            'München',
-            'français',
-        ]
-        self.assertEqual(tokens, expected)
-
-    def test_stanford_segmenter_arabic(self):
-        """
-        Test the Stanford Word Segmenter for Arabic (default config)
-        """
-        try:
-            seg = StanfordSegmenter()
-            seg.default_config('ar')
-            sent = u'يبحث علم الحاسوب استخدام الحوسبة بجميع اشكالها لحل المشكلات'
-            segmented_sent = seg.segment(sent.split())
-            assert segmented_sent.split() == [
-                'يبحث',
-                'علم',
-                'الحاسوب',
-                'استخدام',
-                'الحوسبة',
-                'ب',
-                'جميع',
-                'اشكال',
-                'ها',
-                'ل',
-                'حل',
-                'المشكلات',
-            ]
-        except LookupError as e:
-            raise SkipTest(str(e))
-
-    def test_stanford_segmenter_chinese(self):
-        """
-        Test the Stanford Word Segmenter for Chinese (default config)
-        """
-        try:
-            seg = StanfordSegmenter()
-            seg.default_config('zh')
-            sent = u"这是斯坦福中文分词器测试"
-            segmented_sent = seg.segment(sent.split())
-            assert segmented_sent.split() == ['这', '是', '斯坦福', '中文', '分词器', '测试']
-        except LookupError as e:
-            raise SkipTest(str(e))
-
-    def test_phone_tokenizer(self):
-        """
-        Test a string that resembles a phone number but contains a newline
-        """
-
-        # Should be recognized as a phone number, albeit one with multiple spaces
-        tokenizer = TweetTokenizer()
-        test1 = "(393)  928 -3010"
-        expected = ['(393)  928 -3010']
-        result = tokenizer.tokenize(test1)
-        self.assertEqual(result, expected)
-
-        # Due to newline, first three elements aren't part of a phone number;
-        # fourth is
-        test2 = "(393)\n928 -3010"
-        expected = ['(', '393', ')', "928 -3010"]
-        result = tokenizer.tokenize(test2)
-        self.assertEqual(result, expected)
-
-    def test_remove_handle(self):
-        """
-        Test remove_handle() from casual.py with specially crafted edge cases
-        """
-
-        tokenizer = TweetTokenizer(strip_handles=True)
-
-        # Simple example. Handles with just numbers should be allowed
-        test1 = "@twitter hello @twi_tter_. hi @12345 @123news"
-        expected = ['hello', '.', 'hi']
-        result = tokenizer.tokenize(test1)
-        self.assertEqual(result, expected)
-
-        # Handles are allowed to follow any of the following characters
-        test2 = "@n`@n~@n(@n)@n-@n=@n+@n\\@n|@n[@n]@n{@n}@n;@n:@n'@n\"@n/@n?@n.@n,@n<@n>@n @n\n@n ñ@n.ü@n.ç@n."
-        expected = [
-            '`',
-            '~',
-            '(',
-            ')',
-            '-',
-            '=',
-            '+',
-            '\\',
-            '|',
-            '[',
-            ']',
-            '{',
-            '}',
-            ';',
-            ':',
-            "'",
-            '"',
-            '/',
-            '?',
-            '.',
-            ',',
-            '<',
-            '>',
-            'ñ',
-            '.',
-            'ü',
-            '.',
-            'ç',
-            '.',
-        ]
-        result = tokenizer.tokenize(test2)
-        self.assertEqual(result, expected)
-
-        # Handles are NOT allowed to follow any of the following characters
-        test3 = "a@n j@n z@n A@n L@n Z@n 1@n 4@n 7@n 9@n 0@n _@n !@n @@n #@n $@n %@n &@n *@n"
-        expected = [
-            'a',
-            '@n',
-            'j',
-            '@n',
-            'z',
-            '@n',
-            'A',
-            '@n',
-            'L',
-            '@n',
-            'Z',
-            '@n',
-            '1',
-            '@n',
-            '4',
-            '@n',
-            '7',
-            '@n',
-            '9',
-            '@n',
-            '0',
-            '@n',
-            '_',
-            '@n',
-            '!',
-            '@n',
-            '@',
-            '@n',
-            '#',
-            '@n',
-            '$',
-            '@n',
-            '%',
-            '@n',
-            '&',
-            '@n',
-            '*',
-            '@n',
-        ]
-        result = tokenizer.tokenize(test3)
-        self.assertEqual(result, expected)
-
-        # Handles are allowed to precede the following characters
-        test4 = "@n!a @n#a @n$a @n%a @n&a @n*a"
-        expected = ['!', 'a', '#', 'a', '$', 'a', '%', 'a', '&', 'a', '*', 'a']
-        result = tokenizer.tokenize(test4)
-        self.assertEqual(result, expected)
-
-        # Tests interactions with special symbols and multiple @
-        test5 = "@n!@n @n#@n @n$@n @n%@n @n&@n @n*@n @n@n @@n @n@@n @n_@n @n7@n @nj@n"
-        expected = [
-            '!',
-            '@n',
-            '#',
-            '@n',
-            '$',
-            '@n',
-            '%',
-            '@n',
-            '&',
-            '@n',
-            '*',
-            '@n',
-            '@n',
-            '@n',
-            '@',
-            '@n',
-            '@n',
-            '@',
-            '@n',
-            '@n_',
-            '@n',
-            '@n7',
-            '@n',
-            '@nj',
-            '@n',
-        ]
-        result = tokenizer.tokenize(test5)
-        self.assertEqual(result, expected)
-
-        # Tests that handles can have a max length of 20
-        test6 = "@abcdefghijklmnopqrstuvwxyz @abcdefghijklmnopqrst1234 @abcdefghijklmnopqrst_ @abcdefghijklmnopqrstendofhandle"
-        expected = ['uvwxyz', '1234', '_', 'endofhandle']
-        result = tokenizer.tokenize(test6)
-        self.assertEqual(result, expected)
-
-        # Edge case where an @ comes directly after a long handle
-        test7 = "@abcdefghijklmnopqrstu@abcde @abcdefghijklmnopqrst@abcde @abcdefghijklmnopqrst_@abcde @abcdefghijklmnopqrst5@abcde"
-        expected = [
-            'u',
-            '@abcde',
-            '@abcdefghijklmnopqrst',
-            '@abcde',
-            '_',
-            '@abcde',
-            '5',
-            '@abcde',
-        ]
-        result = tokenizer.tokenize(test7)
-        self.assertEqual(result, expected)
-
-    def test_treebank_span_tokenizer(self):
-        """
-        Test TreebankWordTokenizer.span_tokenize function
-        """
-
-        tokenizer = TreebankWordTokenizer()
-
-        # Test case in the docstring
-        test1 = "Good muffins cost $3.88\nin New (York).  Please (buy) me\ntwo of them.\n(Thanks)."
-        expected = [
-            (0, 4),
-            (5, 12),
-            (13, 17),
-            (18, 19),
-            (19, 23),
-            (24, 26),
-            (27, 30),
-            (31, 32),
-            (32, 36),
-            (36, 37),
-            (37, 38),
-            (40, 46),
-            (47, 48),
-            (48, 51),
-            (51, 52),
-            (53, 55),
-            (56, 59),
-            (60, 62),
-            (63, 68),
-            (69, 70),
-            (70, 76),
-            (76, 77),
-            (77, 78),
-        ]
-        result = list(tokenizer.span_tokenize(test1))
-        self.assertEqual(result, expected)
-
-        # Test case with double quotation
-        test2 = "The DUP is similar to the \"religious right\" in the United States and takes a hardline stance on social issues"
-        expected = [
-            (0, 3),
-            (4, 7),
-            (8, 10),
-            (11, 18),
-            (19, 21),
-            (22, 25),
-            (26, 27),
-            (27, 36),
-            (37, 42),
-            (42, 43),
-            (44, 46),
-            (47, 50),
-            (51, 57),
-            (58, 64),
-            (65, 68),
-            (69, 74),
-            (75, 76),
-            (77, 85),
-            (86, 92),
-            (93, 95),
-            (96, 102),
-            (103, 109),
-        ]
-        result = list(tokenizer.span_tokenize(test2))
-        self.assertEqual(result, expected)
-
-        # Test case with double qoutation as well as converted quotations
-        test3 = "The DUP is similar to the \"religious right\" in the United States and takes a ``hardline'' stance on social issues"
-        expected = [
-            (0, 3),
-            (4, 7),
-            (8, 10),
-            (11, 18),
-            (19, 21),
-            (22, 25),
-            (26, 27),
-            (27, 36),
-            (37, 42),
-            (42, 43),
-            (44, 46),
-            (47, 50),
-            (51, 57),
-            (58, 64),
-            (65, 68),
-            (69, 74),
-            (75, 76),
-            (77, 79),
-            (79, 87),
-            (87, 89),
-            (90, 96),
-            (97, 99),
-            (100, 106),
-            (107, 113),
-        ]
-        result = list(tokenizer.span_tokenize(test3))
-        self.assertEqual(result, expected)
-
-        
-    def test_word_tokenize(self):
-        """
-        Test word_tokenize function
-        """
-        
-        sentence = "The 'v', I've been fooled but I'll seek revenge."
-        expected = ['The', "'", 'v', "'", ',', 'I', "'ve", 'been', 'fooled', 
-                    'but', 'I', "'ll", 'seek', 'revenge', '.']
-        self.assertEqual(word_tokenize(sentence), expected)
-        
-        sentence = "'v' 're'"
-        expected = ["'", 'v', "'", "'re", "'"]
-        self.assertEqual(word_tokenize(sentence), expected)
diff --git a/nlp_resource_data/nltk/test/unit/test_twitter_auth.py b/nlp_resource_data/nltk/test/unit/test_twitter_auth.py

deleted file mode 100644 (file)

index e0189fb..0000000
--- a/nlp_resource_data/nltk/test/unit/test_twitter_auth.py
+++ /dev/null
@@ -1,181 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Tests for static parts of Twitter package
-"""
-
-import os
-import unittest
-from nose import SkipTest
-
-try:
-    import twython
-except ImportError as e:
-    raise SkipTest("The twython library has not been installed.")
-
-from nltk.twitter import Authenticate
-
-
-class TestCredentials(unittest.TestCase):
-    """
-    Tests that Twitter credentials information from file is handled correctly.
-    """
-
-    def setUp(self):
-        self.subdir = os.path.join(os.path.dirname(__file__), 'files')
-        self.auth = Authenticate()
-        os.environ['TWITTER'] = 'twitter-files'
-
-    def test_environment(self):
-        """
-        Test that environment variable has been read correctly.
-        """
-        fn = os.path.basename(self.auth.creds_subdir)
-        self.assertEqual(fn, os.environ['TWITTER'])
-
-    def test_empty_subdir1(self):
-        """
-        Setting subdir to empty path should raise an error.
-        """
-        try:
-            self.auth.load_creds(subdir='')
-        # raises ValueError (zero length field name in format) for python 2.6
-        # OSError for the rest
-        except OSError:
-            pass
-        except ValueError:
-            pass
-        except Exception as e:
-            self.fail('Unexpected exception thrown: %s' % e)
-        else:
-            self.fail('OSError exception not thrown.')
-
-    def test_empty_subdir2(self):
-        """
-        Setting subdir to `None` should raise an error.
-        """
-        self.auth.creds_subdir = None
-        try:
-            self.auth.load_creds()
-        except ValueError:
-            pass
-        except Exception as e:
-            self.fail('Unexpected exception thrown: %s' % e)
-        else:
-            self.fail('ValueError exception not thrown.')
-
-    def test_missingdir(self):
-        """
-        Setting subdir to nonexistent directory should raise an error.
-        """
-        try:
-            self.auth.load_creds(subdir='/nosuchdir')
-        # raises ValueError (zero length field name in format) for python 2.6
-        # OSError for the rest
-        except OSError:
-            pass
-        except ValueError:
-            pass
-        except Exception as e:
-            self.fail('Unexpected exception thrown: %s' % e)
-        else:
-            self.fail('OSError exception not thrown.')
-
-    def test_missingfile1(self):
-        """
-        Defaults for authentication will fail since 'credentials.txt' not
-        present in default subdir, as read from `os.environ['TWITTER']`.
-        """
-        try:
-            self.auth.load_creds()
-        # raises ValueError (zero length field name in format) for python 2.6
-        # OSError for the rest
-        except OSError:
-            pass
-        except ValueError:
-            pass
-        except Exception as e:
-            self.fail('Unexpected exception thrown: %s' % e)
-        else:
-            self.fail('OSError exception not thrown.')
-
-    def test_missingfile2(self):
-        """
-        Credentials file 'foobar' cannot be found in default subdir.
-        """
-        try:
-            self.auth.load_creds(creds_file='foobar')
-        # raises ValueError (zero length field name in format) for python 2.6
-        # OSError for the rest
-        except OSError:
-            pass
-        except ValueError:
-            pass
-        except Exception as e:
-            self.fail('Unexpected exception thrown: %s' % e)
-        else:
-            self.fail('OSError exception not thrown.')
-
-    def test_incomplete_file(self):
-        """
-        Credentials file 'bad_oauth1-1.txt' is incomplete
-        """
-        try:
-            self.auth.load_creds(creds_file='bad_oauth1-1.txt', subdir=self.subdir)
-        except ValueError:
-            pass
-        except Exception as e:
-            self.fail('Unexpected exception thrown: %s' % e)
-        else:
-            self.fail('ValueError exception not thrown.')
-
-    def test_malformed_file1(self):
-        """
-        First key in credentials file 'bad_oauth1-2.txt' is ill-formed
-        """
-        try:
-            self.auth.load_creds(creds_file='bad_oauth1-2.txt', subdir=self.subdir)
-        except ValueError:
-            pass
-        except Exception as e:
-            self.fail('Unexpected exception thrown: %s' % e)
-        else:
-            self.fail('ValueError exception not thrown.')
-
-    def test_malformed_file2(self):
-        """
-        First key in credentials file 'bad_oauth1-2.txt' is ill-formed
-        """
-        try:
-            self.auth.load_creds(creds_file='bad_oauth1-3.txt', subdir=self.subdir)
-        except ValueError:
-            pass
-        except Exception as e:
-            self.fail('Unexpected exception thrown: %s' % e)
-        else:
-            self.fail('ValueError exception not thrown.')
-
-    def test_correct_path(self):
-        """
-        Path to default credentials file is well-formed, given specified
-        subdir.
-        """
-        self.auth.load_creds(subdir=self.subdir)
-        self.auth.creds_fullpath = os.path.join(self.subdir, self.auth.creds_file)
-
-    def test_correct_file1(self):
-        """
-        Default credentials file is identified
-        """
-        self.auth.load_creds(subdir=self.subdir)
-        self.assertEqual(self.auth.creds_file, 'credentials.txt')
-
-    def test_correct_file2(self):
-        """
-        Default credentials file has been read correctluy
-        """
-        oauth = self.auth.load_creds(subdir=self.subdir)
-        self.assertEqual(oauth['app_key'], 'a')
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/nlp_resource_data/nltk/test/unit/test_wordnet.py b/nlp_resource_data/nltk/test/unit/test_wordnet.py

deleted file mode 100644 (file)

index f2191d3..0000000
--- a/nlp_resource_data/nltk/test/unit/test_wordnet.py
+++ /dev/null
@@ -1,197 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Unit tests for nltk.corpus.wordnet
-See also nltk/test/wordnet.doctest
-"""
-
-from __future__ import unicode_literals
-
-import os
-import unittest
-
-from nose import SkipTest
-
-from nltk.corpus.reader.wordnet import WordNetCorpusReader
-from nltk.corpus import wordnet as wn
-from nltk.corpus import wordnet_ic as wnic
-from nltk.data import find as find_data
-
-
-wn.ensure_loaded()
-S = wn.synset
-L = wn.lemma
-
-
-class WordnNetDemo(unittest.TestCase):
-    def test_retrieve_synset(self):
-        move_synset = S('go.v.21')
-        self.assertEqual(move_synset.name(), "move.v.15")
-        self.assertEqual(move_synset.lemma_names(), ['move', 'go'])
-        self.assertEqual(
-            move_synset.definition(), "have a turn; make one's move in a game"
-        )
-        self.assertEqual(move_synset.examples(), ['Can I go now?'])
-
-    def test_retrieve_synsets(self):
-        self.assertEqual(sorted(wn.synsets('zap', pos='n')), [S('zap.n.01')])
-        self.assertEqual(
-            sorted(wn.synsets('zap', pos='v')),
-            [S('microwave.v.01'), S('nuke.v.01'), S('zap.v.01'), S('zap.v.02')],
-        )
-
-    def test_hyperhyponyms(self):
-        # Not every synset as hypernyms()
-        self.assertEqual(S('travel.v.01').hypernyms(), [])
-        self.assertEqual(S('travel.v.02').hypernyms(), [S('travel.v.03')])
-        self.assertEqual(S('travel.v.03').hypernyms(), [])
-
-        # Test hyper-/hyponyms.
-        self.assertEqual(S('breakfast.n.1').hypernyms(), [S('meal.n.01')])
-        first_five_meal_hypo = [
-            S('banquet.n.02'),
-            S('bite.n.04'),
-            S('breakfast.n.01'),
-            S('brunch.n.01'),
-            S('buffet.n.02'),
-        ]
-        self.assertEqual(sorted(S('meal.n.1').hyponyms()[:5]), first_five_meal_hypo)
-        self.assertEqual(S('Austen.n.1').instance_hypernyms(), [S('writer.n.01')])
-        first_five_composer_hypo = [
-            S('ambrose.n.01'),
-            S('bach.n.01'),
-            S('barber.n.01'),
-            S('bartok.n.01'),
-            S('beethoven.n.01'),
-        ]
-        self.assertEqual(
-            S('composer.n.1').instance_hyponyms()[:5], first_five_composer_hypo
-        )
-
-        # Test root hyper-/hyponyms
-        self.assertEqual(S('person.n.01').root_hypernyms(), [S('entity.n.01')])
-        self.assertEqual(S('sail.v.01').root_hypernyms(), [S('travel.v.01')])
-        self.assertEqual(
-            S('fall.v.12').root_hypernyms(), [S('act.v.01'), S('fall.v.17')]
-        )
-
-    def test_derivationally_related_forms(self):
-        # Test `derivationally_related_forms()`
-        self.assertEqual(
-            L('zap.v.03.nuke').derivationally_related_forms(),
-            [L('atomic_warhead.n.01.nuke')],
-        )
-        self.assertEqual(
-            L('zap.v.03.atomize').derivationally_related_forms(),
-            [L('atomization.n.02.atomization')],
-        )
-        self.assertEqual(
-            L('zap.v.03.atomise').derivationally_related_forms(),
-            [L('atomization.n.02.atomisation')],
-        )
-        self.assertEqual(L('zap.v.03.zap').derivationally_related_forms(), [])
-
-    def test_meronyms_holonyms(self):
-        # Test meronyms, holonyms.
-        self.assertEqual(
-            S('dog.n.01').member_holonyms(), [S('canis.n.01'), S('pack.n.06')]
-        )
-        self.assertEqual(S('dog.n.01').part_meronyms(), [S('flag.n.07')])
-
-        self.assertEqual(S('faculty.n.2').member_meronyms(), [S('professor.n.01')])
-        self.assertEqual(S('copilot.n.1').member_holonyms(), [S('crew.n.01')])
-
-        self.assertEqual(
-            S('table.n.2').part_meronyms(),
-            [S('leg.n.03'), S('tabletop.n.01'), S('tableware.n.01')],
-        )
-        self.assertEqual(S('course.n.7').part_holonyms(), [S('meal.n.01')])
-
-        self.assertEqual(
-            S('water.n.1').substance_meronyms(), [S('hydrogen.n.01'), S('oxygen.n.01')]
-        )
-        self.assertEqual(
-            S('gin.n.1').substance_holonyms(),
-            [
-                S('gin_and_it.n.01'),
-                S('gin_and_tonic.n.01'),
-                S('martini.n.01'),
-                S('pink_lady.n.01'),
-            ],
-        )
-
-    def test_antonyms(self):
-        # Test antonyms.
-        self.assertEqual(
-            L('leader.n.1.leader').antonyms(), [L('follower.n.01.follower')]
-        )
-        self.assertEqual(
-            L('increase.v.1.increase').antonyms(), [L('decrease.v.01.decrease')]
-        )
-
-    def test_misc_relations(self):
-        # Test misc relations.
-        self.assertEqual(S('snore.v.1').entailments(), [S('sleep.v.01')])
-        self.assertEqual(
-            S('heavy.a.1').similar_tos(),
-            [
-                S('dense.s.03'),
-                S('doughy.s.01'),
-                S('heavier-than-air.s.01'),
-                S('hefty.s.02'),
-                S('massive.s.04'),
-                S('non-buoyant.s.01'),
-                S('ponderous.s.02'),
-            ],
-        )
-        self.assertEqual(S('light.a.1').attributes(), [S('weight.n.01')])
-        self.assertEqual(S('heavy.a.1').attributes(), [S('weight.n.01')])
-
-        # Test pertainyms.
-        self.assertEqual(
-            L('English.a.1.English').pertainyms(), [L('england.n.01.England')]
-        )
-
-    def test_lch(self):
-        # Test LCH.
-        self.assertEqual(
-            S('person.n.01').lowest_common_hypernyms(S('dog.n.01')),
-            [S('organism.n.01')],
-        )
-        self.assertEqual(
-            S('woman.n.01').lowest_common_hypernyms(S('girlfriend.n.02')),
-            [S('woman.n.01')],
-        )
-
-    def test_domains(self):
-        # Test domains.
-        self.assertEqual(S('code.n.03').topic_domains(), [S('computer_science.n.01')])
-        self.assertEqual(S('pukka.a.01').region_domains(), [S('india.n.01')])
-        self.assertEqual(S('freaky.a.01').usage_domains(), [S('slang.n.02')])
-
-    def test_in_topic_domains(self):
-        # Test in domains.
-        self.assertEqual(
-            S('computer_science.n.01').in_topic_domains()[0], S('access.n.05')
-        )
-        self.assertEqual(S('germany.n.01').in_region_domains()[23], S('trillion.n.02'))
-        self.assertEqual(S('slang.n.02').in_usage_domains()[1], S('airhead.n.01'))
-
-    def test_wordnet_similarities(self):
-        # Path based similarities.
-        self.assertAlmostEqual(S('cat.n.01').path_similarity(S('cat.n.01')), 1.0)
-        self.assertAlmostEqual(S('dog.n.01').path_similarity(S('cat.n.01')), 0.2)
-        self.assertAlmostEqual(
-            S('dog.n.01').lch_similarity(S('cat.n.01')), 2.028, places=3
-        )
-        self.assertAlmostEqual(
-            S('dog.n.01').wup_similarity(S('cat.n.01')), 0.8571, places=3
-        )
-        # Information Content similarities.
-        brown_ic = wnic.ic('ic-brown.dat')
-        self.assertAlmostEqual(
-            S('dog.n.01').jcn_similarity(S('cat.n.01'), brown_ic), 0.4497, places=3
-        )
-        semcor_ic = wnic.ic('ic-semcor.dat')
-        self.assertAlmostEqual(
-            S('dog.n.01').lin_similarity(S('cat.n.01'), semcor_ic), 0.8863, places=3
-        )
diff --git a/nlp_resource_data/nltk/test/unit/translate/__init__.py b/nlp_resource_data/nltk/test/unit/translate/__init__.py

deleted file mode 100644 (file)

index e69de29..0000000
diff --git a/nlp_resource_data/nltk/test/unit/translate/__pycache__/__init__.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/translate/__pycache__/__init__.cpython-37.pyc

deleted file mode 100644 (file)

index 48b64c7..0000000

Binary files a/nlp_resource_data/nltk/test/unit/translate/__pycache__/__init__.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_bleu.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_bleu.cpython-37.pyc

deleted file mode 100644 (file)

index 2dce884..0000000

Binary files a/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_bleu.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_gdfa.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_gdfa.cpython-37.pyc

deleted file mode 100644 (file)

index c973f8a..0000000

Binary files a/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_gdfa.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm1.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm1.cpython-37.pyc

deleted file mode 100644 (file)

index dc66b4d..0000000

Binary files a/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm1.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm2.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm2.cpython-37.pyc

deleted file mode 100644 (file)

index 02014af..0000000

Binary files a/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm2.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm3.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm3.cpython-37.pyc

deleted file mode 100644 (file)

index 316900a..0000000

Binary files a/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm3.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm4.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm4.cpython-37.pyc

deleted file mode 100644 (file)

index 4288e58..0000000

Binary files a/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm4.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm5.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm5.cpython-37.pyc

deleted file mode 100644 (file)

index ed639a8..0000000

Binary files a/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm5.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm_model.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm_model.cpython-37.pyc

deleted file mode 100644 (file)

index fba69a8..0000000

Binary files a/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm_model.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_nist.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_nist.cpython-37.pyc

deleted file mode 100644 (file)

index 5c7aacc..0000000

Binary files a/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_nist.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_stack_decoder.cpython-37.pyc b/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_stack_decoder.cpython-37.pyc

deleted file mode 100644 (file)

index 10c1bcc..0000000

Binary files a/nlp_resource_data/nltk/test/unit/translate/__pycache__/test_stack_decoder.cpython-37.pyc and /dev/null differ
diff --git a/nlp_resource_data/nltk/test/unit/translate/test_bleu.py b/nlp_resource_data/nltk/test/unit/translate/test_bleu.py

deleted file mode 100644 (file)

index a97d4de..0000000
--- a/nlp_resource_data/nltk/test/unit/translate/test_bleu.py
+++ /dev/null
@@ -1,271 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Tests for BLEU translation evaluation metric
-"""
-
-import functools
-import io
-import unittest
-
-from nltk.data import find
-from nltk.translate.bleu_score import (
-    modified_precision,
-    brevity_penalty,
-    closest_ref_length,
-)
-from nltk.translate.bleu_score import sentence_bleu, corpus_bleu, SmoothingFunction
-
-
-class TestBLEU(unittest.TestCase):
-    def test_modified_precision(self):
-        """
-        Examples from the original BLEU paper
-        http://www.aclweb.org/anthology/P02-1040.pdf
-        """
-        # Example 1: the "the*" example.
-        # Reference sentences.
-        ref1 = 'the cat is on the mat'.split()
-        ref2 = 'there is a cat on the mat'.split()
-        # Hypothesis sentence(s).
-        hyp1 = 'the the the the the the the'.split()
-
-        references = [ref1, ref2]
-
-        # Testing modified unigram precision.
-        hyp1_unigram_precision = float(modified_precision(references, hyp1, n=1))
-        assert round(hyp1_unigram_precision, 4) == 0.2857
-        # With assertAlmostEqual at 4 place precision.
-        self.assertAlmostEqual(hyp1_unigram_precision, 0.28571428, places=4)
-
-        # Testing modified bigram precision.
-        assert float(modified_precision(references, hyp1, n=2)) == 0.0
-
-        # Example 2: the "of the" example.
-        # Reference sentences
-        ref1 = str(
-            'It is a guide to action that ensures that the military '
-            'will forever heed Party commands'
-        ).split()
-        ref2 = str(
-            'It is the guiding principle which guarantees the military '
-            'forces always being under the command of the Party'
-        ).split()
-        ref3 = str(
-            'It is the practical guide for the army always to heed '
-            'the directions of the party'
-        ).split()
-        # Hypothesis sentence(s).
-        hyp1 = 'of the'.split()
-
-        references = [ref1, ref2, ref3]
-        # Testing modified unigram precision.
-        assert float(modified_precision(references, hyp1, n=1)) == 1.0
-
-        # Testing modified bigram precision.
-        assert float(modified_precision(references, hyp1, n=2)) == 1.0
-
-        # Example 3: Proper MT outputs.
-        hyp1 = str(
-            'It is a guide to action which ensures that the military '
-            'always obeys the commands of the party'
-        ).split()
-        hyp2 = str(
-            'It is to insure the troops forever hearing the activity '
-            'guidebook that party direct'
-        ).split()
-
-        references = [ref1, ref2, ref3]
-
-        # Unigram precision.
-        hyp1_unigram_precision = float(modified_precision(references, hyp1, n=1))
-        hyp2_unigram_precision = float(modified_precision(references, hyp2, n=1))
-        # Test unigram precision with assertAlmostEqual at 4 place precision.
-        self.assertAlmostEqual(hyp1_unigram_precision, 0.94444444, places=4)
-        self.assertAlmostEqual(hyp2_unigram_precision, 0.57142857, places=4)
-        # Test unigram precision with rounding.
-        assert round(hyp1_unigram_precision, 4) == 0.9444
-        assert round(hyp2_unigram_precision, 4) == 0.5714
-
-        # Bigram precision
-        hyp1_bigram_precision = float(modified_precision(references, hyp1, n=2))
-        hyp2_bigram_precision = float(modified_precision(references, hyp2, n=2))
-        # Test bigram precision with assertAlmostEqual at 4 place precision.
-        self.assertAlmostEqual(hyp1_bigram_precision, 0.58823529, places=4)
-        self.assertAlmostEqual(hyp2_bigram_precision, 0.07692307, places=4)
-        # Test bigram precision with rounding.
-        assert round(hyp1_bigram_precision, 4) == 0.5882
-        assert round(hyp2_bigram_precision, 4) == 0.0769
-
-    def test_brevity_penalty(self):
-        # Test case from brevity_penalty_closest function in mteval-v13a.pl.
-        # Same test cases as in the doctest in nltk.translate.bleu_score.py
-        references = [['a'] * 11, ['a'] * 8]
-        hypothesis = ['a'] * 7
-        hyp_len = len(hypothesis)
-        closest_ref_len = closest_ref_length(references, hyp_len)
-        self.assertAlmostEqual(
-            brevity_penalty(closest_ref_len, hyp_len), 0.8669, places=4
-        )
-
-        references = [['a'] * 11, ['a'] * 8, ['a'] * 6, ['a'] * 7]
-        hypothesis = ['a'] * 7
-        hyp_len = len(hypothesis)
-        closest_ref_len = closest_ref_length(references, hyp_len)
-        assert brevity_penalty(closest_ref_len, hyp_len) == 1.0
-
-    def test_zero_matches(self):
-        # Test case where there's 0 matches
-        references = ['The candidate has no alignment to any of the references'.split()]
-        hypothesis = 'John loves Mary'.split()
-
-        # Test BLEU to nth order of n-grams, where n is len(hypothesis).
-        for n in range(1, len(hypothesis)):
-            weights = [1.0 / n] * n  # Uniform weights.
-            assert sentence_bleu(references, hypothesis, weights) == 0
-
-    def test_full_matches(self):
-        # Test case where there's 100% matches
-        references = ['John loves Mary'.split()]
-        hypothesis = 'John loves Mary'.split()
-
-        # Test BLEU to nth order of n-grams, where n is len(hypothesis).
-        for n in range(1, len(hypothesis)):
-            weights = [1.0 / n] * n  # Uniform weights.
-            assert sentence_bleu(references, hypothesis, weights) == 1.0
-
-    def test_partial_matches_hypothesis_longer_than_reference(self):
-        references = ['John loves Mary'.split()]
-        hypothesis = 'John loves Mary who loves Mike'.split()
-        # Since no 4-grams matches were found the result should be zero
-        # exp(w_1 * 1 * w_2 * 1 * w_3 * 1 * w_4 * -inf) = 0
-        self.assertAlmostEqual(sentence_bleu(references, hypothesis), 0.0, places=4)
-        # Checks that the warning has been raised because len(reference) < 4.
-        try:
-            self.assertWarns(UserWarning, sentence_bleu, references, hypothesis)
-        except AttributeError:
-            pass  # unittest.TestCase.assertWarns is only supported in Python >= 3.2.
-
-
-# @unittest.skip("Skipping fringe cases for BLEU.")
-class TestBLEUFringeCases(unittest.TestCase):
-    def test_case_where_n_is_bigger_than_hypothesis_length(self):
-        # Test BLEU to nth order of n-grams, where n > len(hypothesis).
-        references = ['John loves Mary ?'.split()]
-        hypothesis = 'John loves Mary'.split()
-        n = len(hypothesis) + 1  #
-        weights = [1.0 / n] * n  # Uniform weights.
-        # Since no n-grams matches were found the result should be zero
-        # exp(w_1 * 1 * w_2 * 1 * w_3 * 1 * w_4 * -inf) = 0
-        self.assertAlmostEqual(
-            sentence_bleu(references, hypothesis, weights), 0.0, places=4
-        )
-        # Checks that the warning has been raised because len(hypothesis) < 4.
-        try:
-            self.assertWarns(UserWarning, sentence_bleu, references, hypothesis)
-        except AttributeError:
-            pass  # unittest.TestCase.assertWarns is only supported in Python >= 3.2.
-
-        # Test case where n > len(hypothesis) but so is n > len(reference), and
-        # it's a special case where reference == hypothesis.
-        references = ['John loves Mary'.split()]
-        hypothesis = 'John loves Mary'.split()
-        # Since no 4-grams matches were found the result should be zero
-        # exp(w_1 * 1 * w_2 * 1 * w_3 * 1 * w_4 * -inf) = 0
-        self.assertAlmostEqual(
-            sentence_bleu(references, hypothesis, weights), 0.0, places=4
-        )
-
-    def test_empty_hypothesis(self):
-        # Test case where there's hypothesis is empty.
-        references = ['The candidate has no alignment to any of the references'.split()]
-        hypothesis = []
-        assert sentence_bleu(references, hypothesis) == 0
-
-    def test_empty_references(self):
-        # Test case where there's reference is empty.
-        references = [[]]
-        hypothesis = 'John loves Mary'.split()
-        assert sentence_bleu(references, hypothesis) == 0
-
-    def test_empty_references_and_hypothesis(self):
-        # Test case where both references and hypothesis is empty.
-        references = [[]]
-        hypothesis = []
-        assert sentence_bleu(references, hypothesis) == 0
-
-    def test_reference_or_hypothesis_shorter_than_fourgrams(self):
-        # Tese case where the length of reference or hypothesis
-        # is shorter than 4.
-        references = ['let it go'.split()]
-        hypothesis = 'let go it'.split()
-        # Checks that the value the hypothesis and reference returns is 0.0
-        # exp(w_1 * 1 * w_2 * 1 * w_3 * 1 * w_4 * -inf) = 0
-        self.assertAlmostEqual(sentence_bleu(references, hypothesis), 0.0, places=4)
-        # Checks that the warning has been raised.
-        try:
-            self.assertWarns(UserWarning, sentence_bleu, references, hypothesis)
-        except AttributeError:
-            pass  # unittest.TestCase.assertWarns is only supported in Python >= 3.2.
-
-
-class TestBLEUvsMteval13a(unittest.TestCase):
-    def test_corpus_bleu(self):
-        ref_file = find('models/wmt15_eval/ref.ru')
-        hyp_file = find('models/wmt15_eval/google.ru')
-        mteval_output_file = find('models/wmt15_eval/mteval-13a.output')
-
-        # Reads the BLEU scores from the `mteval-13a.output` file.
-        # The order of the list corresponds to the order of the ngrams.
-        with open(mteval_output_file, 'r') as mteval_fin:
-            # The numbers are located in the last 2nd line of the file.
-            # The first and 2nd item in the list are the score and system names.
-            mteval_bleu_scores = map(float, mteval_fin.readlines()[-2].split()[1:-1])
-
-        with io.open(ref_file, 'r', encoding='utf8') as ref_fin:
-            with io.open(hyp_file, 'r', encoding='utf8') as hyp_fin:
-                # Whitespace tokenize the file.
-                # Note: split() automatically strip().
-                hypothesis = list(map(lambda x: x.split(), hyp_fin))
-                # Note that the corpus_bleu input is list of list of references.
-                references = list(map(lambda x: [x.split()], ref_fin))
-                # Without smoothing.
-                for i, mteval_bleu in zip(range(1, 10), mteval_bleu_scores):
-                    nltk_bleu = corpus_bleu(
-                        references, hypothesis, weights=(1.0 / i,) * i
-                    )
-                    # Check that the BLEU scores difference is less than 0.005 .
-                    # Note: This is an approximate comparison; as much as
-                    #       +/- 0.01 BLEU might be "statistically significant",
-                    #       the actual translation quality might not be.
-                    assert abs(mteval_bleu - nltk_bleu) < 0.005
-
-                # With the same smoothing method used in mteval-v13a.pl
-                chencherry = SmoothingFunction()
-                for i, mteval_bleu in zip(range(1, 10), mteval_bleu_scores):
-                    nltk_bleu = corpus_bleu(
-                        references,
-                        hypothesis,
-                        weights=(1.0 / i,) * i,
-                        smoothing_function=chencherry.method3,
-                    )
-                    assert abs(mteval_bleu - nltk_bleu) < 0.005
-
-
-class TestBLEUWithBadSentence(unittest.TestCase):
-    def test_corpus_bleu_with_bad_sentence(self):
-        hyp = "Teo S yb , oe uNb , R , T t , , t Tue Ar saln S , , 5istsi l , 5oe R ulO sae oR R"
-        ref = str(
-            "Their tasks include changing a pump on the faulty stokehold ."
-            "Likewise , two species that are very similar in morphology "
-            "were distinguished using genetics ."
-        )
-        references = [[ref.split()]]
-        hypotheses = [hyp.split()]
-        try:  # Check that the warning is raised since no. of 2-grams < 0.
-            with self.assertWarns(UserWarning):
-                # Verify that the BLEU output is undesired since no. of 2-grams < 0.
-                self.assertAlmostEqual(
-                    corpus_bleu(references, hypotheses), 0.0, places=4
-                )
-        except AttributeError:  # unittest.TestCase.assertWarns is only supported in Python >= 3.2.
-            self.assertAlmostEqual(corpus_bleu(references, hypotheses), 0.0, places=4)
diff --git a/nlp_resource_data/nltk/test/unit/translate/test_gdfa.py b/nlp_resource_data/nltk/test/unit/translate/test_gdfa.py

deleted file mode 100644 (file)

index 58db482..0000000
--- a/nlp_resource_data/nltk/test/unit/translate/test_gdfa.py
+++ /dev/null
@@ -1,157 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Tests GDFA alignments
-"""
-
-import functools
-import io
-import unittest
-
-from nltk.translate.gdfa import grow_diag_final_and
-
-
-class TestGDFA(unittest.TestCase):
-    def test_from_eflomal_outputs(self):
-        """
-        Testing GDFA with first 10 eflomal outputs from issue #1829
-        https://github.com/nltk/nltk/issues/1829
-        """
-        # Input.
-        forwards = [
-            '0-0 1-2',
-            '0-0 1-1',
-            '0-0 2-1 3-2 4-3 5-4 6-5 7-6 8-7 7-8 9-9 10-10 9-11 11-12 12-13 13-14',
-            '0-0 1-1 1-2 2-3 3-4 4-5 4-6 5-7 6-8 8-9 9-10',
-            '0-0 14-1 15-2 16-3 20-5 21-6 22-7 5-8 6-9 7-10 8-11 9-12 10-13 11-14 12-15 13-16 14-17 17-18 18-19 19-20 20-21 23-22 24-23 25-24 26-25 27-27 28-28 29-29 30-30 31-31',
-            '0-0 1-1 0-2 2-3',
-            '0-0 2-2 4-4',
-            '0-0 1-1 2-3 3-4 5-5 7-6 8-7 9-8 10-9 11-10 12-11 13-12 14-13 15-14 16-16 17-17 18-18 19-19 20-20',
-            '3-0 4-1 6-2 5-3 6-4 7-5 8-6 9-7 10-8 11-9 16-10 9-12 10-13 12-14',
-            '1-0',
-        ]
-        backwards = [
-            '0-0 1-2',
-            '0-0 1-1',
-            '0-0 2-1 3-2 4-3 5-4 6-5 7-6 8-7 9-8 10-10 11-12 12-11 13-13',
-            '0-0 1-2 2-3 3-4 4-6 6-8 7-5 8-7 9-8',
-            '0-0 1-8 2-9 3-10 4-11 5-12 6-11 8-13 9-14 10-15 11-16 12-17 13-18 14-19 15-20 16-21 17-22 18-23 19-24 20-29 21-30 22-31 23-2 24-3 25-4 26-5 27-5 28-6 29-7 30-28 31-31',
-            '0-0 1-1 2-3',
-            '0-0 1-1 2-3 4-4',
-            '0-0 1-1 2-3 3-4 5-5 7-6 8-7 9-8 10-9 11-10 12-11 13-12 14-13 15-14 16-16 17-17 18-18 19-19 20-16 21-18',
-            '0-0 1-1 3-2 4-1 5-3 6-4 7-5 8-6 9-7 10-8 11-9 12-8 13-9 14-8 15-9 16-10',
-            '1-0',
-        ]
-        source_lens = [2, 3, 3, 15, 11, 33, 4, 6, 23, 18]
-        target_lens = [2, 4, 3, 16, 12, 33, 5, 6, 22, 16]
-        # Expected Output.
-        expected = [
-            [(0, 0), (1, 2)],
-            [(0, 0), (1, 1)],
-            [
-                (0, 0),
-                (2, 1),
-                (3, 2),
-                (4, 3),
-                (5, 4),
-                (6, 5),
-                (7, 6),
-                (8, 7),
-                (10, 10),
-                (11, 12),
-            ],
-            [
-                (0, 0),
-                (1, 1),
-                (1, 2),
-                (2, 3),
-                (3, 4),
-                (4, 5),
-                (4, 6),
-                (5, 7),
-                (6, 8),
-                (7, 5),
-                (8, 7),
-                (8, 9),
-                (9, 8),
-                (9, 10),
-            ],
-            [
-                (0, 0),
-                (1, 8),
-                (2, 9),
-                (3, 10),
-                (4, 11),
-                (5, 8),
-                (6, 9),
-                (6, 11),
-                (7, 10),
-                (8, 11),
-                (31, 31),
-            ],
-            [(0, 0), (0, 2), (1, 1), (2, 3)],
-            [(0, 0), (1, 1), (2, 2), (2, 3), (4, 4)],
-            [
-                (0, 0),
-                (1, 1),
-                (2, 3),
-                (3, 4),
-                (5, 5),
-                (7, 6),
-                (8, 7),
-                (9, 8),
-                (10, 9),
-                (11, 10),
-                (12, 11),
-                (13, 12),
-                (14, 13),
-                (15, 14),
-                (16, 16),
-                (17, 17),
-                (18, 18),
-                (19, 19),
-            ],
-            [
-                (0, 0),
-                (1, 1),
-                (3, 0),
-                (3, 2),
-                (4, 1),
-                (5, 3),
-                (6, 2),
-                (6, 4),
-                (7, 5),
-                (8, 6),
-                (9, 7),
-                (9, 12),
-                (10, 8),
-                (10, 13),
-                (11, 9),
-                (12, 8),
-                (12, 14),
-                (13, 9),
-                (14, 8),
-                (15, 9),
-                (16, 10),
-            ],
-            [(1, 0)],
-            [
-                (0, 0),
-                (1, 1),
-                (3, 2),
-                (4, 3),
-                (5, 4),
-                (6, 5),
-                (7, 6),
-                (9, 10),
-                (10, 12),
-                (11, 13),
-                (12, 14),
-                (13, 15),
-            ],
-        ]
-
-        # Iterate through all 10 examples and check for expected outputs.
-        for fw, bw, src_len, trg_len, expect in zip(
-            forwards, backwards, source_lens, target_lens, expected
-        ):
-            self.assertListEqual(expect, grow_diag_final_and(src_len, trg_len, fw, bw))
diff --git a/nlp_resource_data/nltk/test/unit/translate/test_ibm1.py b/nlp_resource_data/nltk/test/unit/translate/test_ibm1.py

deleted file mode 100644 (file)

index ae8c941..0000000
--- a/nlp_resource_data/nltk/test/unit/translate/test_ibm1.py
+++ /dev/null
@@ -1,76 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Tests for IBM Model 1 training methods
-"""
-
-import unittest
-
-from collections import defaultdict
-from nltk.translate import AlignedSent
-from nltk.translate import IBMModel
-from nltk.translate import IBMModel1
-from nltk.translate.ibm_model import AlignmentInfo
-
-
-class TestIBMModel1(unittest.TestCase):
-    def test_set_uniform_translation_probabilities(self):
-        # arrange
-        corpus = [
-            AlignedSent(['ham', 'eggs'], ['schinken', 'schinken', 'eier']),
-            AlignedSent(['spam', 'spam', 'spam', 'spam'], ['spam', 'spam']),
-        ]
-        model1 = IBMModel1(corpus, 0)
-
-        # act
-        model1.set_uniform_probabilities(corpus)
-
-        # assert
-        # expected_prob = 1.0 / (target vocab size + 1)
-        self.assertEqual(model1.translation_table['ham']['eier'], 1.0 / 3)
-        self.assertEqual(model1.translation_table['eggs'][None], 1.0 / 3)
-
-    def test_set_uniform_translation_probabilities_of_non_domain_values(self):
-        # arrange
-        corpus = [
-            AlignedSent(['ham', 'eggs'], ['schinken', 'schinken', 'eier']),
-            AlignedSent(['spam', 'spam', 'spam', 'spam'], ['spam', 'spam']),
-        ]
-        model1 = IBMModel1(corpus, 0)
-
-        # act
-        model1.set_uniform_probabilities(corpus)
-
-        # assert
-        # examine target words that are not in the training data domain
-        self.assertEqual(model1.translation_table['parrot']['eier'], IBMModel.MIN_PROB)
-
-    def test_prob_t_a_given_s(self):
-        # arrange
-        src_sentence = ["ich", 'esse', 'ja', 'gern', 'räucherschinken']
-        trg_sentence = ['i', 'love', 'to', 'eat', 'smoked', 'ham']
-        corpus = [AlignedSent(trg_sentence, src_sentence)]
-        alignment_info = AlignmentInfo(
-            (0, 1, 4, 0, 2, 5, 5),
-            [None] + src_sentence,
-            ['UNUSED'] + trg_sentence,
-            None,
-        )
-
-        translation_table = defaultdict(lambda: defaultdict(float))
-        translation_table['i']['ich'] = 0.98
-        translation_table['love']['gern'] = 0.98
-        translation_table['to'][None] = 0.98
-        translation_table['eat']['esse'] = 0.98
-        translation_table['smoked']['räucherschinken'] = 0.98
-        translation_table['ham']['räucherschinken'] = 0.98
-
-        model1 = IBMModel1(corpus, 0)
-        model1.translation_table = translation_table
-
-        # act
-        probability = model1.prob_t_a_given_s(alignment_info)
-
-        # assert
-        lexical_translation = 0.98 * 0.98 * 0.98 * 0.98 * 0.98 * 0.98
-        expected_probability = lexical_translation
-        self.assertEqual(round(probability, 4), round(expected_probability, 4))
diff --git a/nlp_resource_data/nltk/test/unit/translate/test_ibm2.py b/nlp_resource_data/nltk/test/unit/translate/test_ibm2.py

deleted file mode 100644 (file)

index 1d0579b..0000000
--- a/nlp_resource_data/nltk/test/unit/translate/test_ibm2.py
+++ /dev/null
@@ -1,89 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Tests for IBM Model 2 training methods
-"""
-
-import unittest
-
-from collections import defaultdict
-from nltk.translate import AlignedSent
-from nltk.translate import IBMModel
-from nltk.translate import IBMModel2
-from nltk.translate.ibm_model import AlignmentInfo
-
-
-class TestIBMModel2(unittest.TestCase):
-    def test_set_uniform_alignment_probabilities(self):
-        # arrange
-        corpus = [
-            AlignedSent(['ham', 'eggs'], ['schinken', 'schinken', 'eier']),
-            AlignedSent(['spam', 'spam', 'spam', 'spam'], ['spam', 'spam']),
-        ]
-        model2 = IBMModel2(corpus, 0)
-
-        # act
-        model2.set_uniform_probabilities(corpus)
-
-        # assert
-        # expected_prob = 1.0 / (length of source sentence + 1)
-        self.assertEqual(model2.alignment_table[0][1][3][2], 1.0 / 4)
-        self.assertEqual(model2.alignment_table[2][4][2][4], 1.0 / 3)
-
-    def test_set_uniform_alignment_probabilities_of_non_domain_values(self):
-        # arrange
-        corpus = [
-            AlignedSent(['ham', 'eggs'], ['schinken', 'schinken', 'eier']),
-            AlignedSent(['spam', 'spam', 'spam', 'spam'], ['spam', 'spam']),
-        ]
-        model2 = IBMModel2(corpus, 0)
-
-        # act
-        model2.set_uniform_probabilities(corpus)
-
-        # assert
-        # examine i and j values that are not in the training data domain
-        self.assertEqual(model2.alignment_table[99][1][3][2], IBMModel.MIN_PROB)
-        self.assertEqual(model2.alignment_table[2][99][2][4], IBMModel.MIN_PROB)
-
-    def test_prob_t_a_given_s(self):
-        # arrange
-        src_sentence = ["ich", 'esse', 'ja', 'gern', 'räucherschinken']
-        trg_sentence = ['i', 'love', 'to', 'eat', 'smoked', 'ham']
-        corpus = [AlignedSent(trg_sentence, src_sentence)]
-        alignment_info = AlignmentInfo(
-            (0, 1, 4, 0, 2, 5, 5),
-            [None] + src_sentence,
-            ['UNUSED'] + trg_sentence,
-            None,
-        )
-
-        translation_table = defaultdict(lambda: defaultdict(float))
-        translation_table['i']['ich'] = 0.98
-        translation_table['love']['gern'] = 0.98
-        translation_table['to'][None] = 0.98
-        translation_table['eat']['esse'] = 0.98
-        translation_table['smoked']['räucherschinken'] = 0.98
-        translation_table['ham']['räucherschinken'] = 0.98
-
-        alignment_table = defaultdict(
-            lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(float)))
-        )
-        alignment_table[0][3][5][6] = 0.97  # None -> to
-        alignment_table[1][1][5][6] = 0.97  # ich -> i
-        alignment_table[2][4][5][6] = 0.97  # esse -> eat
-        alignment_table[4][2][5][6] = 0.97  # gern -> love
-        alignment_table[5][5][5][6] = 0.96  # räucherschinken -> smoked
-        alignment_table[5][6][5][6] = 0.96  # räucherschinken -> ham
-
-        model2 = IBMModel2(corpus, 0)
-        model2.translation_table = translation_table
-        model2.alignment_table = alignment_table
-
-        # act
-        probability = model2.prob_t_a_given_s(alignment_info)
-
-        # assert
-        lexical_translation = 0.98 * 0.98 * 0.98 * 0.98 * 0.98 * 0.98
-        alignment = 0.97 * 0.97 * 0.97 * 0.97 * 0.96 * 0.96
-        expected_probability = lexical_translation * alignment
-        self.assertEqual(round(probability, 4), round(expected_probability, 4))
diff --git a/nlp_resource_data/nltk/test/unit/translate/test_ibm3.py b/nlp_resource_data/nltk/test/unit/translate/test_ibm3.py

deleted file mode 100644 (file)

index 7c42404..0000000
--- a/nlp_resource_data/nltk/test/unit/translate/test_ibm3.py
+++ /dev/null
@@ -1,108 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Tests for IBM Model 3 training methods
-"""
-
-import unittest
-
-from collections import defaultdict
-from nltk.translate import AlignedSent
-from nltk.translate import IBMModel
-from nltk.translate import IBMModel3
-from nltk.translate.ibm_model import AlignmentInfo
-
-
-class TestIBMModel3(unittest.TestCase):
-    def test_set_uniform_distortion_probabilities(self):
-        # arrange
-        corpus = [
-            AlignedSent(['ham', 'eggs'], ['schinken', 'schinken', 'eier']),
-            AlignedSent(['spam', 'spam', 'spam', 'spam'], ['spam', 'spam']),
-        ]
-        model3 = IBMModel3(corpus, 0)
-
-        # act
-        model3.set_uniform_probabilities(corpus)
-
-        # assert
-        # expected_prob = 1.0 / length of target sentence
-        self.assertEqual(model3.distortion_table[1][0][3][2], 1.0 / 2)
-        self.assertEqual(model3.distortion_table[4][2][2][4], 1.0 / 4)
-
-    def test_set_uniform_distortion_probabilities_of_non_domain_values(self):
-        # arrange
-        corpus = [
-            AlignedSent(['ham', 'eggs'], ['schinken', 'schinken', 'eier']),
-            AlignedSent(['spam', 'spam', 'spam', 'spam'], ['spam', 'spam']),
-        ]
-        model3 = IBMModel3(corpus, 0)
-
-        # act
-        model3.set_uniform_probabilities(corpus)
-
-        # assert
-        # examine i and j values that are not in the training data domain
-        self.assertEqual(model3.distortion_table[0][0][3][2], IBMModel.MIN_PROB)
-        self.assertEqual(model3.distortion_table[9][2][2][4], IBMModel.MIN_PROB)
-        self.assertEqual(model3.distortion_table[2][9][2][4], IBMModel.MIN_PROB)
-
-    def test_prob_t_a_given_s(self):
-        # arrange
-        src_sentence = ["ich", 'esse', 'ja', 'gern', 'räucherschinken']
-        trg_sentence = ['i', 'love', 'to', 'eat', 'smoked', 'ham']
-        corpus = [AlignedSent(trg_sentence, src_sentence)]
-        alignment_info = AlignmentInfo(
-            (0, 1, 4, 0, 2, 5, 5),
-            [None] + src_sentence,
-            ['UNUSED'] + trg_sentence,
-            [[3], [1], [4], [], [2], [5, 6]],
-        )
-
-        distortion_table = defaultdict(
-            lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(float)))
-        )
-        distortion_table[1][1][5][6] = 0.97  # i -> ich
-        distortion_table[2][4][5][6] = 0.97  # love -> gern
-        distortion_table[3][0][5][6] = 0.97  # to -> NULL
-        distortion_table[4][2][5][6] = 0.97  # eat -> esse
-        distortion_table[5][5][5][6] = 0.97  # smoked -> räucherschinken
-        distortion_table[6][5][5][6] = 0.97  # ham -> räucherschinken
-
-        translation_table = defaultdict(lambda: defaultdict(float))
-        translation_table['i']['ich'] = 0.98
-        translation_table['love']['gern'] = 0.98
-        translation_table['to'][None] = 0.98
-        translation_table['eat']['esse'] = 0.98
-        translation_table['smoked']['räucherschinken'] = 0.98
-        translation_table['ham']['räucherschinken'] = 0.98
-
-        fertility_table = defaultdict(lambda: defaultdict(float))
-        fertility_table[1]['ich'] = 0.99
-        fertility_table[1]['esse'] = 0.99
-        fertility_table[0]['ja'] = 0.99
-        fertility_table[1]['gern'] = 0.99
-        fertility_table[2]['räucherschinken'] = 0.999
-        fertility_table[1][None] = 0.99
-
-        probabilities = {
-            'p1': 0.167,
-            'translation_table': translation_table,
-            'distortion_table': distortion_table,
-            'fertility_table': fertility_table,
-            'alignment_table': None,
-        }
-
-        model3 = IBMModel3(corpus, 0, probabilities)
-
-        # act
-        probability = model3.prob_t_a_given_s(alignment_info)
-
-        # assert
-        null_generation = 5 * pow(0.167, 1) * pow(0.833, 4)
-        fertility = 1 * 0.99 * 1 * 0.99 * 1 * 0.99 * 1 * 0.99 * 2 * 0.999
-        lexical_translation = 0.98 * 0.98 * 0.98 * 0.98 * 0.98 * 0.98
-        distortion = 0.97 * 0.97 * 0.97 * 0.97 * 0.97 * 0.97
-        expected_probability = (
-            null_generation * fertility * lexical_translation * distortion
-        )
-        self.assertEqual(round(probability, 4), round(expected_probability, 4))
diff --git a/nlp_resource_data/nltk/test/unit/translate/test_ibm4.py b/nlp_resource_data/nltk/test/unit/translate/test_ibm4.py

deleted file mode 100644 (file)

index c6e5398..0000000
--- a/nlp_resource_data/nltk/test/unit/translate/test_ibm4.py
+++ /dev/null
@@ -1,123 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Tests for IBM Model 4 training methods
-"""
-
-import unittest
-
-from collections import defaultdict
-from nltk.translate import AlignedSent
-from nltk.translate import IBMModel
-from nltk.translate import IBMModel4
-from nltk.translate.ibm_model import AlignmentInfo
-
-
-class TestIBMModel4(unittest.TestCase):
-    def test_set_uniform_distortion_probabilities_of_max_displacements(self):
-        # arrange
-        src_classes = {'schinken': 0, 'eier': 0, 'spam': 1}
-        trg_classes = {'ham': 0, 'eggs': 1, 'spam': 2}
-        corpus = [
-            AlignedSent(['ham', 'eggs'], ['schinken', 'schinken', 'eier']),
-            AlignedSent(['spam', 'spam', 'spam', 'spam'], ['spam', 'spam']),
-        ]
-        model4 = IBMModel4(corpus, 0, src_classes, trg_classes)
-
-        # act
-        model4.set_uniform_probabilities(corpus)
-
-        # assert
-        # number of displacement values =
-        #     2 *(number of words in longest target sentence - 1)
-        expected_prob = 1.0 / (2 * (4 - 1))
-
-        # examine the boundary values for (displacement, src_class, trg_class)
-        self.assertEqual(model4.head_distortion_table[3][0][0], expected_prob)
-        self.assertEqual(model4.head_distortion_table[-3][1][2], expected_prob)
-        self.assertEqual(model4.non_head_distortion_table[3][0], expected_prob)
-        self.assertEqual(model4.non_head_distortion_table[-3][2], expected_prob)
-
-    def test_set_uniform_distortion_probabilities_of_non_domain_values(self):
-        # arrange
-        src_classes = {'schinken': 0, 'eier': 0, 'spam': 1}
-        trg_classes = {'ham': 0, 'eggs': 1, 'spam': 2}
-        corpus = [
-            AlignedSent(['ham', 'eggs'], ['schinken', 'schinken', 'eier']),
-            AlignedSent(['spam', 'spam', 'spam', 'spam'], ['spam', 'spam']),
-        ]
-        model4 = IBMModel4(corpus, 0, src_classes, trg_classes)
-
-        # act
-        model4.set_uniform_probabilities(corpus)
-
-        # assert
-        # examine displacement values that are not in the training data domain
-        self.assertEqual(model4.head_distortion_table[4][0][0], IBMModel.MIN_PROB)
-        self.assertEqual(model4.head_distortion_table[100][1][2], IBMModel.MIN_PROB)
-        self.assertEqual(model4.non_head_distortion_table[4][0], IBMModel.MIN_PROB)
-        self.assertEqual(model4.non_head_distortion_table[100][2], IBMModel.MIN_PROB)
-
-    def test_prob_t_a_given_s(self):
-        # arrange
-        src_sentence = ["ich", 'esse', 'ja', 'gern', 'räucherschinken']
-        trg_sentence = ['i', 'love', 'to', 'eat', 'smoked', 'ham']
-        src_classes = {'räucherschinken': 0, 'ja': 1, 'ich': 2, 'esse': 3, 'gern': 4}
-        trg_classes = {'ham': 0, 'smoked': 1, 'i': 3, 'love': 4, 'to': 2, 'eat': 4}
-        corpus = [AlignedSent(trg_sentence, src_sentence)]
-        alignment_info = AlignmentInfo(
-            (0, 1, 4, 0, 2, 5, 5),
-            [None] + src_sentence,
-            ['UNUSED'] + trg_sentence,
-            [[3], [1], [4], [], [2], [5, 6]],
-        )
-
-        head_distortion_table = defaultdict(
-            lambda: defaultdict(lambda: defaultdict(float))
-        )
-        head_distortion_table[1][None][3] = 0.97  # None, i
-        head_distortion_table[3][2][4] = 0.97  # ich, eat
-        head_distortion_table[-2][3][4] = 0.97  # esse, love
-        head_distortion_table[3][4][1] = 0.97  # gern, smoked
-
-        non_head_distortion_table = defaultdict(lambda: defaultdict(float))
-        non_head_distortion_table[1][0] = 0.96  # ham
-
-        translation_table = defaultdict(lambda: defaultdict(float))
-        translation_table['i']['ich'] = 0.98
-        translation_table['love']['gern'] = 0.98
-        translation_table['to'][None] = 0.98
-        translation_table['eat']['esse'] = 0.98
-        translation_table['smoked']['räucherschinken'] = 0.98
-        translation_table['ham']['räucherschinken'] = 0.98
-
-        fertility_table = defaultdict(lambda: defaultdict(float))
-        fertility_table[1]['ich'] = 0.99
-        fertility_table[1]['esse'] = 0.99
-        fertility_table[0]['ja'] = 0.99
-        fertility_table[1]['gern'] = 0.99
-        fertility_table[2]['räucherschinken'] = 0.999
-        fertility_table[1][None] = 0.99
-
-        probabilities = {
-            'p1': 0.167,
-            'translation_table': translation_table,
-            'head_distortion_table': head_distortion_table,
-            'non_head_distortion_table': non_head_distortion_table,
-            'fertility_table': fertility_table,
-            'alignment_table': None,
-        }
-
-        model4 = IBMModel4(corpus, 0, src_classes, trg_classes, probabilities)
-
-        # act
-        probability = model4.prob_t_a_given_s(alignment_info)
-
-        # assert
-        null_generation = 5 * pow(0.167, 1) * pow(0.833, 4)
-        fertility = 1 * 0.99 * 1 * 0.99 * 1 * 0.99 * 1 * 0.99 * 2 * 0.999
-        lexical_translation = 0.98 * 0.98 * 0.98 * 0.98 * 0.98 * 0.98
-        distortion = 0.97 * 0.97 * 1 * 0.97 * 0.97 * 0.96
-        expected_probability = (
-            null_generation * fertility * lexical_translation * distortion
-        )
-        self.assertEqual(round(probability, 4), round(expected_probability, 4))
diff --git a/nlp_resource_data/nltk/test/unit/translate/test_ibm5.py b/nlp_resource_data/nltk/test/unit/translate/test_ibm5.py

deleted file mode 100644 (file)

index a3eecb3..0000000
--- a/nlp_resource_data/nltk/test/unit/translate/test_ibm5.py
+++ /dev/null
@@ -1,164 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Tests for IBM Model 5 training methods
-"""
-
-import unittest
-
-from collections import defaultdict
-from nltk.translate import AlignedSent
-from nltk.translate import IBMModel
-from nltk.translate import IBMModel4
-from nltk.translate import IBMModel5
-from nltk.translate.ibm_model import AlignmentInfo
-
-
-class TestIBMModel5(unittest.TestCase):
-    def test_set_uniform_vacancy_probabilities_of_max_displacements(self):
-        # arrange
-        src_classes = {'schinken': 0, 'eier': 0, 'spam': 1}
-        trg_classes = {'ham': 0, 'eggs': 1, 'spam': 2}
-        corpus = [
-            AlignedSent(['ham', 'eggs'], ['schinken', 'schinken', 'eier']),
-            AlignedSent(['spam', 'spam', 'spam', 'spam'], ['spam', 'spam']),
-        ]
-        model5 = IBMModel5(corpus, 0, src_classes, trg_classes)
-
-        # act
-        model5.set_uniform_probabilities(corpus)
-
-        # assert
-        # number of vacancy difference values =
-        #     2 * number of words in longest target sentence
-        expected_prob = 1.0 / (2 * 4)
-
-        # examine the boundary values for (dv, max_v, trg_class)
-        self.assertEqual(model5.head_vacancy_table[4][4][0], expected_prob)
-        self.assertEqual(model5.head_vacancy_table[-3][1][2], expected_prob)
-        self.assertEqual(model5.non_head_vacancy_table[4][4][0], expected_prob)
-        self.assertEqual(model5.non_head_vacancy_table[-3][1][2], expected_prob)
-
-    def test_set_uniform_vacancy_probabilities_of_non_domain_values(self):
-        # arrange
-        src_classes = {'schinken': 0, 'eier': 0, 'spam': 1}
-        trg_classes = {'ham': 0, 'eggs': 1, 'spam': 2}
-        corpus = [
-            AlignedSent(['ham', 'eggs'], ['schinken', 'schinken', 'eier']),
-            AlignedSent(['spam', 'spam', 'spam', 'spam'], ['spam', 'spam']),
-        ]
-        model5 = IBMModel5(corpus, 0, src_classes, trg_classes)
-
-        # act
-        model5.set_uniform_probabilities(corpus)
-
-        # assert
-        # examine dv and max_v values that are not in the training data domain
-        self.assertEqual(model5.head_vacancy_table[5][4][0], IBMModel.MIN_PROB)
-        self.assertEqual(model5.head_vacancy_table[-4][1][2], IBMModel.MIN_PROB)
-        self.assertEqual(model5.head_vacancy_table[4][0][0], IBMModel.MIN_PROB)
-        self.assertEqual(model5.non_head_vacancy_table[5][4][0], IBMModel.MIN_PROB)
-        self.assertEqual(model5.non_head_vacancy_table[-4][1][2], IBMModel.MIN_PROB)
-
-    def test_prob_t_a_given_s(self):
-        # arrange
-        src_sentence = ["ich", 'esse', 'ja', 'gern', 'räucherschinken']
-        trg_sentence = ['i', 'love', 'to', 'eat', 'smoked', 'ham']
-        src_classes = {'räucherschinken': 0, 'ja': 1, 'ich': 2, 'esse': 3, 'gern': 4}
-        trg_classes = {'ham': 0, 'smoked': 1, 'i': 3, 'love': 4, 'to': 2, 'eat': 4}
-        corpus = [AlignedSent(trg_sentence, src_sentence)]
-        alignment_info = AlignmentInfo(
-            (0, 1, 4, 0, 2, 5, 5),
-            [None] + src_sentence,
-            ['UNUSED'] + trg_sentence,
-            [[3], [1], [4], [], [2], [5, 6]],
-        )
-
-        head_vacancy_table = defaultdict(
-            lambda: defaultdict(lambda: defaultdict(float))
-        )
-        head_vacancy_table[1 - 0][6][3] = 0.97  # ich -> i
-        head_vacancy_table[3 - 0][5][4] = 0.97  # esse -> eat
-        head_vacancy_table[1 - 2][4][4] = 0.97  # gern -> love
-        head_vacancy_table[2 - 0][2][1] = 0.97  # räucherschinken -> smoked
-
-        non_head_vacancy_table = defaultdict(
-            lambda: defaultdict(lambda: defaultdict(float))
-        )
-        non_head_vacancy_table[1 - 0][1][0] = 0.96  # räucherschinken -> ham
-
-        translation_table = defaultdict(lambda: defaultdict(float))
-        translation_table['i']['ich'] = 0.98
-        translation_table['love']['gern'] = 0.98
-        translation_table['to'][None] = 0.98
-        translation_table['eat']['esse'] = 0.98
-        translation_table['smoked']['räucherschinken'] = 0.98
-        translation_table['ham']['räucherschinken'] = 0.98
-
-        fertility_table = defaultdict(lambda: defaultdict(float))
-        fertility_table[1]['ich'] = 0.99
-        fertility_table[1]['esse'] = 0.99
-        fertility_table[0]['ja'] = 0.99
-        fertility_table[1]['gern'] = 0.99
-        fertility_table[2]['räucherschinken'] = 0.999
-        fertility_table[1][None] = 0.99
-
-        probabilities = {
-            'p1': 0.167,
-            'translation_table': translation_table,
-            'fertility_table': fertility_table,
-            'head_vacancy_table': head_vacancy_table,
-            'non_head_vacancy_table': non_head_vacancy_table,
-            'head_distortion_table': None,
-            'non_head_distortion_table': None,
-            'alignment_table': None,
-        }
-
-        model5 = IBMModel5(corpus, 0, src_classes, trg_classes, probabilities)
-
-        # act
-        probability = model5.prob_t_a_given_s(alignment_info)
-
-        # assert
-        null_generation = 5 * pow(0.167, 1) * pow(0.833, 4)
-        fertility = 1 * 0.99 * 1 * 0.99 * 1 * 0.99 * 1 * 0.99 * 2 * 0.999
-        lexical_translation = 0.98 * 0.98 * 0.98 * 0.98 * 0.98 * 0.98
-        vacancy = 0.97 * 0.97 * 1 * 0.97 * 0.97 * 0.96
-        expected_probability = (
-            null_generation * fertility * lexical_translation * vacancy
-        )
-        self.assertEqual(round(probability, 4), round(expected_probability, 4))
-
-    def test_prune(self):
-        # arrange
-        alignment_infos = [
-            AlignmentInfo((1, 1), None, None, None),
-            AlignmentInfo((1, 2), None, None, None),
-            AlignmentInfo((2, 1), None, None, None),
-            AlignmentInfo((2, 2), None, None, None),
-            AlignmentInfo((0, 0), None, None, None),
-        ]
-        min_factor = IBMModel5.MIN_SCORE_FACTOR
-        best_score = 0.9
-        scores = {
-            (1, 1): min(min_factor * 1.5, 1) * best_score,  # above threshold
-            (1, 2): best_score,
-            (2, 1): min_factor * best_score,  # at threshold
-            (2, 2): min_factor * best_score * 0.5,  # low score
-            (0, 0): min(min_factor * 1.1, 1) * 1.2,  # above threshold
-        }
-        corpus = [AlignedSent(['a'], ['b'])]
-        original_prob_function = IBMModel4.model4_prob_t_a_given_s
-        # mock static method
-        IBMModel4.model4_prob_t_a_given_s = staticmethod(
-            lambda a, model: scores[a.alignment]
-        )
-        model5 = IBMModel5(corpus, 0, None, None)
-
-        # act
-        pruned_alignments = model5.prune(alignment_infos)
-
-        # assert
-        self.assertEqual(len(pruned_alignments), 3)
-
-        # restore static method
-        IBMModel4.model4_prob_t_a_given_s = original_prob_function
diff --git a/nlp_resource_data/nltk/test/unit/translate/test_ibm_model.py b/nlp_resource_data/nltk/test/unit/translate/test_ibm_model.py

deleted file mode 100644 (file)

index 31383bc..0000000
--- a/nlp_resource_data/nltk/test/unit/translate/test_ibm_model.py
+++ /dev/null
@@ -1,279 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Tests for common methods of IBM translation models
-"""
-
-import unittest
-
-from collections import defaultdict
-from nltk.translate import AlignedSent
-from nltk.translate import IBMModel
-from nltk.translate.ibm_model import AlignmentInfo
-
-
-class TestIBMModel(unittest.TestCase):
-    __TEST_SRC_SENTENCE = ["j'", 'aime', 'bien', 'jambon']
-    __TEST_TRG_SENTENCE = ['i', 'love', 'ham']
-
-    def test_vocabularies_are_initialized(self):
-        parallel_corpora = [
-            AlignedSent(['one', 'two', 'three', 'four'], ['un', 'deux', 'trois']),
-            AlignedSent(['five', 'one', 'six'], ['quatre', 'cinq', 'six']),
-            AlignedSent([], ['sept']),
-        ]
-
-        ibm_model = IBMModel(parallel_corpora)
-        self.assertEqual(len(ibm_model.src_vocab), 8)
-        self.assertEqual(len(ibm_model.trg_vocab), 6)
-
-    def test_vocabularies_are_initialized_even_with_empty_corpora(self):
-        parallel_corpora = []
-
-        ibm_model = IBMModel(parallel_corpora)
-        self.assertEqual(len(ibm_model.src_vocab), 1)  # addition of NULL token
-        self.assertEqual(len(ibm_model.trg_vocab), 0)
-
-    def test_best_model2_alignment(self):
-        # arrange
-        sentence_pair = AlignedSent(
-            TestIBMModel.__TEST_TRG_SENTENCE, TestIBMModel.__TEST_SRC_SENTENCE
-        )
-        # None and 'bien' have zero fertility
-        translation_table = {
-            'i': {"j'": 0.9, 'aime': 0.05, 'bien': 0.02, 'jambon': 0.03, None: 0},
-            'love': {"j'": 0.05, 'aime': 0.9, 'bien': 0.01, 'jambon': 0.01, None: 0.03},
-            'ham': {"j'": 0, 'aime': 0.01, 'bien': 0, 'jambon': 0.99, None: 0},
-        }
-        alignment_table = defaultdict(
-            lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 0.2)))
-        )
-
-        ibm_model = IBMModel([])
-        ibm_model.translation_table = translation_table
-        ibm_model.alignment_table = alignment_table
-
-        # act
-        a_info = ibm_model.best_model2_alignment(sentence_pair)
-
-        # assert
-        self.assertEqual(a_info.alignment[1:], (1, 2, 4))  # 0th element unused
-        self.assertEqual(a_info.cepts, [[], [1], [2], [], [3]])
-
-    def test_best_model2_alignment_does_not_change_pegged_alignment(self):
-        # arrange
-        sentence_pair = AlignedSent(
-            TestIBMModel.__TEST_TRG_SENTENCE, TestIBMModel.__TEST_SRC_SENTENCE
-        )
-        translation_table = {
-            'i': {"j'": 0.9, 'aime': 0.05, 'bien': 0.02, 'jambon': 0.03, None: 0},
-            'love': {"j'": 0.05, 'aime': 0.9, 'bien': 0.01, 'jambon': 0.01, None: 0.03},
-            'ham': {"j'": 0, 'aime': 0.01, 'bien': 0, 'jambon': 0.99, None: 0},
-        }
-        alignment_table = defaultdict(
-            lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 0.2)))
-        )
-
-        ibm_model = IBMModel([])
-        ibm_model.translation_table = translation_table
-        ibm_model.alignment_table = alignment_table
-
-        # act: force 'love' to be pegged to 'jambon'
-        a_info = ibm_model.best_model2_alignment(sentence_pair, 2, 4)
-        # assert
-        self.assertEqual(a_info.alignment[1:], (1, 4, 4))
-        self.assertEqual(a_info.cepts, [[], [1], [], [], [2, 3]])
-
-    def test_best_model2_alignment_handles_fertile_words(self):
-        # arrange
-        sentence_pair = AlignedSent(
-            ['i', 'really', ',', 'really', 'love', 'ham'],
-            TestIBMModel.__TEST_SRC_SENTENCE,
-        )
-        # 'bien' produces 2 target words: 'really' and another 'really'
-        translation_table = {
-            'i': {"j'": 0.9, 'aime': 0.05, 'bien': 0.02, 'jambon': 0.03, None: 0},
-            'really': {"j'": 0, 'aime': 0, 'bien': 0.9, 'jambon': 0.01, None: 0.09},
-            ',': {"j'": 0, 'aime': 0, 'bien': 0.3, 'jambon': 0, None: 0.7},
-            'love': {"j'": 0.05, 'aime': 0.9, 'bien': 0.01, 'jambon': 0.01, None: 0.03},
-            'ham': {"j'": 0, 'aime': 0.01, 'bien': 0, 'jambon': 0.99, None: 0},
-        }
-        alignment_table = defaultdict(
-            lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 0.2)))
-        )
-
-        ibm_model = IBMModel([])
-        ibm_model.translation_table = translation_table
-        ibm_model.alignment_table = alignment_table
-
-        # act
-        a_info = ibm_model.best_model2_alignment(sentence_pair)
-
-        # assert
-        self.assertEqual(a_info.alignment[1:], (1, 3, 0, 3, 2, 4))
-        self.assertEqual(a_info.cepts, [[3], [1], [5], [2, 4], [6]])
-
-    def test_best_model2_alignment_handles_empty_src_sentence(self):
-        # arrange
-        sentence_pair = AlignedSent(TestIBMModel.__TEST_TRG_SENTENCE, [])
-        ibm_model = IBMModel([])
-
-        # act
-        a_info = ibm_model.best_model2_alignment(sentence_pair)
-
-        # assert
-        self.assertEqual(a_info.alignment[1:], (0, 0, 0))
-        self.assertEqual(a_info.cepts, [[1, 2, 3]])
-
-    def test_best_model2_alignment_handles_empty_trg_sentence(self):
-        # arrange
-        sentence_pair = AlignedSent([], TestIBMModel.__TEST_SRC_SENTENCE)
-        ibm_model = IBMModel([])
-
-        # act
-        a_info = ibm_model.best_model2_alignment(sentence_pair)
-
-        # assert
-        self.assertEqual(a_info.alignment[1:], ())
-        self.assertEqual(a_info.cepts, [[], [], [], [], []])
-
-    def test_neighboring_finds_neighbor_alignments(self):
-        # arrange
-        a_info = AlignmentInfo(
-            (0, 3, 2),
-            (None, 'des', 'œufs', 'verts'),
-            ('UNUSED', 'green', 'eggs'),
-            [[], [], [2], [1]],
-        )
-        ibm_model = IBMModel([])
-
-        # act
-        neighbors = ibm_model.neighboring(a_info)
-
-        # assert
-        neighbor_alignments = set()
-        for neighbor in neighbors:
-            neighbor_alignments.add(neighbor.alignment)
-        expected_alignments = set(
-            [
-                # moves
-                (0, 0, 2),
-                (0, 1, 2),
-                (0, 2, 2),
-                (0, 3, 0),
-                (0, 3, 1),
-                (0, 3, 3),
-                # swaps
-                (0, 2, 3),
-                # original alignment
-                (0, 3, 2),
-            ]
-        )
-        self.assertEqual(neighbor_alignments, expected_alignments)
-
-    def test_neighboring_sets_neighbor_alignment_info(self):
-        # arrange
-        a_info = AlignmentInfo(
-            (0, 3, 2),
-            (None, 'des', 'œufs', 'verts'),
-            ('UNUSED', 'green', 'eggs'),
-            [[], [], [2], [1]],
-        )
-        ibm_model = IBMModel([])
-
-        # act
-        neighbors = ibm_model.neighboring(a_info)
-
-        # assert: select a few particular alignments
-        for neighbor in neighbors:
-            if neighbor.alignment == (0, 2, 2):
-                moved_alignment = neighbor
-            elif neighbor.alignment == (0, 3, 2):
-                swapped_alignment = neighbor
-
-        self.assertEqual(moved_alignment.cepts, [[], [], [1, 2], []])
-        self.assertEqual(swapped_alignment.cepts, [[], [], [2], [1]])
-
-    def test_neighboring_returns_neighbors_with_pegged_alignment(self):
-        # arrange
-        a_info = AlignmentInfo(
-            (0, 3, 2),
-            (None, 'des', 'œufs', 'verts'),
-            ('UNUSED', 'green', 'eggs'),
-            [[], [], [2], [1]],
-        )
-        ibm_model = IBMModel([])
-
-        # act: peg 'eggs' to align with 'œufs'
-        neighbors = ibm_model.neighboring(a_info, 2)
-
-        # assert
-        neighbor_alignments = set()
-        for neighbor in neighbors:
-            neighbor_alignments.add(neighbor.alignment)
-        expected_alignments = set(
-            [
-                # moves
-                (0, 0, 2),
-                (0, 1, 2),
-                (0, 2, 2),
-                # no swaps
-                # original alignment
-                (0, 3, 2),
-            ]
-        )
-        self.assertEqual(neighbor_alignments, expected_alignments)
-
-    def test_hillclimb(self):
-        # arrange
-        initial_alignment = AlignmentInfo((0, 3, 2), None, None, None)
-
-        def neighboring_mock(a, j):
-            if a.alignment == (0, 3, 2):
-                return set(
-                    [
-                        AlignmentInfo((0, 2, 2), None, None, None),
-                        AlignmentInfo((0, 1, 1), None, None, None),
-                    ]
-                )
-            elif a.alignment == (0, 2, 2):
-                return set(
-                    [
-                        AlignmentInfo((0, 3, 3), None, None, None),
-                        AlignmentInfo((0, 4, 4), None, None, None),
-                    ]
-                )
-            return set()
-
-        def prob_t_a_given_s_mock(a):
-            prob_values = {
-                (0, 3, 2): 0.5,
-                (0, 2, 2): 0.6,
-                (0, 1, 1): 0.4,
-                (0, 3, 3): 0.6,
-                (0, 4, 4): 0.7,
-            }
-            return prob_values.get(a.alignment, 0.01)
-
-        ibm_model = IBMModel([])
-        ibm_model.neighboring = neighboring_mock
-        ibm_model.prob_t_a_given_s = prob_t_a_given_s_mock
-
-        # act
-        best_alignment = ibm_model.hillclimb(initial_alignment)
-
-        # assert: hill climbing goes from (0, 3, 2) -> (0, 2, 2) -> (0, 4, 4)
-        self.assertEqual(best_alignment.alignment, (0, 4, 4))
-
-    def test_sample(self):
-        # arrange
-        sentence_pair = AlignedSent(
-            TestIBMModel.__TEST_TRG_SENTENCE, TestIBMModel.__TEST_SRC_SENTENCE
-        )
-        ibm_model = IBMModel([])
-        ibm_model.prob_t_a_given_s = lambda x: 0.001
-
-        # act
-        samples, best_alignment = ibm_model.sample(sentence_pair)
-
-        # assert
-        self.assertEqual(len(samples), 61)
diff --git a/nlp_resource_data/nltk/test/unit/translate/test_nist.py b/nlp_resource_data/nltk/test/unit/translate/test_nist.py

deleted file mode 100644 (file)

index 84e6342..0000000
--- a/nlp_resource_data/nltk/test/unit/translate/test_nist.py
+++ /dev/null
@@ -1,37 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Tests for NIST translation evaluation metric
-"""
-
-import io
-import unittest
-
-from nltk.data import find
-from nltk.translate.nist_score import sentence_nist, corpus_nist
-
-
-class TestNIST(unittest.TestCase):
-    def test_sentence_nist(self):
-        ref_file = find('models/wmt15_eval/ref.ru')
-        hyp_file = find('models/wmt15_eval/google.ru')
-        mteval_output_file = find('models/wmt15_eval/mteval-13a.output')
-
-        # Reads the NIST scores from the `mteval-13a.output` file.
-        # The order of the list corresponds to the order of the ngrams.
-        with open(mteval_output_file, 'r') as mteval_fin:
-            # The numbers are located in the last 4th line of the file.
-            # The first and 2nd item in the list are the score and system names.
-            mteval_nist_scores = map(float, mteval_fin.readlines()[-4].split()[1:-1])
-
-        with io.open(ref_file, 'r', encoding='utf8') as ref_fin:
-            with io.open(hyp_file, 'r', encoding='utf8') as hyp_fin:
-                # Whitespace tokenize the file.
-                # Note: split() automatically strip().
-                hypotheses = list(map(lambda x: x.split(), hyp_fin))
-                # Note that the corpus_bleu input is list of list of references.
-                references = list(map(lambda x: [x.split()], ref_fin))
-                # Without smoothing.
-                for i, mteval_nist in zip(range(1, 10), mteval_nist_scores):
-                    nltk_nist = corpus_nist(references, hypotheses, i)
-                    # Check that the NIST scores difference is less than 0.5
-                    assert abs(mteval_nist - nltk_nist) < 0.05
diff --git a/nlp_resource_data/nltk/test/unit/translate/test_stack_decoder.py b/nlp_resource_data/nltk/test/unit/translate/test_stack_decoder.py

deleted file mode 100644 (file)

index 5d5f2d4..0000000
--- a/nlp_resource_data/nltk/test/unit/translate/test_stack_decoder.py
+++ /dev/null
@@ -1,295 +0,0 @@
-# -*- coding: utf-8 -*-
-# Natural Language Toolkit: Stack decoder
-#
-# Copyright (C) 2001-2019 NLTK Project
-# Author: Tah Wei Hoon <hoon.tw@gmail.com>
-# URL: <http://nltk.org/>
-# For license information, see LICENSE.TXT
-
-"""
-Tests for stack decoder
-"""
-
-import unittest
-from collections import defaultdict
-from math import log
-from nltk.translate import PhraseTable
-from nltk.translate import StackDecoder
-from nltk.translate.stack_decoder import _Hypothesis, _Stack
-
-
-class TestStackDecoder(unittest.TestCase):
-    def test_find_all_src_phrases(self):
-        # arrange
-        phrase_table = TestStackDecoder.create_fake_phrase_table()
-        stack_decoder = StackDecoder(phrase_table, None)
-        sentence = ('my', 'hovercraft', 'is', 'full', 'of', 'eels')
-
-        # act
-        src_phrase_spans = stack_decoder.find_all_src_phrases(sentence)
-
-        # assert
-        self.assertEqual(src_phrase_spans[0], [2])  # 'my hovercraft'
-        self.assertEqual(src_phrase_spans[1], [2])  # 'hovercraft'
-        self.assertEqual(src_phrase_spans[2], [3])  # 'is'
-        self.assertEqual(src_phrase_spans[3], [5, 6])  # 'full of', 'full of eels'
-        self.assertFalse(src_phrase_spans[4])  # no entry starting with 'of'
-        self.assertEqual(src_phrase_spans[5], [6])  # 'eels'
-
-    def test_distortion_score(self):
-        # arrange
-        stack_decoder = StackDecoder(None, None)
-        stack_decoder.distortion_factor = 0.5
-        hypothesis = _Hypothesis()
-        hypothesis.src_phrase_span = (3, 5)
-
-        # act
-        score = stack_decoder.distortion_score(hypothesis, (8, 10))
-
-        # assert
-        expected_score = log(stack_decoder.distortion_factor) * (8 - 5)
-        self.assertEqual(score, expected_score)
-
-    def test_distortion_score_of_first_expansion(self):
-        # arrange
-        stack_decoder = StackDecoder(None, None)
-        stack_decoder.distortion_factor = 0.5
-        hypothesis = _Hypothesis()
-
-        # act
-        score = stack_decoder.distortion_score(hypothesis, (8, 10))
-
-        # assert
-        # expansion from empty hypothesis always has zero distortion cost
-        self.assertEqual(score, 0.0)
-
-    def test_compute_future_costs(self):
-        # arrange
-        phrase_table = TestStackDecoder.create_fake_phrase_table()
-        language_model = TestStackDecoder.create_fake_language_model()
-        stack_decoder = StackDecoder(phrase_table, language_model)
-        sentence = ('my', 'hovercraft', 'is', 'full', 'of', 'eels')
-
-        # act
-        future_scores = stack_decoder.compute_future_scores(sentence)
-
-        # assert
-        self.assertEqual(
-            future_scores[1][2],
-            (
-                phrase_table.translations_for(('hovercraft',))[0].log_prob
-                + language_model.probability(('hovercraft',))
-            ),
-        )
-        self.assertEqual(
-            future_scores[0][2],
-            (
-                phrase_table.translations_for(('my', 'hovercraft'))[0].log_prob
-                + language_model.probability(('my', 'hovercraft'))
-            ),
-        )
-
-    def test_compute_future_costs_for_phrases_not_in_phrase_table(self):
-        # arrange
-        phrase_table = TestStackDecoder.create_fake_phrase_table()
-        language_model = TestStackDecoder.create_fake_language_model()
-        stack_decoder = StackDecoder(phrase_table, language_model)
-        sentence = ('my', 'hovercraft', 'is', 'full', 'of', 'eels')
-
-        # act
-        future_scores = stack_decoder.compute_future_scores(sentence)
-
-        # assert
-        self.assertEqual(
-            future_scores[1][3],  # 'hovercraft is' is not in phrase table
-            future_scores[1][2] + future_scores[2][3],
-        )  # backoff
-
-    def test_future_score(self):
-        # arrange: sentence with 8 words; words 2, 3, 4 already translated
-        hypothesis = _Hypothesis()
-        hypothesis.untranslated_spans = lambda _: [(0, 2), (5, 8)]  # mock
-        future_score_table = defaultdict(lambda: defaultdict(float))
-        future_score_table[0][2] = 0.4
-        future_score_table[5][8] = 0.5
-        stack_decoder = StackDecoder(None, None)
-
-        # act
-        future_score = stack_decoder.future_score(hypothesis, future_score_table, 8)
-
-        # assert
-        self.assertEqual(future_score, 0.4 + 0.5)
-
-    def test_valid_phrases(self):
-        # arrange
-        hypothesis = _Hypothesis()
-        # mock untranslated_spans method
-        hypothesis.untranslated_spans = lambda _: [(0, 2), (3, 6)]
-        all_phrases_from = [[1, 4], [2], [], [5], [5, 6, 7], [], [7]]
-
-        # act
-        phrase_spans = StackDecoder.valid_phrases(all_phrases_from, hypothesis)
-
-        # assert
-        self.assertEqual(phrase_spans, [(0, 1), (1, 2), (3, 5), (4, 5), (4, 6)])
-
-    @staticmethod
-    def create_fake_phrase_table():
-        phrase_table = PhraseTable()
-        phrase_table.add(('hovercraft',), ('',), 0.8)
-        phrase_table.add(('my', 'hovercraft'), ('', ''), 0.7)
-        phrase_table.add(('my', 'cheese'), ('', ''), 0.7)
-        phrase_table.add(('is',), ('',), 0.8)
-        phrase_table.add(('is',), ('',), 0.5)
-        phrase_table.add(('full', 'of'), ('', ''), 0.01)
-        phrase_table.add(('full', 'of', 'eels'), ('', '', ''), 0.5)
-        phrase_table.add(('full', 'of', 'spam'), ('', ''), 0.5)
-        phrase_table.add(('eels',), ('',), 0.5)
-        phrase_table.add(('spam',), ('',), 0.5)
-        return phrase_table
-
-    @staticmethod
-    def create_fake_language_model():
-        # nltk.model should be used here once it is implemented
-        language_prob = defaultdict(lambda: -999.0)
-        language_prob[('my',)] = log(0.1)
-        language_prob[('hovercraft',)] = log(0.1)
-        language_prob[('is',)] = log(0.1)
-        language_prob[('full',)] = log(0.1)
-        language_prob[('of',)] = log(0.1)
-        language_prob[('eels',)] = log(0.1)
-        language_prob[('my', 'hovercraft')] = log(0.3)
-        language_model = type(
-            '', (object,), {'probability': lambda _, phrase: language_prob[phrase]}
-        )()
-        return language_model
-
-
-class TestHypothesis(unittest.TestCase):
-    def setUp(self):
-        root = _Hypothesis()
-        child = _Hypothesis(
-            raw_score=0.5,
-            src_phrase_span=(3, 7),
-            trg_phrase=('hello', 'world'),
-            previous=root,
-        )
-        grandchild = _Hypothesis(
-            raw_score=0.4,
-            src_phrase_span=(1, 2),
-            trg_phrase=('and', 'goodbye'),
-            previous=child,
-        )
-        self.hypothesis_chain = grandchild
-
-    def test_translation_so_far(self):
-        # act
-        translation = self.hypothesis_chain.translation_so_far()
-
-        # assert
-        self.assertEqual(translation, ['hello', 'world', 'and', 'goodbye'])
-
-    def test_translation_so_far_for_empty_hypothesis(self):
-        # arrange
-        hypothesis = _Hypothesis()
-
-        # act
-        translation = hypothesis.translation_so_far()
-
-        # assert
-        self.assertEqual(translation, [])
-
-    def test_total_translated_words(self):
-        # act
-        total_translated_words = self.hypothesis_chain.total_translated_words()
-
-        # assert
-        self.assertEqual(total_translated_words, 5)
-
-    def test_translated_positions(self):
-        # act
-        translated_positions = self.hypothesis_chain.translated_positions()
-
-        # assert
-        translated_positions.sort()
-        self.assertEqual(translated_positions, [1, 3, 4, 5, 6])
-
-    def test_untranslated_spans(self):
-        # act
-        untranslated_spans = self.hypothesis_chain.untranslated_spans(10)
-
-        # assert
-        self.assertEqual(untranslated_spans, [(0, 1), (2, 3), (7, 10)])
-
-    def test_untranslated_spans_for_empty_hypothesis(self):
-        # arrange
-        hypothesis = _Hypothesis()
-
-        # act
-        untranslated_spans = hypothesis.untranslated_spans(10)
-
-        # assert
-        self.assertEqual(untranslated_spans, [(0, 10)])
-
-
-class TestStack(unittest.TestCase):
-    def test_push_bumps_off_worst_hypothesis_when_stack_is_full(self):
-        # arrange
-        stack = _Stack(3)
-        poor_hypothesis = _Hypothesis(0.01)
-
-        # act
-        stack.push(_Hypothesis(0.2))
-        stack.push(poor_hypothesis)
-        stack.push(_Hypothesis(0.1))
-        stack.push(_Hypothesis(0.3))
-
-        # assert
-        self.assertFalse(poor_hypothesis in stack)
-
-    def test_push_removes_hypotheses_that_fall_below_beam_threshold(self):
-        # arrange
-        stack = _Stack(3, 0.5)
-        poor_hypothesis = _Hypothesis(0.01)
-        worse_hypothesis = _Hypothesis(0.009)
-
-        # act
-        stack.push(poor_hypothesis)
-        stack.push(worse_hypothesis)
-        stack.push(_Hypothesis(0.9))  # greatly superior hypothesis
-
-        # assert
-        self.assertFalse(poor_hypothesis in stack)
-        self.assertFalse(worse_hypothesis in stack)
-
-    def test_push_does_not_add_hypothesis_that_falls_below_beam_threshold(self):
-        # arrange
-        stack = _Stack(3, 0.5)
-        poor_hypothesis = _Hypothesis(0.01)
-
-        # act
-        stack.push(_Hypothesis(0.9))  # greatly superior hypothesis
-        stack.push(poor_hypothesis)
-
-        # assert
-        self.assertFalse(poor_hypothesis in stack)
-
-    def test_best_returns_the_best_hypothesis(self):
-        # arrange
-        stack = _Stack(3)
-        best_hypothesis = _Hypothesis(0.99)
-
-        # act
-        stack.push(_Hypothesis(0.0))
-        stack.push(best_hypothesis)
-        stack.push(_Hypothesis(0.5))
-
-        # assert
-        self.assertEqual(stack.best(), best_hypothesis)
-
-    def test_best_returns_none_when_stack_is_empty(self):
-        # arrange
-        stack = _Stack(3)
-
-        # assert
-        self.assertEqual(stack.best(), None)
diff --git a/nlp_resource_data/nltk/test/unit/utils.py b/nlp_resource_data/nltk/test/unit/utils.py

deleted file mode 100644 (file)

index 0489b16..0000000
--- a/nlp_resource_data/nltk/test/unit/utils.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import
-from unittest import TestCase
-from functools import wraps
-from nose.plugins.skip import SkipTest
-from nltk.util import py26
-
-
-def skip(reason):
-    """
-    Unconditionally skip a test.
-    """
-
-    def decorator(test_item):
-        is_test_class = isinstance(test_item, type) and issubclass(test_item, TestCase)
-
-        if is_test_class and py26():
-            # Patch all test_ methods to raise SkipText exception.
-            # This is necessary for Python 2.6 because its unittest
-            # doesn't understand __unittest_skip__.
-            for meth_name in (m for m in dir(test_item) if m.startswith('test_')):
-                patched_method = skip(reason)(getattr(test_item, meth_name))
-                setattr(test_item, meth_name, patched_method)
-
-        if not is_test_class:
-
-            @wraps(test_item)
-            def skip_wrapper(*args, **kwargs):
-                raise SkipTest(reason)
-
-            skip_wrapper.__name__ = test_item.__name__
-            test_item = skip_wrapper
-
-        test_item.__unittest_skip__ = True
-        test_item.__unittest_skip_why__ = reason
-        return test_item
-
-    return decorator
-
-
-def skipIf(condition, reason):
-    """
-    Skip a test if the condition is true.
-    """
-    if condition:
-        return skip(reason)
-    return lambda obj: obj
diff --git a/nlp_resource_data/nltk/test/util.doctest b/nlp_resource_data/nltk/test/util.doctest

deleted file mode 100644 (file)

index 7ba6af1..0000000
--- a/nlp_resource_data/nltk/test/util.doctest
+++ /dev/null
@@ -1,48 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-=================
-Utility functions
-=================
-
-    >>> from __future__ import print_function
-    >>> from nltk.util import *
-    >>> from nltk.tree import Tree
-
-    >>> print_string("This is a long string, therefore it should break", 25)
-    This is a long string,
-    therefore it should break
-
-    >>> re_show("[a-z]+", "sdf123")
-    {sdf}123
-
-    >>> tree = Tree(5,
-    ...             [Tree(4, [Tree(2, [1, 3])]),
-    ...              Tree(8, [Tree(6, [7]), 9])])
-    >>> for x in breadth_first(tree):
-    ...     if isinstance(x, int): print(x)
-    ...     else: print(x.label())
-    5
-    4
-    8
-    2
-    6
-    9
-    1
-    3
-    7
-    >>> for x in breadth_first(tree, maxdepth=2):
-    ...     if isinstance(x, int): print(x)
-    ...     else: print(x.label())
-    5
-    4
-    8
-    2
-    6
-    9
-
-    >>> invert_dict({1: 2})
-    defaultdict(<... 'list'>, {2: 1})
-
-    >>> invert_dict({1: [3, 4, 5]})
-    defaultdict(<... 'list'>, {3: [1], 4: [1], 5: [1]})
diff --git a/nlp_resource_data/nltk/test/wordnet.doctest b/nlp_resource_data/nltk/test/wordnet.doctest

deleted file mode 100644 (file)

index 409504d..0000000
--- a/nlp_resource_data/nltk/test/wordnet.doctest
+++ /dev/null
@@ -1,605 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-=================
-WordNet Interface
-=================
-
-WordNet is just another NLTK corpus reader, and can be imported like this:
-    >>> from __future__ import print_function, unicode_literals
-    >>> from nltk.corpus import wordnet
-
-For more compact code, we recommend:
-
-    >>> from nltk.corpus import wordnet as wn
-
------
-Words
------
-
-Look up a word using ``synsets()``; this function has an optional ``pos`` argument
-which lets you constrain the part of speech of the word:
-
-    >>> wn.synsets('dog') # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
-    [Synset('dog.n.01'), Synset('frump.n.01'), Synset('dog.n.03'), Synset('cad.n.01'),
-    Synset('frank.n.02'), Synset('pawl.n.01'), Synset('andiron.n.01'), Synset('chase.v.01')]
-    >>> wn.synsets('dog', pos=wn.VERB)
-    [Synset('chase.v.01')]
-
-The other parts of speech are ``NOUN``, ``ADJ`` and ``ADV``.
-A synset is identified with a 3-part name of the form: word.pos.nn:
-
-    >>> wn.synset('dog.n.01')
-    Synset('dog.n.01')
-    >>> print(wn.synset('dog.n.01').definition())
-    a member of the genus Canis (probably descended from the common wolf) that has been domesticated by man since prehistoric times; occurs in many breeds
-    >>> len(wn.synset('dog.n.01').examples())
-    1
-    >>> print(wn.synset('dog.n.01').examples()[0])
-    the dog barked all night
-    >>> wn.synset('dog.n.01').lemmas()
-    [Lemma('dog.n.01.dog'), Lemma('dog.n.01.domestic_dog'), Lemma('dog.n.01.Canis_familiaris')]
-    >>> [str(lemma.name()) for lemma in wn.synset('dog.n.01').lemmas()]
-    ['dog', 'domestic_dog', 'Canis_familiaris']
-    >>> wn.lemma('dog.n.01.dog').synset()
-    Synset('dog.n.01')
-
-The WordNet corpus reader gives access to the Open Multilingual
-WordNet, using ISO-639 language codes.
-
-    >>> sorted(wn.langs()) # doctest: +NORMALIZE_WHITESPACE
-    ['als', 'arb', 'bul', 'cat', 'cmn', 'dan', 'ell', 'eng', 'eus', 'fas',
-    'fin', 'fra', 'glg', 'heb', 'hrv', 'ind', 'ita', 'jpn', 'nld', 'nno',
-    'nob', 'pol', 'por', 'qcn', 'slv', 'spa', 'swe', 'tha', 'zsm']
-    >>> wn.synsets(b'\xe7\x8a\xac'.decode('utf-8'), lang='jpn')
-    [Synset('dog.n.01'), Synset('spy.n.01')]
-    
-    wn.synset('spy.n.01').lemma_names('jpn') # doctest: +NORMALIZE_WHITESPACE
-    ['\u3044\u306c', '\u307e\u308f\u3057\u8005', '\u30b9\u30d1\u30a4', '\u56de\u3057\u8005',
-    '\u56de\u8005', '\u5bc6\u5075', '\u5de5\u4f5c\u54e1', '\u5efb\u3057\u8005',
-    '\u5efb\u8005', '\u63a2', '\u63a2\u308a', '\u72ac', '\u79d8\u5bc6\u635c\u67fb\u54e1',
-    '\u8adc\u5831\u54e1', '\u8adc\u8005', '\u9593\u8005', '\u9593\u8adc', '\u96a0\u5bc6']
-    
-    >>> wn.synset('dog.n.01').lemma_names('ita')
-    ['cane', 'Canis_familiaris']
-    >>> wn.lemmas('cane', lang='ita') # doctest: +NORMALIZE_WHITESPACE
-    [Lemma('dog.n.01.cane'), Lemma('cramp.n.02.cane'), Lemma('hammer.n.01.cane'), Lemma('bad_person.n.01.cane'), 
-    Lemma('incompetent.n.01.cane')]
-    >>> sorted(wn.synset('dog.n.01').lemmas('dan')) # doctest: +NORMALIZE_WHITESPACE
-    [Lemma('dog.n.01.hund'), Lemma('dog.n.01.k\xf8ter'),
-    Lemma('dog.n.01.vovhund'), Lemma('dog.n.01.vovse')]
-    
-    sorted(wn.synset('dog.n.01').lemmas('por'))
-       [Lemma('dog.n.01.cachorra'), Lemma('dog.n.01.cachorro'), Lemma('dog.n.01.cadela'), Lemma('dog.n.01.c\xe3o')]
-    
-    >>> dog_lemma = wn.lemma(b'dog.n.01.c\xc3\xa3o'.decode('utf-8'), lang='por')
-    >>> dog_lemma
-    Lemma('dog.n.01.c\xe3o')
-    >>> dog_lemma.lang()
-    'por'
-    >>> len(wordnet.all_lemma_names(pos='n', lang='jpn'))
-    64797
-
--------
-Synsets
--------
-
-`Synset`: a set of synonyms that share a common meaning.
-
-    >>> dog = wn.synset('dog.n.01')
-    >>> dog.hypernyms()
-    [Synset('canine.n.02'), Synset('domestic_animal.n.01')]
-    >>> dog.hyponyms()  # doctest: +ELLIPSIS
-    [Synset('basenji.n.01'), Synset('corgi.n.01'), Synset('cur.n.01'), Synset('dalmatian.n.02'), ...]
-    >>> dog.member_holonyms()
-    [Synset('canis.n.01'), Synset('pack.n.06')]
-    >>> dog.root_hypernyms()
-    [Synset('entity.n.01')]
-    >>> wn.synset('dog.n.01').lowest_common_hypernyms(wn.synset('cat.n.01'))
-    [Synset('carnivore.n.01')]
-
-Each synset contains one or more lemmas, which represent a specific
-sense of a specific word.
-
-Note that some relations are defined by WordNet only over Lemmas:
-
-    >>> good = wn.synset('good.a.01')
-    >>> good.antonyms()
-    Traceback (most recent call last):
-      File "<stdin>", line 1, in <module>
-    AttributeError: 'Synset' object has no attribute 'antonyms'
-    >>> good.lemmas()[0].antonyms()
-    [Lemma('bad.a.01.bad')]
-
-The relations that are currently defined in this way are `antonyms`,
-`derivationally_related_forms` and `pertainyms`.
-
-If you know the byte offset used to identify a synset in the original
-Princeton WordNet data file, you can use that to instantiate the synset
-in NLTK:
-
-    >>> wn.synset_from_pos_and_offset('n', 4543158)
-    Synset('wagon.n.01')
-
-------
-Lemmas
-------
-
-    >>> eat = wn.lemma('eat.v.03.eat')
-    >>> eat
-    Lemma('feed.v.06.eat')
-    >>> print(eat.key())
-    eat%2:34:02::
-    >>> eat.count()
-    4
-    >>> wn.lemma_from_key(eat.key())
-    Lemma('feed.v.06.eat')
-    >>> wn.lemma_from_key(eat.key()).synset()
-    Synset('feed.v.06')
-    >>> wn.lemma_from_key('feebleminded%5:00:00:retarded:00')
-    Lemma('backward.s.03.feebleminded')
-    >>> for lemma in wn.synset('eat.v.03').lemmas():
-    ...     print(lemma, lemma.count())
-    ...
-    Lemma('feed.v.06.feed') 3
-    Lemma('feed.v.06.eat') 4
-    >>> for lemma in wn.lemmas('eat', 'v'):
-    ...     print(lemma, lemma.count())
-    ...
-    Lemma('eat.v.01.eat') 61
-    Lemma('eat.v.02.eat') 13
-    Lemma('feed.v.06.eat') 4
-    Lemma('eat.v.04.eat') 0
-    Lemma('consume.v.05.eat') 0
-    Lemma('corrode.v.01.eat') 0
-    >>> wn.lemma('jump.v.11.jump')
-    Lemma('jump.v.11.jump')
-
-Lemmas can also have relations between them:
-
-    >>> vocal = wn.lemma('vocal.a.01.vocal')
-    >>> vocal.derivationally_related_forms()
-    [Lemma('vocalize.v.02.vocalize')]
-    >>> vocal.pertainyms()
-    [Lemma('voice.n.02.voice')]
-    >>> vocal.antonyms()
-    [Lemma('instrumental.a.01.instrumental')]
-
-The three relations above exist only on lemmas, not on synsets.
-
------------
-Verb Frames
------------
-
-    >>> wn.synset('think.v.01').frame_ids()
-    [5, 9]
-    >>> for lemma in wn.synset('think.v.01').lemmas():
-    ...     print(lemma, lemma.frame_ids())
-    ...     print(" | ".join(lemma.frame_strings()))
-    ...
-    Lemma('think.v.01.think') [5, 9]
-    Something think something Adjective/Noun | Somebody think somebody
-    Lemma('think.v.01.believe') [5, 9]
-    Something believe something Adjective/Noun | Somebody believe somebody
-    Lemma('think.v.01.consider') [5, 9]
-    Something consider something Adjective/Noun | Somebody consider somebody
-    Lemma('think.v.01.conceive') [5, 9]
-    Something conceive something Adjective/Noun | Somebody conceive somebody
-    >>> wn.synset('stretch.v.02').frame_ids()
-    [8]
-    >>> for lemma in wn.synset('stretch.v.02').lemmas():
-    ...     print(lemma, lemma.frame_ids())
-    ...     print(" | ".join(lemma.frame_strings()))
-    ...
-    Lemma('stretch.v.02.stretch') [8, 2]
-    Somebody stretch something | Somebody stretch
-    Lemma('stretch.v.02.extend') [8]
-    Somebody extend something
-
-
-----------
-Similarity
-----------
-
-    >>> dog = wn.synset('dog.n.01')
-    >>> cat = wn.synset('cat.n.01')
-
-    >>> hit = wn.synset('hit.v.01')
-    >>> slap = wn.synset('slap.v.01')
-
-
-``synset1.path_similarity(synset2):``
-Return a score denoting how similar two word senses are, based on the
-shortest path that connects the senses in the is-a (hypernym/hyponym)
-taxonomy. The score is in the range 0 to 1. By default, there is now
-a fake root node added to verbs so for cases where previously a path
-could not be found---and None was returned---it should return a value.
-The old behavior can be achieved by setting simulate_root to be False.
-A score of 1 represents identity i.e. comparing a sense with itself
-will return 1.
-
-    >>> dog.path_similarity(cat)  # doctest: +ELLIPSIS
-    0.2...
-
-    >>> hit.path_similarity(slap)  # doctest: +ELLIPSIS
-    0.142...
-
-    >>> wn.path_similarity(hit, slap)  # doctest: +ELLIPSIS
-    0.142...
-
-    >>> print(hit.path_similarity(slap, simulate_root=False))
-    None
-
-    >>> print(wn.path_similarity(hit, slap, simulate_root=False))
-    None
-
-``synset1.lch_similarity(synset2):``
-Leacock-Chodorow Similarity:
-Return a score denoting how similar two word senses are, based on the
-shortest path that connects the senses (as above) and the maximum depth
-of the taxonomy in which the senses occur. The relationship is given
-as -log(p/2d) where p is the shortest path length and d the taxonomy
-depth.
-
-    >>> dog.lch_similarity(cat)  # doctest: +ELLIPSIS
-    2.028...
-
-    >>> hit.lch_similarity(slap)  # doctest: +ELLIPSIS
-    1.312...
-
-    >>> wn.lch_similarity(hit, slap)  # doctest: +ELLIPSIS
-    1.312...
-
-    >>> print(hit.lch_similarity(slap, simulate_root=False))
-    None
-
-    >>> print(wn.lch_similarity(hit, slap, simulate_root=False))
-    None
-
-``synset1.wup_similarity(synset2):``
-Wu-Palmer Similarity:
-Return a score denoting how similar two word senses are, based on the
-depth of the two senses in the taxonomy and that of their Least Common
-Subsumer (most specific ancestor node). Note that at this time the
-scores given do _not_ always agree with those given by Pedersen's Perl
-implementation of Wordnet Similarity.
-
-The LCS does not necessarily feature in the shortest path connecting the
-two senses, as it is by definition the common ancestor deepest in the
-taxonomy, not closest to the two senses. Typically, however, it will so
-feature. Where multiple candidates for the LCS exist, that whose
-shortest path to the root node is the longest will be selected. Where
-the LCS has multiple paths to the root, the longer path is used for
-the purposes of the calculation.
-
-    >>> dog.wup_similarity(cat)  # doctest: +ELLIPSIS
-    0.857...
-
-    >>> hit.wup_similarity(slap)
-    0.25
-
-    >>> wn.wup_similarity(hit, slap)
-    0.25
-
-    >>> print(hit.wup_similarity(slap, simulate_root=False))
-    None
-
-    >>> print(wn.wup_similarity(hit, slap, simulate_root=False))
-    None
-
-``wordnet_ic``
-Information Content:
-Load an information content file from the wordnet_ic corpus.
-
-    >>> from nltk.corpus import wordnet_ic
-    >>> brown_ic = wordnet_ic.ic('ic-brown.dat')
-    >>> semcor_ic = wordnet_ic.ic('ic-semcor.dat')
-
-Or you can create an information content dictionary from a corpus (or
-anything that has a words() method).
-
-   >>> from nltk.corpus import genesis
-   >>> genesis_ic = wn.ic(genesis, False, 0.0)
-
-``synset1.res_similarity(synset2, ic):``
-Resnik Similarity:
-Return a score denoting how similar two word senses are, based on the
-Information Content (IC) of the Least Common Subsumer (most specific
-ancestor node).  Note that for any similarity measure that uses
-information content, the result is dependent on the corpus used to
-generate the information content and the specifics of how the
-information content was created.
-
-    >>> dog.res_similarity(cat, brown_ic)  # doctest: +ELLIPSIS
-    7.911...
-    >>> dog.res_similarity(cat, genesis_ic)  # doctest: +ELLIPSIS
-    7.204...
-
-``synset1.jcn_similarity(synset2, ic):``
-Jiang-Conrath Similarity
-Return a score denoting how similar two word senses are, based on the
-Information Content (IC) of the Least Common Subsumer (most specific
-ancestor node) and that of the two input Synsets. The relationship is
-given by the equation 1 / (IC(s1) + IC(s2) - 2 * IC(lcs)).
-
-    >>> dog.jcn_similarity(cat, brown_ic)  # doctest: +ELLIPSIS
-    0.449...
-    >>> dog.jcn_similarity(cat, genesis_ic)  # doctest: +ELLIPSIS
-    0.285...
-
-``synset1.lin_similarity(synset2, ic):``
-Lin Similarity:
-Return a score denoting how similar two word senses are, based on the
-Information Content (IC) of the Least Common Subsumer (most specific
-ancestor node) and that of the two input Synsets. The relationship is
-given by the equation 2 * IC(lcs) / (IC(s1) + IC(s2)).
-
-    >>> dog.lin_similarity(cat, semcor_ic)  # doctest: +ELLIPSIS
-    0.886...
-
-
----------------------
-Access to all Synsets
----------------------
-
-Iterate over all the noun synsets:
-
-    >>> for synset in list(wn.all_synsets('n'))[:10]:
-    ...     print(synset)
-    ...
-    Synset('entity.n.01')
-    Synset('physical_entity.n.01')
-    Synset('abstraction.n.06')
-    Synset('thing.n.12')
-    Synset('object.n.01')
-    Synset('whole.n.02')
-    Synset('congener.n.03')
-    Synset('living_thing.n.01')
-    Synset('organism.n.01')
-    Synset('benthos.n.02')
-
-Get all synsets for this word, possibly restricted by POS:
-
-    >>> wn.synsets('dog') # doctest: +ELLIPSIS
-    [Synset('dog.n.01'), Synset('frump.n.01'), Synset('dog.n.03'), Synset('cad.n.01'), ...]
-    >>> wn.synsets('dog', pos='v')
-    [Synset('chase.v.01')]
-
-Walk through the noun synsets looking at their hypernyms:
-
-    >>> from itertools import islice
-    >>> for synset in islice(wn.all_synsets('n'), 5):
-    ...     print(synset, synset.hypernyms())
-    ...
-    Synset('entity.n.01') []
-    Synset('physical_entity.n.01') [Synset('entity.n.01')]
-    Synset('abstraction.n.06') [Synset('entity.n.01')]
-    Synset('thing.n.12') [Synset('physical_entity.n.01')]
-    Synset('object.n.01') [Synset('physical_entity.n.01')]
-
-
-------
-Morphy
-------
-
-Look up forms not in WordNet, with the help of Morphy:
-
-    >>> wn.morphy('denied', wn.NOUN)
-    >>> print(wn.morphy('denied', wn.VERB))
-    deny
-    >>> wn.synsets('denied', wn.NOUN)
-    []
-    >>> wn.synsets('denied', wn.VERB) # doctest: +NORMALIZE_WHITESPACE
-    [Synset('deny.v.01'), Synset('deny.v.02'), Synset('deny.v.03'), Synset('deny.v.04'),
-    Synset('deny.v.05'), Synset('traverse.v.03'), Synset('deny.v.07')]
-
-Morphy uses a combination of inflectional ending rules and exception
-lists to handle a variety of different possibilities:
-
-    >>> print(wn.morphy('dogs'))
-    dog
-    >>> print(wn.morphy('churches'))
-    church
-    >>> print(wn.morphy('aardwolves'))
-    aardwolf
-    >>> print(wn.morphy('abaci'))
-    abacus
-    >>> print(wn.morphy('book', wn.NOUN))
-    book
-    >>> wn.morphy('hardrock', wn.ADV)
-    >>> wn.morphy('book', wn.ADJ)
-    >>> wn.morphy('his', wn.NOUN)
-    >>>
-
----------------
-Synset Closures
----------------
-
-Compute transitive closures of synsets
-
-    >>> dog = wn.synset('dog.n.01')
-    >>> hypo = lambda s: s.hyponyms()
-    >>> hyper = lambda s: s.hypernyms()
-    >>> list(dog.closure(hypo, depth=1)) == dog.hyponyms()
-    True
-    >>> list(dog.closure(hyper, depth=1)) == dog.hypernyms()
-    True
-    >>> list(dog.closure(hypo)) # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
-    [Synset('basenji.n.01'), Synset('corgi.n.01'), Synset('cur.n.01'),
-     Synset('dalmatian.n.02'), Synset('great_pyrenees.n.01'),
-     Synset('griffon.n.02'), Synset('hunting_dog.n.01'), Synset('lapdog.n.01'),
-     Synset('leonberg.n.01'), Synset('mexican_hairless.n.01'),
-     Synset('newfoundland.n.01'), Synset('pooch.n.01'), Synset('poodle.n.01'), ...]
-    >>> list(dog.closure(hyper)) # doctest: +NORMALIZE_WHITESPACE
-    [Synset('canine.n.02'), Synset('domestic_animal.n.01'), Synset('carnivore.n.01'), Synset('animal.n.01'), 
-    Synset('placental.n.01'), Synset('organism.n.01'), Synset('mammal.n.01'), Synset('living_thing.n.01'),
-    Synset('vertebrate.n.01'), Synset('whole.n.02'), Synset('chordate.n.01'), Synset('object.n.01'),
-    Synset('physical_entity.n.01'), Synset('entity.n.01')]
-
-
-----------------
-Regression Tests
-----------------
-
-Bug 85: morphy returns the base form of a word, if its input is given
-as a base form for a POS for which that word is not defined:
-
-    >>> wn.synsets('book', wn.NOUN)
-    [Synset('book.n.01'), Synset('book.n.02'), Synset('record.n.05'), Synset('script.n.01'), Synset('ledger.n.01'), Synset('book.n.06'), Synset('book.n.07'), Synset('koran.n.01'), Synset('bible.n.01'), Synset('book.n.10'), Synset('book.n.11')]
-    >>> wn.synsets('book', wn.ADJ)
-    []
-    >>> wn.morphy('book', wn.NOUN)
-    'book'
-    >>> wn.morphy('book', wn.ADJ)
-
-Bug 160: wup_similarity breaks when the two synsets have no common hypernym
-
-    >>> t = wn.synsets('picasso')[0]
-    >>> m = wn.synsets('male')[1]
-    >>> t.wup_similarity(m)  # doctest: +ELLIPSIS
-    0.631...
-
-    >>> t = wn.synsets('titan')[1]
-    >>> s = wn.synsets('say', wn.VERB)[0]
-    >>> print(t.wup_similarity(s))
-    None
-
-Bug 21: "instance of" not included in LCS (very similar to bug 160)
-
-    >>> a = wn.synsets("writings")[0]
-    >>> b = wn.synsets("scripture")[0]
-    >>> brown_ic = wordnet_ic.ic('ic-brown.dat')
-    >>> a.jcn_similarity(b, brown_ic)  # doctest: +ELLIPSIS
-    0.175...
-
-Bug 221: Verb root IC is zero
-
-    >>> from nltk.corpus.reader.wordnet import information_content
-    >>> s = wn.synsets('say', wn.VERB)[0]
-    >>> information_content(s, brown_ic)  # doctest: +ELLIPSIS
-    4.623...
-
-Bug 161: Comparison between WN keys/lemmas should not be case sensitive
-
-    >>> k = wn.synsets("jefferson")[0].lemmas()[0].key()
-    >>> wn.lemma_from_key(k)
-    Lemma('jefferson.n.01.Jefferson')
-    >>> wn.lemma_from_key(k.upper())
-    Lemma('jefferson.n.01.Jefferson')
-
-Bug 99: WordNet root_hypernyms gives incorrect results
-
-    >>> from nltk.corpus import wordnet as wn
-    >>> for s in wn.all_synsets(wn.NOUN):
-    ...     if s.root_hypernyms()[0] != wn.synset('entity.n.01'):
-    ...         print(s, s.root_hypernyms())
-    ...
-    >>>
-
-Bug 382: JCN Division by zero error
-
-    >>> tow = wn.synset('tow.v.01')
-    >>> shlep = wn.synset('shlep.v.02')
-    >>> from nltk.corpus import wordnet_ic
-    >>> brown_ic =  wordnet_ic.ic('ic-brown.dat')
-    >>> tow.jcn_similarity(shlep, brown_ic)  # doctest: +ELLIPSIS
-    1...e+300
-
-Bug 428: Depth is zero for instance nouns
-
-    >>> s = wn.synset("lincoln.n.01")
-    >>> s.max_depth() > 0
-    True
-
-Bug 429: Information content smoothing used old reference to all_synsets
-
-    >>> genesis_ic = wn.ic(genesis, True, 1.0)
-
-Bug 430: all_synsets used wrong pos lookup when synsets were cached
-
-    >>> for ii in wn.all_synsets(): pass
-    >>> for ii in wn.all_synsets(): pass
-
-Bug 470: shortest_path_distance ignored instance hypernyms
-
-    >>> google = wordnet.synsets("google")[0]
-    >>> earth = wordnet.synsets("earth")[0]
-    >>> google.wup_similarity(earth)  # doctest: +ELLIPSIS
-    0.1...
-
-Bug 484: similarity metrics returned -1 instead of None for no LCS
-
-    >>> t = wn.synsets('fly', wn.VERB)[0]
-    >>> s = wn.synsets('say', wn.VERB)[0]
-    >>> print(s.shortest_path_distance(t))
-    None
-    >>> print(s.path_similarity(t, simulate_root=False))
-    None
-    >>> print(s.lch_similarity(t, simulate_root=False))
-    None
-    >>> print(s.wup_similarity(t, simulate_root=False))
-    None
-
-Bug 427: "pants" does not return all the senses it should
-
-    >>> from nltk.corpus import wordnet
-    >>> wordnet.synsets("pants",'n')
-    [Synset('bloomers.n.01'), Synset('pant.n.01'), Synset('trouser.n.01'), Synset('gasp.n.01')]
-
-Bug 482: Some nouns not being lemmatised by WordNetLemmatizer().lemmatize
-
-    >>> from nltk.stem.wordnet import WordNetLemmatizer
-    >>> WordNetLemmatizer().lemmatize("eggs", pos="n")
-    'egg'
-    >>> WordNetLemmatizer().lemmatize("legs", pos="n")
-    'leg'
-
-Bug 284: instance hypernyms not used in similarity calculations
-
-    >>> wn.synset('john.n.02').lch_similarity(wn.synset('dog.n.01'))  # doctest: +ELLIPSIS
-    1.335...
-    >>> wn.synset('john.n.02').wup_similarity(wn.synset('dog.n.01'))  # doctest: +ELLIPSIS
-    0.571...
-    >>> wn.synset('john.n.02').res_similarity(wn.synset('dog.n.01'), brown_ic)  # doctest: +ELLIPSIS
-    2.224...
-    >>> wn.synset('john.n.02').jcn_similarity(wn.synset('dog.n.01'), brown_ic)  # doctest: +ELLIPSIS
-    0.075...
-    >>> wn.synset('john.n.02').lin_similarity(wn.synset('dog.n.01'), brown_ic)  # doctest: +ELLIPSIS
-    0.252...
-    >>> wn.synset('john.n.02').hypernym_paths()  # doctest: +ELLIPSIS
-    [[Synset('entity.n.01'), ..., Synset('john.n.02')]]
-
-Issue 541: add domains to wordnet
-
-    >>> wn.synset('code.n.03').topic_domains()
-    [Synset('computer_science.n.01')]
-    >>> wn.synset('pukka.a.01').region_domains()
-    [Synset('india.n.01')]
-    >>> wn.synset('freaky.a.01').usage_domains()
-    [Synset('slang.n.02')]
-
-Issue 629: wordnet failures when python run with -O optimizations
-
-    >>> # Run the test suite with python -O to check this
-    >>> wn.synsets("brunch")
-    [Synset('brunch.n.01'), Synset('brunch.v.01')]
-
-Issue 395: wordnet returns incorrect result for lowest_common_hypernyms of chef and policeman
-
-    >>> wn.synset('policeman.n.01').lowest_common_hypernyms(wn.synset('chef.n.01'))
-    [Synset('person.n.01')]
-
-Bug https://github.com/nltk/nltk/issues/1641: Non-English lemmas containing capital letters cannot be looked up using wordnet.lemmas() or wordnet.synsets()
-
-    >>> wn.lemmas('Londres', lang='fra')
-    [Lemma('united_kingdom.n.01.Londres'), Lemma('london.n.01.Londres'), Lemma('london.n.02.Londres')]
-    >>> wn.lemmas('londres', lang='fra')
-    [Lemma('united_kingdom.n.01.Londres'), Lemma('london.n.01.Londres'), Lemma('london.n.02.Londres')]
-
-Patch-1 https://github.com/nltk/nltk/pull/2065  Adding 3 functions (relations) to WordNet class
-    >>> wn.synsets("computer_science")[0].in_topic_domains()[2]
-    Synset('access_time.n.01')
-    >>> wn.synsets("France")[0].in_region_domains()[18]
-    Synset('french.n.01')
-    >>> wn.synsets("slang")[1].in_usage_domains()[18]
-    Synset('can-do.s.01')
diff --git a/nlp_resource_data/nltk/test/wordnet_fixt.py b/nlp_resource_data/nltk/test/wordnet_fixt.py

deleted file mode 100644 (file)

index 1412c0d..0000000
--- a/nlp_resource_data/nltk/test/wordnet_fixt.py
+++ /dev/null
@@ -1,8 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import
-
-
-def teardown_module(module=None):
-    from nltk.corpus import wordnet
-
-    wordnet._unload()
diff --git a/nlp_resource_data/nltk/test/wordnet_lch.doctest b/nlp_resource_data/nltk/test/wordnet_lch.doctest

deleted file mode 100644 (file)

index c2536b4..0000000
--- a/nlp_resource_data/nltk/test/wordnet_lch.doctest
+++ /dev/null
@@ -1,53 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-===============================
-WordNet Lowest Common Hypernyms
-===============================
-
-Wordnet's lowest_common_hypernyms() method is used to locate the
-lowest single hypernym that is shared by two given words:
-
-    >>> from nltk.corpus import wordnet as wn
-    >>> wn.synset('kin.n.01').lowest_common_hypernyms(wn.synset('mother.n.01'))
-    [Synset('relative.n.01')]
-
-    >>> wn.synset('policeman.n.01').lowest_common_hypernyms(wn.synset('chef.n.01'))
-    [Synset('person.n.01')]
-
-This method generally returns a single result, but in some cases, more than one
-valid LCH is possible:
-
-    >>> wn.synset('body.n.09').lowest_common_hypernyms(wn.synset('sidereal_day.n.01'))
-    [Synset('attribute.n.02'), Synset('measure.n.02')]
-
-In some cases, lowest_common_hypernyms() can return one of the synsets which was 
-passed to it as an argument:
-
-    >>> wn.synset('woman.n.01').lowest_common_hypernyms(wn.synset('girlfriend.n.02'))
-    [Synset('woman.n.01')]
-
-In NLTK 3.0a2 the behavior of lowest_common_hypernyms() was changed to give more
-accurate results in a small set of cases, generally when dealing with nouns describing 
-social roles or jobs. To emulate the pre v3.0a2 behavior, you can set the use_min_depth=True
-flag:
-
-    >>> wn.synset('policeman.n.01').lowest_common_hypernyms(wn.synset('chef.n.01'))
-    [Synset('person.n.01')]
-    >>> wn.synset('policeman.n.01').lowest_common_hypernyms(wn.synset('chef.n.01'), use_min_depth=True)
-    [Synset('organism.n.01')]
-
-In some cases use_min_depth=True may return more or fewer results than the default
-behavior:
-
-    >>> wn.synset('woman.n.01').lowest_common_hypernyms(wn.synset('girlfriend.n.02'))
-    [Synset('woman.n.01')]
-    >>> wn.synset('woman.n.01').lowest_common_hypernyms(wn.synset('girlfriend.n.02'), use_min_depth=True)
-    [Synset('organism.n.01'), Synset('woman.n.01')]
-
-In the general case, however, they tend to return the same results:
-
-    >>> wn.synset('body.n.09').lowest_common_hypernyms(wn.synset('sidereal_day.n.01'))
-    [Synset('attribute.n.02'), Synset('measure.n.02')]
-    >>> wn.synset('body.n.09').lowest_common_hypernyms(wn.synset('sidereal_day.n.01'), use_min_depth=True)
-    [Synset('attribute.n.02'), Synset('measure.n.02')]
diff --git a/nlp_resource_data/nltk/test/wsd.doctest b/nlp_resource_data/nltk/test/wsd.doctest

deleted file mode 100644 (file)

index b4d8f90..0000000
--- a/nlp_resource_data/nltk/test/wsd.doctest
+++ /dev/null
@@ -1,68 +0,0 @@
-.. Copyright (C) 2001-2019 NLTK Project
-.. For license information, see LICENSE.TXT
-
-.. -*- coding: utf-8 -*-
-
-=========================
-Word Sense Disambiguation
-=========================
-
-
-Lesk Algorithm
---------------
-
-
-Performs the classic Lesk algorithm for Word Sense Disambiguation (WSD) using
-the definitions of the ambiguous word.
-
-Given an ambiguous word and the context in which the word occurs, Lesk returns
-a Synset with the highest number of overlapping words between the context
-sentence and different definitions from each Synset.
-
-    >>> from nltk.wsd import lesk
-    >>> sent = ['I', 'went', 'to', 'the', 'bank', 'to', 'deposit', 'money', '.']
-
-    >>> print(lesk(sent, 'bank', 'n'))
-    Synset('savings_bank.n.02')
-
-    >>> print(lesk(sent, 'bank'))
-    Synset('savings_bank.n.02')
-
-The definitions for "bank" are:
-
-    >>> from nltk.corpus import wordnet as wn
-    >>> for ss in wn.synsets('bank'):
-    ...     print(ss, ss.definition())
-    ...
-    Synset('bank.n.01') sloping land (especially the slope beside a body of water)
-    Synset('depository_financial_institution.n.01') a financial institution that accepts deposits and channels the money into lending activities
-    Synset('bank.n.03') a long ridge or pile
-    Synset('bank.n.04') an arrangement of similar objects in a row or in tiers
-    Synset('bank.n.05') a supply or stock held in reserve for future use (especially in emergencies)
-    Synset('bank.n.06') the funds held by a gambling house or the dealer in some gambling games
-    Synset('bank.n.07') a slope in the turn of a road or track; the outside is higher than the inside in order to reduce the effects of centrifugal force
-    Synset('savings_bank.n.02') a container (usually with a slot in the top) for keeping money at home
-    Synset('bank.n.09') a building in which the business of banking transacted
-    Synset('bank.n.10') a flight maneuver; aircraft tips laterally about its longitudinal axis (especially in turning)
-    Synset('bank.v.01') tip laterally
-    Synset('bank.v.02') enclose with a bank
-    Synset('bank.v.03') do business with a bank or keep an account at a bank
-    Synset('bank.v.04') act as the banker in a game or in gambling
-    Synset('bank.v.05') be in the banking business
-    Synset('deposit.v.02') put into a bank account
-    Synset('bank.v.07') cover with ashes so to control the rate of burning
-    Synset('trust.v.01') have confidence or faith in
-
-Test disambiguation of POS tagged `able`.
-
-    >>> [(s, s.pos()) for s in wn.synsets('able')]
-    [(Synset('able.a.01'), 'a'), (Synset('able.s.02'), 's'), (Synset('able.s.03'), 's'), (Synset('able.s.04'), 's')]
-    >>> sent = 'people should be able to marry a person of their choice'.split()
-    >>> lesk(sent, 'able')
-    Synset('able.s.04')
-    >>> lesk(sent, 'able', pos='a')
-    Synset('able.a.01')
-
-Test behavior if there are no matching senses.
-
-    >>> lesk('John loves Mary'.split(), 'loves', synsets=[])
author	jay.ho.park <jay.ho.park@samsung.com>
	Sat, 24 Oct 2020 04:06:24 +0000 (13:06 +0900)
committer	jay.ho.park <jay.ho.park@samsung.com>
	Sat, 24 Oct 2020 04:06:24 +0000 (13:06 +0900)
nlp_resource_data/nltk/test/__init__.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/__pycache__/__init__.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/__pycache__/all.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/__pycache__/childes_fixt.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/__pycache__/classify_fixt.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/__pycache__/compat_fixt.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/__pycache__/corpus_fixt.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/__pycache__/discourse_fixt.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/__pycache__/doctest_nose_plugin.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/__pycache__/gensim_fixt.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/__pycache__/gluesemantics_malt_fixt.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/__pycache__/inference_fixt.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/__pycache__/nonmonotonic_fixt.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/__pycache__/portuguese_en_fixt.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/__pycache__/probability_fixt.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/__pycache__/runtests.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/__pycache__/segmentation_fixt.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/__pycache__/semantics_fixt.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/__pycache__/translate_fixt.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/__pycache__/wordnet_fixt.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/all.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/bleu.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/bnc.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/ccg.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/ccg_semantics.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/chat80.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/childes.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/childes_fixt.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/chunk.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/classify.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/classify_fixt.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/collections.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/collocations.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/compat.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/compat_fixt.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/concordance.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/corpus.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/corpus_fixt.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/crubadan.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/data.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/dependency.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/discourse.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/discourse_fixt.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/doctest_nose_plugin.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/drt.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/featgram.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/featstruct.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/framenet.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/generate.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/gensim.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/gensim_fixt.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/gluesemantics.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/gluesemantics_malt.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/gluesemantics_malt_fixt.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/grammar.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/grammartestsuites.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/index.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/inference.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/inference_fixt.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/internals.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/japanese.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/lm.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/logic.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/metrics.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/misc.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/nonmonotonic.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/nonmonotonic_fixt.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/paice.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/parse.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/portuguese_en.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/portuguese_en_fixt.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/probability.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/probability_fixt.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/propbank.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/relextract.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/resolution.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/runtests.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/segmentation_fixt.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/semantics.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/semantics_fixt.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/sentiment.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/sentiwordnet.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/simple.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/stem.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/tag.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/tokenize.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/toolbox.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/translate.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/translate_fixt.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/tree.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/treeprettyprinter.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/treetransforms.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/__init__.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/__pycache__/__init__.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/__pycache__/test_2x_compat.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/__pycache__/test_aline.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/__pycache__/test_brill.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/__pycache__/test_chunk.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/__pycache__/test_classify.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/__pycache__/test_collocations.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/__pycache__/test_concordance.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/__pycache__/test_corenlp.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/__pycache__/test_corpora.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/__pycache__/test_corpus_views.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/__pycache__/test_data.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/__pycache__/test_disagreement.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/__pycache__/test_hmm.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/__pycache__/test_json2csv_corpus.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/__pycache__/test_naivebayes.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/__pycache__/test_pos_tag.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/__pycache__/test_rte_classify.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/__pycache__/test_seekable_unicode_stream_reader.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/__pycache__/test_senna.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/__pycache__/test_stem.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/__pycache__/test_tag.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/__pycache__/test_tgrep.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/__pycache__/test_tokenize.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/__pycache__/test_twitter_auth.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/__pycache__/test_wordnet.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/__pycache__/utils.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/lm/__init__.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/lm/__pycache__/__init__.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/lm/__pycache__/test_counter.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/lm/__pycache__/test_models.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/lm/__pycache__/test_preprocessing.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/lm/__pycache__/test_vocabulary.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/lm/test_counter.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/lm/test_models.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/lm/test_preprocessing.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/lm/test_vocabulary.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/test_2x_compat.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/test_aline.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/test_brill.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/test_chunk.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/test_classify.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/test_collocations.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/test_concordance.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/test_corenlp.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/test_corpora.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/test_corpus_views.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/test_data.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/test_disagreement.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/test_hmm.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/test_json2csv_corpus.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/test_naivebayes.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/test_pos_tag.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/test_rte_classify.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/test_seekable_unicode_stream_reader.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/test_senna.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/test_stem.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/test_tag.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/test_tgrep.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/test_tokenize.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/test_twitter_auth.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/test_wordnet.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/translate/__init__.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/translate/__pycache__/__init__.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/translate/__pycache__/test_bleu.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/translate/__pycache__/test_gdfa.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm1.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm2.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm3.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm4.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm5.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/translate/__pycache__/test_ibm_model.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/translate/__pycache__/test_nist.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/translate/__pycache__/test_stack_decoder.cpython-37.pyc	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/translate/test_bleu.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/translate/test_gdfa.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/translate/test_ibm1.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/translate/test_ibm2.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/translate/test_ibm3.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/translate/test_ibm4.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/translate/test_ibm5.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/translate/test_ibm_model.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/translate/test_nist.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/translate/test_stack_decoder.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/unit/utils.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/util.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/wordnet.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/wordnet_fixt.py	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/wordnet_lch.doctest	[deleted file]	patch \| blob \| history
nlp_resource_data/nltk/test/wsd.doctest	[deleted file]	patch \| blob \| history