Imported Upstream version 3.0.7 upstream/3.0.7
author DongHun Kwak <dh0128.kwak@samsung.com>
Mon, 18 Jul 2022 05:42:37 +0000 (14:42 +0900)
committer DongHun Kwak <dh0128.kwak@samsung.com>
Mon, 18 Jul 2022 05:42:37 +0000 (14:42 +0900)
16 files changed:
CHANGES
PKG-INFO
README.rst
docs/whats_new_in_3_0_0.rst
examples/bigquery_view_parser.py
examples/booleansearchparser.py
pyparsing.egg-info/PKG-INFO
pyparsing/__init__.py
pyparsing/core.py
pyparsing/helpers.py
pyparsing/results.py
pyparsing/unicode.py
pyparsing/util.py
tests/test_simple_unit.py
tests/test_unit.py
tox.ini

diff --git a/CHANGES b/CHANGES
index 99ac526d77d6ce81ae7af1dc3e729ba834a1e3bf..94b34d986340dc31c42fb634b9ff492d1c5c4dbf 100644 (file)
--- a/CHANGES
+++ b/CHANGES
@@ -2,6 +2,71 @@
 Change Log
 ==========
 
+Version 3.0.7 -
+---------------
+- Fixed bug #345, in which delimitedList changed expressions in place
+  using expr.streamline(). Reported by Kim Gräsman, thanks!
+
+- Fixed bug #346, when a string of word characters was passed to WordStart
+  or WordEnd instead of just taking the default value. Originally posted
+  as a question by Parag on StackOverflow, good catch!
+
+- Fixed bug #350, in which White expressions could fail to match due to
+  unintended whitespace-skipping. Reported by Fu Hanxi, thank you!
+
+- Fixed bug #355, when a QuotedString is defined with characters in its
+  quoteChar string containing regex-significant characters such as ., *,
+  ?, [, ], etc.
+
+- Fixed bug in ParserElement.run_tests where comments would be displayed
+  using with_line_numbers.
+
+- Added optional "min" and "max" arguments to `delimited_list`. PR
+  submitted by Marius, thanks!
+
+- Added new API change note in `whats_new_in_pyparsing_3_0_0`, regarding
+  a bug fix in the `bool()` behavior of `ParseResults`.
+
+  Prior to pyparsing 3.0.x, the `ParseResults` class implementation of
+  `__bool__` would return `False` if the `ParseResults` item list was empty,
+  even if it contained named results. In 3.0.0 and later, `ParseResults` will
+  return `True` if either the item list is not empty *or* if the named
+  results dict is not empty.
+
+      # generate an empty ParseResults by parsing a blank string with
+      # a ZeroOrMore
+      result = Word(alphas)[...].parse_string("")
+      print(result.as_list())
+      print(result.as_dict())
+      print(bool(result))
+
+      # add a results name to the result
+      result["name"] = "empty result"
+      print(result.as_list())
+      print(result.as_dict())
+      print(bool(result))
+
+  Prints:
+
+      []
+      {}
+      False
+
+      []
+      {'name': 'empty result'}
+      True
+
+  In previous versions, the second call to `bool()` would return `False`.
+
+- Minor enhancement to Word generation of internal regular expression, to
+  emit consecutive characters in range, such as "ab", as "ab", not "a-b".
+
+- Fixed character ranges for search terms using non-Western characters
+  in booleansearchparser, PR submitted by tc-yu, nice work!
+
+- Additional type annotations on public methods.
+
+
 Version 3.0.6 -
 ---------------
 - Added `suppress_warning()` method to individually suppress a warning on a
diff --git a/PKG-INFO b/PKG-INFO
index e1371c0022ac7869e32400fc103d75ebfce517b0..31fd0b4b9e0e5687d63003f43ffcadbf12519409 100644 (file)
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: pyparsing
-Version: 3.0.6
+Version: 3.0.7
 Summary: Python parsing module
 Home-page: https://github.com/pyparsing/pyparsing/
 Author: Paul McGuire
@@ -79,8 +79,8 @@ Description: PyParsing -- A Python Parsing Module
         
         See `CHANGES <https://github.com/pyparsing/pyparsing/blob/master/CHANGES>`__ file.
         
-        .. |Build Status| image:: https://travis-ci.com/pyparsing/pyparsing.svg?branch=master
-           :target: https://travis-ci.com/pyparsing/pyparsing
+        .. |Build Status| image:: https://github.com/pyparsing/pyparsing/actions/workflows/ci.yml/badge.svg
+           :target: https://github.com/pyparsing/pyparsing/actions/workflows/ci.yml
         .. |Coverage| image:: https://codecov.io/gh/pyparsing/pyparsing/branch/master/graph/badge.svg
           :target: https://codecov.io/gh/pyparsing/pyparsing
         
diff --git a/README.rst b/README.rst
index 62e9741f5273f4a1f2f7ddeccc7da845414f8cef..f51c9ddd1bcb576b66b95d9448bea337ea40b767 100644 (file)
--- a/README.rst
+++ b/README.rst
@@ -70,7 +70,7 @@ History
 
 See `CHANGES <https://github.com/pyparsing/pyparsing/blob/master/CHANGES>`__ file.
 
-.. |Build Status| image:: https://travis-ci.com/pyparsing/pyparsing.svg?branch=master
-   :target: https://travis-ci.com/pyparsing/pyparsing
+.. |Build Status| image:: https://github.com/pyparsing/pyparsing/actions/workflows/ci.yml/badge.svg
+   :target: https://github.com/pyparsing/pyparsing/actions/workflows/ci.yml
 .. |Coverage| image:: https://codecov.io/gh/pyparsing/pyparsing/branch/master/graph/badge.svg
   :target: https://codecov.io/gh/pyparsing/pyparsing
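diff --git a/docs/whats_new_in_3_0_0.rst b/docs/whats_new_in_3_0_0.rst
index e5e40e40a6baf85ece2a18d91d4de9ba70118bf4..3e099c63f6dd620a4c8a6755946d702c76bbc3e7 100644 (file)
--- a/docs/whats_new_in_3_0_0.rst
+++ b/docs/whats_new_in_3_0_0.rst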
@@ -498,7 +498,37 @@ Other new features
 API Changes
 ===========
 
-- [Note added in pyparsing 3.0.4]
+- [Note added in pyparsing 3.0.7, reflecting a change in 3.0.0]
+  Fixed a bug in the `ParseResults` class implementation of `__bool__`, which
+  would formerly return `False` if the `ParseResults` item list was empty, even if it
+  contained named results. Now `ParseResults` will return `True` if either the item
+  list is not empty *or* if the named results list is not empty.
+
+      # generate an empty ParseResults by parsing a blank string with a ZeroOrMore
+      result = Word(alphas)[...].parse_string("")
+      print(result.as_list())
+      print(result.as_dict())
+      print(bool(result))
+
+      # add a results name to the result
+      result["name"] = "empty result"
+      print(result.as_list())
+      print(result.as_dict())
+      print(bool(result))
+
+  Prints::
+
+      []
+      {}
+      False
+
+      []
+      {'name': 'empty result'}
+      True
+
+  In previous versions, the second call to `bool()` would return `False`.
+
+- [Note added in pyparsing 3.0.4, reflecting a change in 3.0.0]
   The `ParseResults` class now uses `__slots__` to pre-define instance attributes. This
   means that code written like this (which was allowed in pyparsing 2.4.7)::
 
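diff --git a/examples/bigquery_view_parser.py b/examples/bigquery_view_parser.py
index c9b8411d84fa67716c1a7fb49d4f272194e5fc2c..cec4412788282f5db1c02e55678a9b66d804acf1 100644 (file)
--- a/examples/bigquery_view_parser.py
+++ b/examples/bigquery_view_parser.py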
@@ -6,13 +6,16 @@
 #
 # Michael Smedberg
 #
+import sys
 
 from pyparsing import ParserElement, Suppress, Forward, CaselessKeyword
 from pyparsing import MatchFirst, alphas, alphanums, Combine, Word
-from pyparsing import QuotedString, CharsNotIn, Optional, Group, ZeroOrMore
+from pyparsing import QuotedString, CharsNotIn, Optional, Group
 from pyparsing import oneOf, delimitedList, restOfLine, cStyleComment
 from pyparsing import infixNotation, opAssoc, Regex, nums
 
+sys.setrecursionlimit(3000)
+
 ParserElement.enablePackrat()
 
 
@@ -44,7 +47,7 @@ class BigQueryViewParser:
         BigQueryViewParser._with_aliases.clear()
         BigQueryViewParser._get_parser().parseString(sql_stmt)
 
-        return (BigQueryViewParser._table_identifiers, BigQueryViewParser._with_aliases)
+        return BigQueryViewParser._table_identifiers, BigQueryViewParser._with_aliases
 
     @classmethod
     def lowercase_of_tuple(cls, tuple_to_lowercase):
@@ -62,257 +65,75 @@ class BigQueryViewParser:
         ParserElement.enablePackrat()
 
         LPAR, RPAR, COMMA, LBRACKET, RBRACKET, LT, GT = map(Suppress, "(),[]<>")
+        QUOT, APOS, ACC, DOT = map(Suppress, "\"'`.")
         ungrouped_select_stmt = Forward().setName("select statement")
 
+        QUOTED_QUOT = QuotedString('"')
+        QUOTED_APOS = QuotedString("'")
+        QUOTED_ACC = QuotedString("`")
+
+        # fmt: off
         # keywords
         (
-            UNION,
-            ALL,
-            AND,
-            INTERSECT,
-            EXCEPT,
-            COLLATE,
-            ASC,
-            DESC,
-            ON,
-            USING,
-            NATURAL,
-            INNER,
-            CROSS,
-            LEFT,
-            RIGHT,
-            OUTER,
-            FULL,
-            JOIN,
-            AS,
-            INDEXED,
-            NOT,
-            SELECT,
-            DISTINCT,
-            FROM,
-            WHERE,
-            GROUP,
-            BY,
-            HAVING,
-            ORDER,
-            BY,
-            LIMIT,
-            OFFSET,
-            OR,
-            CAST,
-            ISNULL,
-            NOTNULL,
-            NULL,
-            IS,
-            BETWEEN,
-            ELSE,
-            END,
-            CASE,
-            WHEN,
-            THEN,
-            EXISTS,
-            COLLATE,
-            IN,
-            LIKE,
-            GLOB,
-            REGEXP,
-            MATCH,
-            ESCAPE,
-            CURRENT_TIME,
-            CURRENT_DATE,
-            CURRENT_TIMESTAMP,
-            WITH,
-            EXTRACT,
-            PARTITION,
-            ROWS,
-            RANGE,
-            UNBOUNDED,
-            PRECEDING,
-            CURRENT,
-            ROW,
-            FOLLOWING,
-            OVER,
-            INTERVAL,
-            DATE_ADD,
-            DATE_SUB,
-            ADDDATE,
-            SUBDATE,
-            REGEXP_EXTRACT,
-            SPLIT,
-            ORDINAL,
-            FIRST_VALUE,
-            LAST_VALUE,
-            NTH_VALUE,
-            LEAD,
-            LAG,
-            PERCENTILE_CONT,
-            PRECENTILE_DISC,
-            RANK,
-            DENSE_RANK,
-            PERCENT_RANK,
-            CUME_DIST,
-            NTILE,
-            ROW_NUMBER,
-            DATE,
-            TIME,
-            DATETIME,
-            TIMESTAMP,
-            UNNEST,
-            INT64,
-            NUMERIC,
-            FLOAT64,
-            BOOL,
-            BYTES,
-            GEOGRAPHY,
-            ARRAY,
-            STRUCT,
-            SAFE_CAST,
-            ANY_VALUE,
-            ARRAY_AGG,
-            ARRAY_CONCAT_AGG,
-            AVG,
-            BIT_AND,
-            BIT_OR,
-            BIT_XOR,
-            COUNT,
-            COUNTIF,
-            LOGICAL_AND,
-            LOGICAL_OR,
-            MAX,
-            MIN,
-            STRING_AGG,
-            SUM,
-            CORR,
-            COVAR_POP,
-            COVAR_SAMP,
-            STDDEV_POP,
-            STDDEV_SAMP,
-            STDDEV,
-            VAR_POP,
-            VAR_SAMP,
-            VARIANCE,
-            TIMESTAMP_ADD,
-            TIMESTAMP_SUB,
-            GENERATE_ARRAY,
-            GENERATE_DATE_ARRAY,
-            GENERATE_TIMESTAMP_ARRAY,
-            FOR,
-            SYSTEMTIME,
-            AS,
-            OF,
-            WINDOW,
-            RESPECT,
-            IGNORE,
-            NULLS,
+            UNION, ALL, AND, INTERSECT, EXCEPT, COLLATE, ASC, DESC, ON, USING, NATURAL,
+            INNER, CROSS, LEFT, RIGHT, OUTER, FULL, JOIN, AS, INDEXED, NOT, SELECT,
+            DISTINCT, FROM, WHERE, GROUP, BY, HAVING, ORDER, BY, LIMIT, OFFSET, OR,
+            CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE, END, CASE, WHEN, THEN,
+            EXISTS, COLLATE, IN, LIKE, GLOB, REGEXP, MATCH, ESCAPE, CURRENT_TIME,
+            CURRENT_DATE, CURRENT_TIMESTAMP, WITH, EXTRACT, PARTITION, ROWS, RANGE,
+            UNBOUNDED, PRECEDING, CURRENT, ROW, FOLLOWING, OVER, INTERVAL, DATE_ADD,
+            DATE_SUB, ADDDATE, SUBDATE, REGEXP_EXTRACT, SPLIT, ORDINAL, FIRST_VALUE,
+            LAST_VALUE, NTH_VALUE, LEAD, LAG, PERCENTILE_CONT, PRECENTILE_DISC, RANK,
+            DENSE_RANK, PERCENT_RANK, CUME_DIST, NTILE, ROW_NUMBER, DATE, TIME, DATETIME,
+            TIMESTAMP, UNNEST, INT64, NUMERIC, FLOAT64, BOOL, BYTES, GEOGRAPHY, ARRAY,
+            STRUCT, SAFE_CAST, ANY_VALUE, ARRAY_AGG, ARRAY_CONCAT_AGG, AVG, BIT_AND,
+            BIT_OR, BIT_XOR, COUNT, COUNTIF, LOGICAL_AND, LOGICAL_OR, MAX, MIN,
+            STRING_AGG, SUM, CORR, COVAR_POP, COVAR_SAMP, STDDEV_POP, STDDEV_SAMP,
+            STDDEV, VAR_POP, VAR_SAMP, VARIANCE, TIMESTAMP_ADD, TIMESTAMP_SUB,
+            GENERATE_ARRAY, GENERATE_DATE_ARRAY, GENERATE_TIMESTAMP_ARRAY, FOR,
+            SYSTEMTIME, AS, OF, WINDOW, RESPECT, IGNORE, NULLS,
         ) = map(
             CaselessKeyword,
             """
-            UNION, ALL, AND, INTERSECT, EXCEPT, COLLATE, ASC, DESC, ON, USING,
-            NATURAL, INNER, CROSS, LEFT, RIGHT, OUTER, FULL, JOIN, AS, INDEXED,
-            NOT, SELECT, DISTINCT, FROM, WHERE, GROUP, BY, HAVING, ORDER, BY,
-            LIMIT, OFFSET, OR, CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE,
-            END, CASE, WHEN, THEN, EXISTS, COLLATE, IN, LIKE, GLOB, REGEXP,
-            MATCH, ESCAPE, CURRENT_TIME, CURRENT_DATE, CURRENT_TIMESTAMP, WITH,
-            EXTRACT, PARTITION, ROWS, RANGE, UNBOUNDED, PRECEDING, CURRENT,
-            ROW, FOLLOWING, OVER, INTERVAL, DATE_ADD, DATE_SUB, ADDDATE,
-            SUBDATE, REGEXP_EXTRACT, SPLIT, ORDINAL, FIRST_VALUE, LAST_VALUE,
-            NTH_VALUE, LEAD, LAG, PERCENTILE_CONT, PRECENTILE_DISC, RANK,
-            DENSE_RANK, PERCENT_RANK, CUME_DIST, NTILE, ROW_NUMBER, DATE, TIME,
-            DATETIME, TIMESTAMP, UNNEST, INT64, NUMERIC, FLOAT64, BOOL, BYTES,
-            GEOGRAPHY, ARRAY, STRUCT, SAFE_CAST, ANY_VALUE, ARRAY_AGG,
-            ARRAY_CONCAT_AGG, AVG, BIT_AND, BIT_OR, BIT_XOR, COUNT, COUNTIF,
-            LOGICAL_AND, LOGICAL_OR, MAX, MIN, STRING_AGG, SUM, CORR,
-            COVAR_POP, COVAR_SAMP, STDDEV_POP, STDDEV_SAMP, STDDEV, VAR_POP,
-            VAR_SAMP, VARIANCE, TIMESTAMP_ADD, TIMESTAMP_SUB, GENERATE_ARRAY,
-            GENERATE_DATE_ARRAY, GENERATE_TIMESTAMP_ARRAY, FOR, SYSTEMTIME, AS,
-            OF, WINDOW, RESPECT, IGNORE, NULLS
-                 """.replace(
-                ",", ""
-            ).split(),
+            UNION, ALL, AND, INTERSECT, EXCEPT, COLLATE, ASC, DESC, ON, USING, NATURAL,
+            INNER, CROSS, LEFT, RIGHT, OUTER, FULL, JOIN, AS, INDEXED, NOT, SELECT,
+            DISTINCT, FROM, WHERE, GROUP, BY, HAVING, ORDER, BY, LIMIT, OFFSET, OR,
+            CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE, END, CASE, WHEN, THEN,
+            EXISTS, COLLATE, IN, LIKE, GLOB, REGEXP, MATCH, ESCAPE, CURRENT_TIME,
+            CURRENT_DATE, CURRENT_TIMESTAMP, WITH, EXTRACT, PARTITION, ROWS, RANGE,
+            UNBOUNDED, PRECEDING, CURRENT, ROW, FOLLOWING, OVER, INTERVAL, DATE_ADD,
+            DATE_SUB, ADDDATE, SUBDATE, REGEXP_EXTRACT, SPLIT, ORDINAL, FIRST_VALUE,
+            LAST_VALUE, NTH_VALUE, LEAD, LAG, PERCENTILE_CONT, PRECENTILE_DISC, RANK,
+            DENSE_RANK, PERCENT_RANK, CUME_DIST, NTILE, ROW_NUMBER, DATE, TIME, DATETIME,
+            TIMESTAMP, UNNEST, INT64, NUMERIC, FLOAT64, BOOL, BYTES, GEOGRAPHY, ARRAY,
+            STRUCT, SAFE_CAST, ANY_VALUE, ARRAY_AGG, ARRAY_CONCAT_AGG, AVG, BIT_AND,
+            BIT_OR, BIT_XOR, COUNT, COUNTIF, LOGICAL_AND, LOGICAL_OR, MAX, MIN,
+            STRING_AGG, SUM, CORR, COVAR_POP, COVAR_SAMP, STDDEV_POP, STDDEV_SAMP,
+            STDDEV, VAR_POP, VAR_SAMP, VARIANCE, TIMESTAMP_ADD, TIMESTAMP_SUB,
+            GENERATE_ARRAY, GENERATE_DATE_ARRAY, GENERATE_TIMESTAMP_ARRAY, FOR,
+            SYSTEMTIME, AS, OF, WINDOW, RESPECT, IGNORE, NULLS,
+            """.replace(",", "").split(),
         )
 
         keyword_nonfunctions = MatchFirst(
-            (
-                UNION,
-                ALL,
-                INTERSECT,
-                EXCEPT,
-                COLLATE,
-                ASC,
-                DESC,
-                ON,
-                USING,
-                NATURAL,
-                INNER,
-                CROSS,
-                LEFT,
-                RIGHT,
-                OUTER,
-                FULL,
-                JOIN,
-                AS,
-                INDEXED,
-                NOT,
-                SELECT,
-                DISTINCT,
-                FROM,
-                WHERE,
-                GROUP,
-                BY,
-                HAVING,
-                ORDER,
-                BY,
-                LIMIT,
-                OFFSET,
-                CAST,
-                ISNULL,
-                NOTNULL,
-                NULL,
-                IS,
-                BETWEEN,
-                ELSE,
-                END,
-                CASE,
-                WHEN,
-                THEN,
-                EXISTS,
-                COLLATE,
-                IN,
-                LIKE,
-                GLOB,
-                REGEXP,
-                MATCH,
-                STRUCT,
-                WINDOW,
-            )
+            (UNION, ALL, INTERSECT, EXCEPT, COLLATE, ASC, DESC, ON, USING,
+             NATURAL, INNER, CROSS, LEFT, RIGHT, OUTER, FULL, JOIN, AS, INDEXED,
+             NOT, SELECT, DISTINCT, FROM, WHERE, GROUP, BY, HAVING, ORDER, BY,
+             LIMIT, OFFSET, CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE, END,
+             CASE, WHEN, THEN, EXISTS, COLLATE, IN, LIKE, GLOB, REGEXP, MATCH,
+             STRUCT, WINDOW,
+             )
         )
 
         keyword = keyword_nonfunctions | MatchFirst(
-            (
-                ESCAPE,
-                CURRENT_TIME,
-                CURRENT_DATE,
-                CURRENT_TIMESTAMP,
-                DATE_ADD,
-                DATE_SUB,
-                ADDDATE,
-                SUBDATE,
-                INTERVAL,
-                STRING_AGG,
-                REGEXP_EXTRACT,
-                SPLIT,
-                ORDINAL,
-                UNNEST,
-                SAFE_CAST,
-                PARTITION,
-                TIMESTAMP_ADD,
-                TIMESTAMP_SUB,
-                ARRAY,
-                GENERATE_ARRAY,
-                GENERATE_DATE_ARRAY,
-                GENERATE_TIMESTAMP_ARRAY,
-            )
+            (ESCAPE, CURRENT_TIME, CURRENT_DATE, CURRENT_TIMESTAMP, DATE_ADD,
+             DATE_SUB, ADDDATE, SUBDATE, INTERVAL, STRING_AGG, REGEXP_EXTRACT,
+             SPLIT, ORDINAL, UNNEST, SAFE_CAST, PARTITION, TIMESTAMP_ADD,
+             TIMESTAMP_SUB, ARRAY, GENERATE_ARRAY, GENERATE_DATE_ARRAY,
+             GENERATE_TIMESTAMP_ARRAY,
+             )
         )
+        # fmt: on
 
         identifier_word = Word(alphas + "_@#", alphanums + "@$#_")
         identifier = ~keyword + identifier_word.copy()
@@ -320,8 +141,7 @@ class BigQueryViewParser:
         # NOTE: Column names can be keywords.  Doc says they cannot, but in practice it seems to work.
         column_name = identifier_word.copy()
         qualified_column_name = Combine(
-            column_name
-            + (ZeroOrMore(" ") + "." + ZeroOrMore(" ") + column_name) * (0, 6)
+            column_name + ("." + column_name)[..., 6], adjacent=False
         )
         # NOTE: As with column names, column aliases can be keywords, e.g. functions like `current_time`.  Other
         # keywords, e.g. `from` make parsing pretty difficult (e.g. "SELECT a from from b" is confusing.)
@@ -334,13 +154,11 @@ class BigQueryViewParser:
         # NOTE: The expression in a CASE statement can be an integer.  E.g. this is valid SQL:
         # select CASE 1 WHEN 1 THEN -1 ELSE -2 END from test_table
         unquoted_case_identifier = ~keyword + Word(alphanums + "$_")
-        quoted_case_identifier = ~keyword + (
-            QuotedString('"') ^ Suppress("`") + CharsNotIn("`") + Suppress("`")
-        )
+        quoted_case_identifier = QUOTED_QUOT | QUOTED_ACC
         case_identifier = quoted_case_identifier | unquoted_case_identifier
         case_expr = (
-            Optional(case_identifier + Suppress("."))
-            + Optional(case_identifier + Suppress("."))
+            Optional(case_identifier + DOT)
+            + Optional(case_identifier + DOT)
             + case_identifier
         )
 
@@ -349,7 +167,7 @@ class BigQueryViewParser:
 
         integer = Regex(r"[+-]?\d+")
         numeric_literal = Regex(r"[+-]?\d*\.?\d+([eE][+-]?\d+)?")
-        string_literal = QuotedString("'") | QuotedString('"') | QuotedString("`")
+        string_literal = QUOTED_APOS | QUOTED_QUOT | QUOTED_ACC
         regex_literal = "r" + string_literal
         blob_literal = Regex(r"[xX]'[0-9A-Fa-f]+'")
         date_or_time_literal = (DATE | TIME | DATETIME | TIMESTAMP) + string_literal
@@ -377,6 +195,7 @@ class BigQueryViewParser:
             MINUTE_MICROSECOND MINUTE_SECOND MONTH QUARTER SECOND
             SECOND_MICROSECOND WEEK YEAR YEAR_MONTH""",
             caseless=True,
+            as_keyword=True,
         )
         datetime_operators = (
             DATE_ADD | DATE_SUB | ADDDATE | SUBDATE | TIMESTAMP_ADD | TIMESTAMP_SUB
@@ -530,7 +349,7 @@ class BigQueryViewParser:
 
         case_when = WHEN + expr.copy()("when")
         case_then = THEN + expr.copy()("then")
-        case_clauses = Group(ZeroOrMore(case_when + case_then))
+        case_clauses = Group((case_when + case_then)[...])
         case_else = ELSE + expr.copy()("else")
         case_stmt = (
             CASE
@@ -566,7 +385,7 @@ class BigQueryViewParser:
         struct_term = LPAR + delimitedList(expr_term) + RPAR
 
         UNARY, BINARY, TERNARY = 1, 2, 3
-        expr << infixNotation(
+        expr <<= infixNotation(
             (expr_term | struct_term),
             [
                 (oneOf("- + ~") | NOT, UNARY, opAssoc.RIGHT),
@@ -601,10 +420,7 @@ class BigQueryViewParser:
             ],
         )
         quoted_expr = (
-            expr
-            ^ Suppress('"') + expr + Suppress('"')
-            ^ Suppress("'") + expr + Suppress("'")
-            ^ Suppress("`") + expr + Suppress("`")
+            expr | QUOT + expr + QUOT | APOS + expr + APOS | ACC + expr + ACC
         )("quoted_expr")
 
         compound_operator = (
@@ -667,41 +483,32 @@ class BigQueryViewParser:
             cls._table_identifiers.add(tuple(padded_list))
 
         standard_table_part = ~keyword + Word(alphanums + "_")
-        quoted_project_part = (
-            Suppress('"') + CharsNotIn('"') + Suppress('"')
-            | Suppress("'") + CharsNotIn("'") + Suppress("'")
-            | Suppress("`") + CharsNotIn("`") + Suppress("`")
-        )
+        quoted_project_part = QUOTED_QUOT | QUOTED_APOS | QUOTED_ACC
         quoted_table_part = (
-            Suppress('"') + CharsNotIn('".') + Suppress('"')
-            | Suppress("'") + CharsNotIn("'.") + Suppress("'")
-            | Suppress("`") + CharsNotIn("`.") + Suppress("`")
+            QUOT + CharsNotIn('".') + QUOT
+            | APOS + CharsNotIn("'.") + APOS
+            | ACC + CharsNotIn("`.") + ACC
         )
         quoted_table_parts_identifier = (
             Optional(
-                (quoted_project_part("project") | standard_table_part("project"))
-                + Suppress(".")
+                (quoted_project_part("project") | standard_table_part("project")) + DOT
             )
             + Optional(
-                (quoted_table_part("dataset") | standard_table_part("dataset"))
-                + Suppress(".")
+                (quoted_table_part("dataset") | standard_table_part("dataset")) + DOT
             )
             + (quoted_table_part("table") | standard_table_part("table"))
         ).setParseAction(record_table_identifier)
 
         def record_quoted_table_identifier(t):
-            identifier_list = t.asList()[0].split(".")
-            first = ".".join(identifier_list[0:-2]) or None
-            second = identifier_list[-2]
-            third = identifier_list[-1]
+            identifier_list = t[0].split(".")
+            *first, second, third = identifier_list
+            first = ".".join(first) or None
             identifier_list = [first, second, third]
             padded_list = [None] * (3 - len(identifier_list)) + identifier_list
             cls._table_identifiers.add(tuple(padded_list))
 
         quotable_table_parts_identifier = (
-            Suppress('"') + CharsNotIn('"') + Suppress('"')
-            | Suppress("'") + CharsNotIn("'") + Suppress("'")
-            | Suppress("`") + CharsNotIn("`") + Suppress("`")
+            QUOTED_QUOT | QUOTED_APOS | QUOTED_ACC
         ).setParseAction(record_quoted_table_identifier)
 
         table_identifier = (
@@ -719,9 +526,7 @@ class BigQueryViewParser:
             | (UNNEST + LPAR + expr + RPAR)
         ) + Optional(Optional(AS) + table_alias)
 
-        join_source << single_source + ZeroOrMore(
-            join_op + single_source + join_constraint
-        )
+        join_source <<= single_source + (join_op + single_source + join_constraint)[...]
 
         over_partition = (PARTITION + BY + delimitedList(partition_expression_list))(
             "over_partition"
@@ -787,9 +592,9 @@ class BigQueryViewParser:
         select_core = Optional(with_stmt) + select_no_with
         grouped_select_core = select_core | (LPAR + select_core + RPAR)
 
-        ungrouped_select_stmt << (
+        ungrouped_select_stmt <<= (
             grouped_select_core
-            + ZeroOrMore(compound_operator + grouped_select_core)
+            + (compound_operator + grouped_select_core)[...]
             + Optional(
                 LIMIT
                 + (Group(expr + OFFSET + expr) | Group(expr + COMMA + expr) | expr)(
@@ -815,7 +620,7 @@ class BigQueryViewParser:
             + select_stmt
             + RPAR
         )
-        with_stmt << (WITH + delimitedList(with_clause))
+        with_stmt <<= WITH + delimitedList(with_clause)
         with_stmt.ignore(sql_comment)
 
         cls._parser = select_stmt
@@ -839,6 +644,7 @@ class BigQueryViewParser:
 
 
 if __name__ == "__main__":
+    # fmt: off
     TEST_CASES = [
         [
             """
@@ -846,11 +652,7 @@ if __name__ == "__main__":
             """,
             [
                 (None, "y", "a"),
-                (
-                    None,
-                    None,
-                    "b",
-                ),
+                (None, None, "b"),
             ],
         ],
         [
@@ -883,11 +685,7 @@ if __name__ == "__main__":
             select * from xyzzy
             """,
             [
-                (
-                    None,
-                    None,
-                    "xyzzy",
-                ),
+                (None, None, "xyzzy"),
             ],
         ],
         [
@@ -895,11 +693,7 @@ if __name__ == "__main__":
             select z.* from xyzzy
             """,
             [
-                (
-                    None,
-                    None,
-                    "xyzzy",
-                ),
+                (None, None, "xyzzy"),
             ],
         ],
         [
@@ -1042,11 +836,7 @@ if __name__ == "__main__":
             FROM a
             """,
             [
-                (
-                    None,
-                    None,
-                    "a",
-                ),
+                (None, None, "a"),
             ],
         ],
         [
@@ -1056,11 +846,7 @@ if __name__ == "__main__":
             FROM T
             """,
             [
-                (
-                    None,
-                    None,
-                    "T",
-                ),
+                (None, None, "T"),
             ],
         ],
         [
@@ -1464,11 +1250,7 @@ if __name__ == "__main__":
             FROM d
             """,
             [
-                (
-                    None,
-                    None,
-                    "d",
-                ),
+                (None, None, "d"),
             ],
         ],
         [
@@ -1479,11 +1261,7 @@ if __name__ == "__main__":
             FROM i
             """,
             [
-                (
-                    None,
-                    None,
-                    "i",
-                ),
+                (None, None, "i"),
             ],
         ],
         [
@@ -1493,11 +1271,7 @@ if __name__ == "__main__":
             FROM m
             """,
             [
-                (
-                    None,
-                    None,
-                    "m",
-                ),
+                (None, None, "m",),
             ],
         ],
         [
@@ -1508,11 +1282,7 @@ if __name__ == "__main__":
             FROM r
             """,
             [
-                (
-                    None,
-                    None,
-                    "r",
-                ),
+                (None, None, "r"),
             ],
         ],
         [
@@ -1522,11 +1292,7 @@ if __name__ == "__main__":
             FROM w
             """,
             [
-                (
-                    None,
-                    None,
-                    "w",
-                ),
+                (None, None, "w"),
             ],
         ],
         [
@@ -1537,11 +1303,7 @@ if __name__ == "__main__":
             FROM ac
             """,
             [
-                (
-                    None,
-                    None,
-                    "ac",
-                ),
+                (None, None, "ac"),
             ],
         ],
         [
@@ -1551,11 +1313,7 @@ if __name__ == "__main__":
             FROM ah
             """,
             [
-                (
-                    None,
-                    None,
-                    "ah",
-                ),
+                (None, None, "ah"),
             ],
         ],
         [
@@ -1566,11 +1324,7 @@ if __name__ == "__main__":
             FROM an
             """,
             [
-                (
-                    None,
-                    None,
-                    "an",
-                ),
+                (None, None, "an"),
             ],
         ],
         [
@@ -1581,16 +1335,8 @@ if __name__ == "__main__":
             SELECT y FROM onE JOIN TWo
             """,
             [
-                (
-                    None,
-                    None,
-                    "y",
-                ),
-                (
-                    None,
-                    None,
-                    "b",
-                ),
+                (None, None, "y"),
+                (None, None, "b"),
             ],
         ],
         [
@@ -1601,16 +1347,8 @@ if __name__ == "__main__":
             FROM OnE
             """,
             [
-                (
-                    None,
-                    None,
-                    "oNE",
-                ),
-                (
-                    None,
-                    None,
-                    "OnE",
-                ),
+                (None, None, "oNE"),
+                (None, None, "OnE"),
             ],
         ],
         [
@@ -1763,7 +1501,10 @@ if __name__ == "__main__":
             )
             SELECT y FROM z
             """,
-            [(None, None, "b"), (None, None, "z")],
+            [
+                (None, None, "b"),
+                (None, None, "z")
+            ],
         ],
         [
             """
@@ -1771,14 +1512,18 @@ if __name__ == "__main__":
                 FIRST_VALUE(x IGNORE NULLS) OVER (PARTITION BY y)
             FROM z
             """,
-            [(None, None, "z")],
+            [
+                (None, None, "z")
+            ],
         ],
         [
             """
             SELECT a . b .   c
             FROM d
             """,
-            [(None, None, "d")],
+            [
+                (None, None, "d")
+            ],
         ],
         [
             """
@@ -1794,7 +1539,10 @@ if __name__ == "__main__":
             )
             SELECT h FROM a
             """,
-            [(None, None, "c"), (None, None, "f")],
+            [
+                (None, None, "c"),
+                (None, None, "f")
+            ],
         ],
         [
             """
@@ -1810,21 +1558,29 @@ if __name__ == "__main__":
             )
             (SELECT h FROM a)
             """,
-            [(None, None, "c"), (None, None, "f")],
+            [
+                (None, None, "c"),
+                (None, None, "f")
+            ],
         ],
         [
             """
             SELECT * FROM a.b.`c`
             """,
-            [("a", "b", "c")],
+            [
+                ("a", "b", "c"),
+            ],
         ],
         [
             """
             SELECT * FROM 'a'.b.`c`
             """,
-            [("a", "b", "c")],
+            [
+                ("a", "b", "c"),
+            ],
         ],
     ]
+    # fmt: on
 
     parser = BigQueryViewParser()
     for test_index, test_case in enumerate(TEST_CASES):
index d32ef392282a0040ba0637a6a62eb2b80cb93c87..c901db14db153538ce7f8c9d940908657ccec78d 100644 (file)
@@ -84,34 +84,37 @@ TODO:
 from pyparsing import (
     Word,
     alphanums,
-    Keyword,
+    CaselessKeyword,
     Group,
     Forward,
     Suppress,
     OneOrMore,
-    oneOf,
+    one_of,
 )
 import re
 
 
+# Updated on 02 Dec 2021 according to ftp://ftp.unicode.org/Public/UNIDATA/Blocks.txt
 alphabet_ranges = [
-    ##CYRILIC: https://en.wikipedia.org/wiki/Cyrillic_(Unicode_block)
+    # CYRILLIC: https://en.wikipedia.org/wiki/Cyrillic_(Unicode_block)
     [int("0400", 16), int("04FF", 16)],
-    ##THAI: https://en.wikipedia.org/wiki/Thai_(Unicode_block)
-    [int("0E00", 16), int("0E7F", 16)],
-    ##ARABIC: https://en.wikipedia.org/wiki/Arabic_(Unicode_block) (Arabic (0600–06FF)+ Syriac (0700–074F)+ Arabic Supplement (0750–077F) )
+    # ARABIC: https://en.wikipedia.org/wiki/Arabic_(Unicode_block) (Arabic (0600–06FF)+ Syriac (0700–074F)+ Arabic Supplement (0750–077F))
     [int("0600", 16), int("07FF", 16)],
-    ##CHINESE: https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
-    [int("0400", 16), int("09FF", 16)],
-    # JAPANESE : https://en.wikipedia.org/wiki/Japanese_writing_system
+    # THAI: https://en.wikipedia.org/wiki/Thai_(Unicode_block)
+    [int("0E00", 16), int("0E7F", 16)],
+    # JAPANESE : https://en.wikipedia.org/wiki/Japanese_writing_system (Hiragana (3040–309F) + Katakana (30A0–30FF))
     [int("3040", 16), int("30FF", 16)],
+    # Enclosed CJK Letters and Months
+    [int("3200", 16), int("32FF", 16)],
+    # CHINESE: https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
+    [int("4E00", 16), int("9FFF", 16)],
     # KOREAN : https://en.wikipedia.org/wiki/Hangul
-    [int("AC00", 16), int("D7AF", 16)],
     [int("1100", 16), int("11FF", 16)],
     [int("3130", 16), int("318F", 16)],
-    [int("3200", 16), int("32FF", 16)],
     [int("A960", 16), int("A97F", 16)],
+    [int("AC00", 16), int("D7AF", 16)],
     [int("D7B0", 16), int("D7FF", 16)],
+    # Halfwidth and Fullwidth Forms
     [int("FF00", 16), int("FFEF", 16)],
 ]
 
@@ -152,23 +155,23 @@ class BooleanSearchParser:
         alphabet = alphanums
 
         # support for non-western alphabets
-        for r in alphabet_ranges:
-            alphabet += "".join(chr(c) for c in range(*r) if not chr(c).isspace())
+        for lo, hi in alphabet_ranges:
+            alphabet += "".join(chr(c) for c in range(lo, hi + 1) if not chr(c).isspace())
 
-        operatorWord = Group(Word(alphabet + "*")).setResultsName("word*")
+        operatorWord = Group(Word(alphabet + "*")).set_results_name("word*")
 
         operatorQuotesContent = Forward()
         operatorQuotesContent << ((operatorWord + operatorQuotesContent) | operatorWord)
 
         operatorQuotes = (
-            Group(Suppress('"') + operatorQuotesContent + Suppress('"')).setResultsName(
+            Group(Suppress('"') + operatorQuotesContent + Suppress('"')).set_results_name(
                 "quotes"
             )
             | operatorWord
         )
 
         operatorParenthesis = (
-            Group(Suppress("(") + operatorOr + Suppress(")")).setResultsName(
+            Group(Suppress("(") + operatorOr + Suppress(")")).set_results_name(
                 "parenthesis"
             )
             | operatorQuotes
@@ -176,7 +179,7 @@ class BooleanSearchParser:
 
         operatorNot = Forward()
         operatorNot << (
-            Group(Suppress(Keyword("not", caseless=True)) + operatorNot).setResultsName(
+            Group(Suppress(CaselessKeyword("not")) + operatorNot).set_results_name(
                 "not"
             )
             | operatorParenthesis
@@ -185,22 +188,22 @@ class BooleanSearchParser:
         operatorAnd = Forward()
         operatorAnd << (
             Group(
-                operatorNot + Suppress(Keyword("and", caseless=True)) + operatorAnd
-            ).setResultsName("and")
+                operatorNot + Suppress(CaselessKeyword("and")) + operatorAnd
+            ).set_results_name("and")
             | Group(
-                operatorNot + OneOrMore(~oneOf("and or") + operatorAnd)
-            ).setResultsName("and")
+                operatorNot + OneOrMore(~one_of("and or") + operatorAnd)
+            ).set_results_name("and")
             | operatorNot
         )
 
         operatorOr << (
             Group(
-                operatorAnd + Suppress(Keyword("or", caseless=True)) + operatorOr
-            ).setResultsName("or")
+                operatorAnd + Suppress(CaselessKeyword("or")) + operatorOr
+            ).set_results_name("or")
             | operatorAnd
         )
 
-        return operatorOr.parseString
+        return operatorOr.parse_string
 
     def evaluateAnd(self, argument):
         return all(self.evaluate(arg) for arg in argument)
@@ -217,7 +220,7 @@ class BooleanSearchParser:
     def evaluateQuotes(self, argument):
         """Evaluate quoted strings
 
-        First is does an 'and' on the indidual search terms, then it asks the
+        First it does an 'and' on the individual search terms, then it asks the
         function GetQuoted to only return the subset of ID's that contain the
         literal string.
         """
@@ -461,6 +464,37 @@ class ParserTest(BooleanSearchParser):
 
             all_ok = all_ok and test_passed
 
+        # Tests for non-western characters; parsing these expressions raised
+        # pyparsing.exceptions.ParseException under the previous
+        # alphabet_ranges configuration
+        non_western_exprs = {
+            "0": "*",
+            "1": "ヿ",  # Edge character
+            "2": "亀",  # Character in CJK block
+            "3": "ヿ or 亀",
+            "4": "ヿ and 亀",
+            "5": "not ヿ"
+        }
+
+        non_western_texts_matcheswith = {
+            "안녕하세요, 당신은 어떠세요?": ["0", "5"],
+            "ヿ": ["0", "1", "3"],
+            "亀": ["0", "2", "3", "5"],
+            "亀 ヿ": ["0", "1", "2", "3", "4"],
+        }
+
+        for text, matches in non_western_texts_matcheswith.items():
+            _matches = []
+            for _id, expr in non_western_exprs.items():
+                if self.match(text, expr):
+                    _matches.append(_id)
+
+            test_passed = sorted(matches) == sorted(_matches)
+            if not test_passed:
+                print("Failed", repr(text), "expected", matches, "matched", _matches)
+
+            all_ok = all_ok and test_passed
+
         return all_ok
 
 
index e1371c0022ac7869e32400fc103d75ebfce517b0..31fd0b4b9e0e5687d63003f43ffcadbf12519409 100644 (file)
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: pyparsing
-Version: 3.0.6
+Version: 3.0.7
 Summary: Python parsing module
 Home-page: https://github.com/pyparsing/pyparsing/
 Author: Paul McGuire
@@ -79,8 +79,8 @@ Description: PyParsing -- A Python Parsing Module
         
         See `CHANGES <https://github.com/pyparsing/pyparsing/blob/master/CHANGES>`__ file.
         
-        .. |Build Status| image:: https://travis-ci.com/pyparsing/pyparsing.svg?branch=master
-           :target: https://travis-ci.com/pyparsing/pyparsing
+        .. |Build Status| image:: https://github.com/pyparsing/pyparsing/actions/workflows/ci.yml/badge.svg
+           :target: https://github.com/pyparsing/pyparsing/actions/workflows/ci.yml
         .. |Coverage| image:: https://codecov.io/gh/pyparsing/pyparsing/branch/master/graph/badge.svg
           :target: https://codecov.io/gh/pyparsing/pyparsing
         
index 288618fe7cb914118520214f4746a2ef05088de0..47b8e5cf19e7011fcf3ae46f4eff4453abd13fca 100644 (file)
@@ -125,8 +125,8 @@ class version_info(NamedTuple):
         )
 
 
-__version_info__ = version_info(3, 0, 6, "final", 0)
-__version_time__ = "12 Nov 2021 16:06 UTC"
+__version_info__ = version_info(3, 0, 7, "final", 0)
+__version_time__ = "15 Jan 2022 04:10 UTC"
 __version__ = __version_info__.__version__
 __versionTime__ = __version_time__
 __author__ = "Paul McGuire <ptmcg.gm+pyparsing@gmail.com>"
index ff24eee5074f5e8bdb6d95f3398c533af1d6e184..0e10b673b13e90ced1bf3661037999c01c1b1379 100644 (file)
@@ -5,6 +5,7 @@ import os
 from typing import (
     Optional as OptionalType,
     Iterable as IterableType,
+    NamedTuple,
     Union,
     Callable,
     Any,
@@ -14,6 +15,7 @@ from typing import (
     TextIO,
     Set,
     Dict as DictType,
+    Sequence,
 )
 from abc import ABC, abstractmethod
 from enum import Enum
@@ -114,7 +116,7 @@ class __diag__(__config_flags):
     _debug_names = [name for name in _all_names if name.startswith("enable_debug")]
 
     @classmethod
-    def enable_all_warnings(cls):
+    def enable_all_warnings(cls) -> None:
         for name in cls._warning_names:
             cls.enable(name)
 
@@ -152,21 +154,21 @@ class Diagnostics(Enum):
     enable_debug_on_named_expressions = 7
 
 
-def enable_diag(diag_enum):
+def enable_diag(diag_enum: Diagnostics) -> None:
     """
     Enable a global pyparsing diagnostic flag (see :class:`Diagnostics`).
     """
     __diag__.enable(diag_enum.name)
 
 
-def disable_diag(diag_enum):
+def disable_diag(diag_enum: Diagnostics) -> None:
     """
     Disable a global pyparsing diagnostic flag (see :class:`Diagnostics`).
     """
     __diag__.disable(diag_enum.name)
 
 
-def enable_all_warnings():
+def enable_all_warnings() -> None:
     """
     Enable all global pyparsing diagnostic warnings (see :class:`Diagnostics`).
     """
@@ -178,7 +180,7 @@ del __config_flags
 
 
 def _should_enable_warnings(
-    cmd_line_warn_options: List[str], warn_env_var: OptionalType[str]
+    cmd_line_warn_options: IterableType[str], warn_env_var: OptionalType[str]
 ) -> bool:
     enable = bool(warn_env_var)
     for warn_opt in cmd_line_warn_options:
@@ -242,7 +244,7 @@ identbodychars = pyparsing_unicode.Latin1.identbodychars
 nums = "0123456789"
 hexnums = nums + "ABCDEFabcdef"
 alphanums = alphas + nums
-printables = "".join(c for c in string.printable if c not in string.whitespace)
+printables = "".join([c for c in string.printable if c not in string.whitespace])
 
 _trim_arity_call_line = None
 
@@ -311,7 +313,7 @@ def _trim_arity(func, maxargs=2):
 
 def condition_as_parse_action(
     fn: ParseCondition, message: str = None, fatal: bool = False
-):
+) -> ParseAction:
     """
     Function to convert a simple predicate function that returns ``True`` or ``False``
     into a parse action. Can be used in places when a parse action is required
@@ -395,7 +397,7 @@ class ParserElement(ABC):
     _literalStringClass: OptionalType[type] = None
 
     @staticmethod
-    def set_default_whitespace_chars(chars: str):
+    def set_default_whitespace_chars(chars: str) -> None:
         r"""
         Overrides the default whitespace chars
 
@@ -416,7 +418,7 @@ class ParserElement(ABC):
                 expr.whiteChars = set(chars)
 
     @staticmethod
-    def inline_literals_using(cls: type):
+    def inline_literals_using(cls: type) -> None:
         """
         Set class to be used for inclusion of string literals into a parser.
 
@@ -437,6 +439,11 @@ class ParserElement(ABC):
         """
         ParserElement._literalStringClass = cls
 
+    class DebugActions(NamedTuple):
+        debug_try: OptionalType[DebugStartAction]
+        debug_match: OptionalType[DebugSuccessAction]
+        debug_fail: OptionalType[DebugExceptionAction]
+
     def __init__(self, savelist: bool = False):
         self.parseAction: List[ParseAction] = list()
         self.failAction: OptionalType[ParseFailAction] = None
@@ -459,18 +466,14 @@ class ParserElement(ABC):
         # mark results names as modal (report only last) or cumulative (list all)
         self.modalResults = True
         # custom debug actions
-        self.debugActions: Tuple[
-            OptionalType[DebugStartAction],
-            OptionalType[DebugSuccessAction],
-            OptionalType[DebugExceptionAction],
-        ] = (None, None, None)
+        self.debugActions = self.DebugActions(None, None, None)
         self.re = None
         # avoid redundant calls to preParse
         self.callPreparse = True
         self.callDuringTry = False
-        self.suppress_warnings_ = []
+        self.suppress_warnings_: List[Diagnostics] = []
 
-    def suppress_warning(self, warning_type: Diagnostics):
+    def suppress_warning(self, warning_type: Diagnostics) -> "ParserElement":
         """
         Suppress warnings emitted for a particular diagnostic on this expression.
 
@@ -582,9 +585,7 @@ class ParserElement(ABC):
                 self._parse = self._parse._originalParseMethod
         return self
 
-    def set_parse_action(
-        self, *fns: ParseAction, **kwargs
-    ) -> OptionalType["ParserElement"]:
+    def set_parse_action(self, *fns: ParseAction, **kwargs) -> "ParserElement":
         """
         Define one or more actions to perform when successfully matching parse element definition.
 
@@ -635,13 +636,13 @@ class ParserElement(ABC):
                 return int(toks[0])
 
             # use a parse action to verify that the date is a valid date
-            def is_valid_date(toks):
+            def is_valid_date(instring, loc, toks):
                 from datetime import date
                 year, month, day = toks[::2]
                 try:
                     date(year, month, day)
                 except ValueError:
-                    raise ParseException("invalid date given")
+                    raise ParseException(instring, loc, "invalid date given")
 
             integer = Word(nums)
             date_str = integer + '/' + integer + '/' + integer
@@ -664,7 +665,7 @@ class ParserElement(ABC):
         else:
             if not all(callable(fn) for fn in fns):
                 raise TypeError("parse actions must be callable")
-            self.parseAction = list(map(_trim_arity, list(fns)))
+            self.parseAction = [_trim_arity(fn) for fn in fns]
             self.callDuringTry = kwargs.get(
                 "call_during_try", kwargs.get("callDuringTry", False)
             )
@@ -676,7 +677,7 @@ class ParserElement(ABC):
 
         See examples in :class:`copy`.
         """
-        self.parseAction += list(map(_trim_arity, list(fns)))
+        self.parseAction += [_trim_arity(fn) for fn in fns]
         self.callDuringTry = self.callDuringTry or kwargs.get(
             "call_during_try", kwargs.get("callDuringTry", False)
         )
@@ -780,8 +781,8 @@ class ParserElement(ABC):
                 else:
                     pre_loc = loc
                 tokens_start = pre_loc
-                if self.debugActions[TRY]:
-                    self.debugActions[TRY](instring, tokens_start, self)
+                if self.debugActions.debug_try:
+                    self.debugActions.debug_try(instring, tokens_start, self, False)
                 if self.mayIndexError or pre_loc >= len_instring:
                     try:
                         loc, tokens = self.parseImpl(instring, pre_loc, doActions)
@@ -791,8 +792,10 @@ class ParserElement(ABC):
                     loc, tokens = self.parseImpl(instring, pre_loc, doActions)
             except Exception as err:
                 # print("Exception raised:", err)
-                if self.debugActions[FAIL]:
-                    self.debugActions[FAIL](instring, tokens_start, self, err)
+                if self.debugActions.debug_fail:
+                    self.debugActions.debug_fail(
+                        instring, tokens_start, self, err, False
+                    )
                 if self.failAction:
                     self.failAction(instring, tokens_start, self, err)
                 raise
@@ -835,8 +838,10 @@ class ParserElement(ABC):
                             )
                 except Exception as err:
                     # print "Exception raised in user parse action:", err
-                    if self.debugActions[FAIL]:
-                        self.debugActions[FAIL](instring, tokens_start, self, err)
+                    if self.debugActions.debug_fail:
+                        self.debugActions.debug_fail(
+                            instring, tokens_start, self, err, False
+                        )
                     raise
             else:
                 for fn in self.parseAction:
@@ -856,8 +861,10 @@ class ParserElement(ABC):
                         )
         if debugging:
             # print("Matched", self, "->", ret_tokens.as_list())
-            if self.debugActions[MATCH]:
-                self.debugActions[MATCH](instring, tokens_start, loc, self, ret_tokens)
+            if self.debugActions.debug_match:
+                self.debugActions.debug_match(
+                    instring, tokens_start, loc, self, ret_tokens, False
+                )
 
         return loc, ret_tokens
 
@@ -914,15 +921,15 @@ class ParserElement(ABC):
                     return value
             else:
                 ParserElement.packrat_cache_stats[HIT] += 1
-                if self.debug and self.debugActions[TRY]:
+                if self.debug and self.debugActions.debug_try:
                     try:
-                        self.debugActions[TRY](instring, loc, self, cache_hit=True)
+                        self.debugActions.debug_try(instring, loc, self, cache_hit=True)
                     except TypeError:
                         pass
                 if isinstance(value, Exception):
-                    if self.debug and self.debugActions[FAIL]:
+                    if self.debug and self.debugActions.debug_fail:
                         try:
-                            self.debugActions[FAIL](
+                            self.debugActions.debug_fail(
                                 instring, loc, self, value, cache_hit=True
                             )
                         except TypeError:
@@ -930,9 +937,9 @@ class ParserElement(ABC):
                     raise value
 
                 loc_, result, endloc = value[0], value[1].copy(), value[2]
-                if self.debug and self.debugActions[MATCH]:
+                if self.debug and self.debugActions.debug_match:
                     try:
-                        self.debugActions[MATCH](
+                        self.debugActions.debug_match(
                             instring, loc_, endloc, self, result, cache_hit=True
                         )
                     except TypeError:
@@ -1237,7 +1244,7 @@ class ParserElement(ABC):
 
             Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
         """
-        out = []
+        out: List[str] = []
         lastE = 0
         # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
         # keep string locs straight between transform_string and scan_string
@@ -1249,13 +1256,13 @@ class ParserElement(ABC):
                     if isinstance(t, ParseResults):
                         out += t.as_list()
                     elif isinstance(t, Iterable) and not isinstance(t, str_type):
-                        out += list(t)
+                        out.extend(t)
                     else:
                         out.append(t)
                 lastE = e
             out.append(instring[lastE:])
             out = [o for o in out if o]
-            return "".join(map(str, _flatten(out)))
+            return "".join([str(s) for s in _flatten(out)])
         except ParseBaseException as exc:
             if ParserElement.verbose_stacktrace:
                 raise
@@ -1760,7 +1767,7 @@ class ParserElement(ABC):
         - ``exception_action`` - method to be called when expression fails to parse;
           should have the signature ``fn(input_string: str, location: int, expression: ParserElement, exception: Exception, cache_hit: bool)``
         """
-        self.debugActions = (
+        self.debugActions = self.DebugActions(
             start_action or _default_start_debug_action,
             success_action or _default_success_debug_action,
             exception_action or _default_exception_debug_action,
@@ -1768,7 +1775,7 @@ class ParserElement(ABC):
         self.debug = True
         return self
 
-    def set_debug(self, flag=True) -> "ParserElement":
+    def set_debug(self, flag: bool = True) -> "ParserElement":
         """
         Enable display of debugging messages while doing pattern matching.
         Set ``flag`` to ``True`` to enable, ``False`` to disable.
@@ -1856,7 +1863,7 @@ class ParserElement(ABC):
         self._defaultName = None
         return self
 
-    def recurse(self):
+    def recurse(self) -> Sequence["ParserElement"]:
         return []
 
     def _checkRecursion(self, parseElementList):
@@ -1864,7 +1871,7 @@ class ParserElement(ABC):
         for e in self.recurse():
             e._checkRecursion(subRecCheckList)
 
-    def validate(self, validateTrace=None):
+    def validate(self, validateTrace=None) -> None:
         """
         Check defined expressions for valid structure, check for infinite recursive definitions.
         """
@@ -1950,7 +1957,7 @@ class ParserElement(ABC):
         printResults: bool = True,
         failureTests: bool = False,
         postParse: Callable[[str, ParseResults], str] = None,
-    ):
+    ) -> Tuple[bool, List[Tuple[str, Union[ParseResults, Exception]]]]:
         """
         Execute the parse expression on a series of test strings, showing each
         test, the parsed results or where the parse failed. Quick and easy way to
@@ -2053,7 +2060,8 @@ class ParserElement(ABC):
         failureTests = failureTests or failure_tests
         postParse = postParse or post_parse
         if isinstance(tests, str_type):
-            tests = list(map(type(tests).strip, tests.rstrip().splitlines()))
+            line_strip = type(tests).strip
+            tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()]
         if isinstance(comment, str_type):
             comment = Literal(comment)
         if file is None:
@@ -2068,7 +2076,9 @@ class ParserElement(ABC):
         BOM = "\ufeff"
         for t in tests:
             if comment is not None and comment.matches(t, False) or comments and not t:
-                comments.append(pyparsing_test.with_line_numbers(t))
+                comments.append(
+                    pyparsing_test.with_line_numbers(t) if with_line_numbers else t
+                )
                 continue
             if not t:
                 continue
@@ -2441,7 +2451,7 @@ class Keyword(Token):
         raise ParseException(instring, errloc, errmsg, self)
 
     @staticmethod
-    def set_default_keyword_chars(chars):
+    def set_default_keyword_chars(chars) -> None:
         """
         Overrides the default characters used by :class:`Keyword` expressions.
         """
@@ -2983,7 +2993,7 @@ class Regex(Token):
         ret = result
         return loc, ret
 
-    def sub(self, repl):
+    def sub(self, repl: str) -> ParserElement:
         r"""
         Return :class:`Regex` with an attached parse action to transform the parsed
         result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.
@@ -3121,7 +3131,7 @@ class QuotedString(Token):
                 + "|".join(
                     "(?:{}(?!{}))".format(
                         re.escape(self.endQuoteChar[:i]),
-                        _escape_regex_range_chars(self.endQuoteChar[i:]),
+                        re.escape(self.endQuoteChar[i:]),
                     )
                     for i in range(len(self.endQuoteChar) - 1, 0, -1)
                 )
@@ -3329,7 +3339,7 @@ class White(Token):
         super().__init__()
         self.matchWhite = ws
         self.set_whitespace_chars(
-            "".join(c for c in self.whiteChars if c not in self.matchWhite),
+            "".join(c for c in self.whiteStrs if c not in self.matchWhite),
             copy_defaults=True,
         )
         # self.leave_whitespace()
@@ -3522,7 +3532,7 @@ class WordStart(PositionToken):
     """
 
     def __init__(self, word_chars: str = printables, *, wordChars: str = printables):
-        wordChars = word_chars if wordChars != printables else wordChars
+        wordChars = word_chars if wordChars == printables else wordChars
         super().__init__()
         self.wordChars = set(wordChars)
         self.errmsg = "Not at the start of a word"
@@ -3547,7 +3557,7 @@ class WordEnd(PositionToken):
     """
 
     def __init__(self, word_chars: str = printables, *, wordChars: str = printables):
-        wordChars = word_chars if wordChars != printables else wordChars
+        wordChars = word_chars if wordChars == printables else wordChars
         super().__init__()
         self.wordChars = set(wordChars)
         self.skipWhitespace = False
@@ -3595,15 +3605,15 @@ class ParseExpression(ParserElement):
                 self.exprs = [exprs]
         self.callPreparse = False
 
-    def recurse(self):
+    def recurse(self) -> Sequence[ParserElement]:
         return self.exprs[:]
 
-    def append(self, other):
+    def append(self, other) -> ParserElement:
         self.exprs.append(other)
         self._defaultName = None
         return self
 
-    def leave_whitespace(self, recursive=True):
+    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
         """
         Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on
            all contained expressions.
@@ -3616,7 +3626,7 @@ class ParseExpression(ParserElement):
                 e.leave_whitespace(recursive)
         return self
 
-    def ignore_whitespace(self, recursive=True):
+    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
         """
         Extends ``ignore_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on
            all contained expressions.
@@ -3628,7 +3638,7 @@ class ParseExpression(ParserElement):
                 e.ignore_whitespace(recursive)
         return self
 
-    def ignore(self, other):
+    def ignore(self, other) -> ParserElement:
         if isinstance(other, Suppress):
             if other not in self.ignoreExprs:
                 super().ignore(other)
@@ -3643,7 +3653,7 @@ class ParseExpression(ParserElement):
     def _generateDefaultName(self):
         return "{}:({})".format(self.__class__.__name__, str(self.exprs))
 
-    def streamline(self):
+    def streamline(self) -> ParserElement:
         if self.streamlined:
             return self
 
@@ -3684,13 +3694,13 @@ class ParseExpression(ParserElement):
 
         return self
 
-    def validate(self, validateTrace=None):
+    def validate(self, validateTrace=None) -> None:
         tmp = (validateTrace if validateTrace is not None else [])[:] + [self]
         for e in self.exprs:
             e.validate(tmp)
         self._checkRecursion([])
 
-    def copy(self):
+    def copy(self) -> ParserElement:
         ret = super().copy()
         ret.exprs = [e.copy() for e in self.exprs]
         return ret
@@ -3770,11 +3780,14 @@ class And(ParseExpression):
         super().__init__(exprs, savelist)
         if self.exprs:
             self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
-            self.set_whitespace_chars(
-                self.exprs[0].whiteChars,
-                copy_defaults=self.exprs[0].copyDefaultWhiteChars,
-            )
-            self.skipWhitespace = self.exprs[0].skipWhitespace
+            if not isinstance(self.exprs[0], White):
+                self.set_whitespace_chars(
+                    self.exprs[0].whiteChars,
+                    copy_defaults=self.exprs[0].copyDefaultWhiteChars,
+                )
+                self.skipWhitespace = self.exprs[0].skipWhitespace
+            else:
+                self.skipWhitespace = False
         else:
             self.mayReturnEmpty = True
         self.callPreparse = True
@@ -3813,7 +3826,7 @@ class And(ParseExpression):
                 seen.add(id(cur))
                 if isinstance(cur, IndentedBlock):
                     prev.add_parse_action(
-                        lambda s, l, t: setattr(cur, "parent_anchor", col(l, s))
+                        lambda s, l, t, cur_=cur: setattr(cur_, "parent_anchor", col(l, s))
                     )
                     break
                 subs = cur.recurse()
@@ -3903,7 +3916,9 @@ class Or(ParseExpression):
         if self.exprs:
             self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
             self.saveAsList = any(e.saveAsList for e in self.exprs)
-            self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)
+            self.skipWhitespace = all(
+                e.skipWhitespace and not isinstance(e, White) for e in self.exprs
+            )
         else:
             self.saveAsList = False
         return self
@@ -4013,7 +4028,7 @@ class Or(ParseExpression):
                 warnings.warn(
                     "{}: setting results name {!r} on {} expression "
                     "will return a list of all parsed tokens in an And alternative, "
-                    "in prior versions only the first token was returned; enclose"
+                    "in prior versions only the first token was returned; enclose "
                     "contained argument in Group".format(
                         "warn_multiple_tokens_in_named_alternation",
                         name,
@@ -4059,7 +4074,9 @@ class MatchFirst(ParseExpression):
         if self.exprs:
             self.saveAsList = any(e.saveAsList for e in self.exprs)
             self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
-            self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)
+            self.skipWhitespace = all(
+                e.skipWhitespace and not isinstance(e, White) for e in self.exprs
+            )
         else:
             self.saveAsList = False
             self.mayReturnEmpty = True
@@ -4122,7 +4139,7 @@ class MatchFirst(ParseExpression):
                 warnings.warn(
                     "{}: setting results name {!r} on {} expression "
                     "will return a list of all parsed tokens in an And alternative, "
-                    "in prior versions only the first token was returned; enclose"
+                    "in prior versions only the first token was returned; enclose "
                     "contained argument in Group".format(
                         "warn_multiple_tokens_in_named_alternation",
                         name,
@@ -4280,7 +4297,7 @@ class Each(ParseExpression):
             raise max_fatal
 
         if tmpReqd:
-            missing = ", ".join(str(e) for e in tmpReqd)
+            missing = ", ".join([str(e) for e in tmpReqd])
             raise ParseException(
                 instring,
                 loc,
@@ -4327,16 +4344,16 @@ class ParseElementEnhance(ParserElement):
             self.callPreparse = expr.callPreparse
             self.ignoreExprs.extend(expr.ignoreExprs)
 
-    def recurse(self):
+    def recurse(self) -> Sequence[ParserElement]:
         return [self.expr] if self.expr is not None else []
 
     def parseImpl(self, instring, loc, doActions=True):
         if self.expr is not None:
             return self.expr._parse(instring, loc, doActions, callPreParse=False)
         else:
-            raise ParseException("", loc, self.errmsg, self)
+            raise ParseException(instring, loc, "No expression defined", self)
 
-    def leave_whitespace(self, recursive=True):
+    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
         super().leave_whitespace(recursive)
 
         if recursive:
@@ -4345,7 +4362,7 @@ class ParseElementEnhance(ParserElement):
                 self.expr.leave_whitespace(recursive)
         return self
 
-    def ignore_whitespace(self, recursive=True):
+    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
         super().ignore_whitespace(recursive)
 
         if recursive:
@@ -4354,7 +4371,7 @@ class ParseElementEnhance(ParserElement):
                 self.expr.ignore_whitespace(recursive)
         return self
 
-    def ignore(self, other):
+    def ignore(self, other) -> ParserElement:
         if isinstance(other, Suppress):
             if other not in self.ignoreExprs:
                 super().ignore(other)
@@ -4366,7 +4383,7 @@ class ParseElementEnhance(ParserElement):
                 self.expr.ignore(self.ignoreExprs[-1])
         return self
 
-    def streamline(self):
+    def streamline(self) -> ParserElement:
         super().streamline()
         if self.expr is not None:
             self.expr.streamline()
@@ -4379,7 +4396,7 @@ class ParseElementEnhance(ParserElement):
         if self.expr is not None:
             self.expr._checkRecursion(subRecCheckList)
 
-    def validate(self, validateTrace=None):
+    def validate(self, validateTrace=None) -> None:
         if validateTrace is None:
             validateTrace = []
         tmp = validateTrace[:] + [self]
@@ -4730,7 +4747,7 @@ class _MultipleMatch(ParseElementEnhance):
             ender = self._literalStringClass(ender)
         self.stopOn(ender)
 
-    def stopOn(self, ender):
+    def stopOn(self, ender) -> ParserElement:
         if isinstance(ender, str_type):
             ender = self._literalStringClass(ender)
         self.not_ender = ~ender if ender is not None else None
@@ -5252,22 +5269,22 @@ class Forward(ParseElementEnhance):
                                 raise
                         prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek
 
-    def leave_whitespace(self, recursive=True):
+    def leave_whitespace(self, recursive: bool = True) -> ParserElement:
         self.skipWhitespace = False
         return self
 
-    def ignore_whitespace(self, recursive=True):
+    def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
         self.skipWhitespace = True
         return self
 
-    def streamline(self):
+    def streamline(self) -> ParserElement:
         if not self.streamlined:
             self.streamlined = True
             if self.expr is not None:
                 self.expr.streamline()
         return self
 
-    def validate(self, validateTrace=None):
+    def validate(self, validateTrace=None) -> None:
         if validateTrace is None:
             validateTrace = []
 
@@ -5291,7 +5308,7 @@ class Forward(ParseElementEnhance):
         finally:
             return self.__class__.__name__ + ": " + retString
 
-    def copy(self):
+    def copy(self) -> ParserElement:
         if self.expr is not None:
             return super().copy()
         else:
@@ -5367,7 +5384,7 @@ class Combine(TokenConverter):
         self.joinString = joinString
         self.callPreparse = True
 
-    def ignore(self, other):
+    def ignore(self, other) -> ParserElement:
         if self.adjacent:
             ParserElement.ignore(self, other)
         else:
@@ -5562,11 +5579,11 @@ class Suppress(TokenConverter):
     def postParse(self, instring, loc, tokenlist):
         return []
 
-    def suppress(self):
+    def suppress(self) -> ParserElement:
         return self
 
 
-def trace_parse_action(f: ParseAction):
+def trace_parse_action(f: ParseAction) -> ParseAction:
     """Decorator for debugging parse actions.
 
     When the parse action is called, this decorator will print
@@ -5641,7 +5658,7 @@ _reBracketExpr = (
 )
 
 
-def srange(s):
+def srange(s: str) -> str:
     r"""Helper to easily define string ranges for use in :class:`Word`
     construction. Borrows syntax from regexp ``'[]'`` string range
     definitions::
@@ -5678,7 +5695,7 @@ def srange(s):
         return ""
 
 
-def token_map(func, *args):
+def token_map(func, *args) -> ParseAction:
     """Helper to define a parse action by mapping a function to all
     elements of a :class:`ParseResults` list. If any additional args are passed,
     they are forwarded to the given function as additional arguments
@@ -5724,7 +5741,7 @@ def token_map(func, *args):
     return pa
 
 
-def autoname_elements():
+def autoname_elements() -> None:
     """
     Utility to simplify mass-naming of parser elements, for
     generating railroad diagram with named subdiagrams.
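diff --git a/pyparsing/helpers.py b/pyparsing/helpers.py
index 7d61197128a79ad26a059d81729a4bebf4a72286..5e7b3ad05eb1e3db46c2870f102db48730568a63 100644 (file)
--- a/pyparsing/helpers.py
+++ b/pyparsing/helpers.py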
@@ -14,6 +14,8 @@ def delimited_list(
     expr: Union[str, ParserElement],
     delim: Union[str, ParserElement] = ",",
     combine: bool = False,
+    min: OptionalType[int] = None,
+    max: OptionalType[int] = None,
     *,
     allow_trailing_delim: bool = False,
 ) -> ParserElement:
@@ -38,7 +40,7 @@ def delimited_list(
         expr = ParserElement._literalStringClass(expr)
 
     dlName = "{expr} [{delim} {expr}]...{end}".format(
-        expr=str(expr.streamline()),
+        expr=str(expr.copy().streamline()),
         delim=str(delim),
         end=" [{}]".format(str(delim)) if allow_trailing_delim else "",
     )
@@ -46,7 +48,15 @@ def delimited_list(
     if not combine:
         delim = Suppress(delim)
 
-    delimited_list_expr = expr + ZeroOrMore(delim + expr)
+    if min is not None:
+        if min < 1:
+            raise ValueError("min must be greater than 0")
+        min -= 1
+    if max is not None:
+        if min is not None and max <= min:
+            raise ValueError("max must be greater than, or equal to min")
+        max -= 1
+    delimited_list_expr = expr + (delim + expr)[min, max]
 
     if allow_trailing_delim:
         delimited_list_expr += Opt(delim)
@@ -175,7 +185,7 @@ def match_previous_expr(expr: ParserElement) -> ParserElement:
         def must_match_these_tokens(s, l, t):
             theseTokens = _flatten(t.as_list())
             if theseTokens != matchTokens:
-                raise ParseException("", 0, "")
+                raise ParseException(s, l, "Expected {}, found{}".format(matchTokens, theseTokens))
 
         rep.set_parse_action(must_match_these_tokens, callDuringTry=True)
 
@@ -247,7 +257,7 @@ def one_of(
         masks = lambda a, b: b.startswith(a)
         parseElementClass = Keyword if asKeyword else Literal
 
-    symbols = []
+    symbols: List[str] = []
     if isinstance(strs, str_type):
         symbols = strs.split()
     elif isinstance(strs, Iterable):
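diff --git a/pyparsing/results.py b/pyparsing/results.py
index 842d16b3c75fb452e89858932a738598965f9861..9676f45b88b8b549637f36ecb7af46ec363cfaba 100644 (file)
--- a/pyparsing/results.py
+++ b/pyparsing/results.py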
@@ -462,8 +462,10 @@ class ParseResults:
         return (
             "["
             + ", ".join(
-                str(i) if isinstance(i, ParseResults) else repr(i)
-                for i in self._toklist
+                [
+                    str(i) if isinstance(i, ParseResults) else repr(i)
+                    for i in self._toklist
+                ]
             )
             + "]"
         )
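diff --git a/pyparsing/unicode.py b/pyparsing/unicode.py
index caa3306db8c29a2698e21b980cd7ba59221ca5c9..92261487c7af50ede7204c4b65299f2ed333bed1 100644 (file)
--- a/pyparsing/unicode.py
+++ b/pyparsing/unicode.py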
@@ -108,7 +108,7 @@ class unicode_set:
                     cls.identchars
                     + "0123456789"
                     + "".join(
-                        c for c in cls._chars_for_ranges if ("_" + c).isidentifier()
+                        [c for c in cls._chars_for_ranges if ("_" + c).isidentifier()]
                     )
                 )
             )
diff --git a/pyparsing/util.py b/pyparsing/util.py
index 1309ad6effeb85ffbe49ca5736ddc402569eb45f..34ce092c6d08d9cdc2704840b7539de7b5ae1dcc 100644 (file)
--- a/pyparsing/util.py
+++ b/pyparsing/util.py
@@ -213,9 +213,10 @@ def _collapse_string_to_ranges(
             if first == last:
                 ret.append(escape_re_range_char(first))
             else:
+                sep = "" if ord(last) == ord(first) + 1 else "-"
                 ret.append(
-                    "{}-{}".format(
-                        escape_re_range_char(first), escape_re_range_char(last)
+                    "{}{}{}".format(
+                        escape_re_range_char(first), sep, escape_re_range_char(last)
                     )
                 )
     else:
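diff --git a/tests/test_simple_unit.py b/tests/test_simple_unit.py
index 6d06b34f3a98cb1570824b2cbdde98d1884d28c3..a6b3d3a73f84a92a65f55a89a304575cf53a70bb 100644 (file)
--- a/tests/test_simple_unit.py
+++ b/tests/test_simple_unit.py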
@@ -269,6 +269,18 @@ class TestRepetition(PyparsingExpressionTestCase):
             text="xxyx,xy,y,xxyx,yxx, xy,",
             expected_list=["xxyx", "xy", "y", "xxyx", "yxx", "xy"],
         ),
+        PpTestSpec(
+            desc="Using delimited_list (comma is the default delimiter) with minimum size",
+            expr=pp.delimited_list(pp.Word(pp.alphas), min=3),
+            text="xxyx,xy",
+            expected_fail_locn=7,
+        ),
+        PpTestSpec(
+            desc="Using delimited_list (comma is the default delimiter) with maximum size",
+            expr=pp.delimited_list(pp.Word(pp.alphas), max=3),
+            text="xxyx,xy,y,xxyx,yxx, xy,",
+            expected_list=["xxyx", "xy", "y"],
+        ),
         PpTestSpec(
             desc="Using delimited_list, with ':' delimiter",
             expr=pp.delimited_list(
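diff --git a/tests/test_unit.py b/tests/test_unit.py
index 4c41e7fd03a6eb0a030b14d330af70a0489603f8..388af38f5bb8342a0475656b39091c9d3feeb999 100644 (file)
--- a/tests/test_unit.py
+++ b/tests/test_unit.py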
@@ -217,6 +217,20 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
             msg="scanString with overlaps failed",
         )
 
+    def testCombineWithResultsNames(self):
+        # test case reproducing Issue #350
+        from pyparsing import White, alphas, Word
+
+        parser = White(' \t').set_results_name('indent') + Word(alphas).set_results_name('word')
+        result = parser.parse_string('    test')
+        print(result.dump())
+        self.assertParseResultsEquals(result, ['    ', 'test'], {'indent': '    ', 'word': 'test'})
+
+        parser = White(' \t') + Word(alphas).set_results_name('word')
+        result = parser.parse_string('    test')
+        print(result.dump())
+        self.assertParseResultsEquals(result, ['    ', 'test'], {'word': 'test'})
+
     def testTransformString(self):
         make_int_with_commas = ppc.integer().addParseAction(
             lambda t: "{:,}".format(t[0])
@@ -1793,6 +1807,29 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
         with self.assertRaises(ValueError):
             pp.QuotedString("", "\\")
 
+    def testCustomQuotes2(self):
+
+        qs = pp.QuotedString(quote_char=".[", end_quote_char="].")
+        print(qs.reString)
+        self.assertParseAndCheckList(qs, ".[...].", ['...'])
+        self.assertParseAndCheckList(qs, ".[].", [''])
+        self.assertParseAndCheckList(qs, ".[]].", [']'])
+        self.assertParseAndCheckList(qs, ".[]]].", [']]'])
+
+        qs = pp.QuotedString(quote_char="+*", end_quote_char="*+")
+        print(qs.reString)
+        self.assertParseAndCheckList(qs, "+*...*+", ['...'])
+        self.assertParseAndCheckList(qs, "+**+", [''])
+        self.assertParseAndCheckList(qs, "+***+", ['*'])
+        self.assertParseAndCheckList(qs, "+****+", ['**'])
+
+        qs = pp.QuotedString(quote_char="*/", end_quote_char="/*")
+        print(qs.reString)
+        self.assertParseAndCheckList(qs, "*/.../*", ['...'])
+        self.assertParseAndCheckList(qs, "*//*", [''])
+        self.assertParseAndCheckList(qs, "*///*", ['/'])
+        self.assertParseAndCheckList(qs, "*////*", ['//'])
+
     def testRepeater(self):
         if ParserElement._packratEnabled or ParserElement._left_recursion_enabled:
             print("skipping this test, not compatible with memoization")
@@ -3163,6 +3200,16 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
         except RecursionError:
             self.fail("fail getting named result when empty")
 
+    def testParseResultsBool(self):
+        result = pp.Word(pp.alphas)[...].parseString("AAA")
+        self.assertTrue(result, "non-empty ParseResults evaluated as False")
+
+        result = pp.Word(pp.alphas)[...].parseString("")
+        self.assertFalse(result, "empty ParseResults evaluated as True")
+
+        result["A"] = 0
+        self.assertTrue(result, "ParseResults with empty list but containing a results name evaluated as False")
+
     def testIgnoreString(self):
         """test ParserElement.ignore() passed a string arg"""
 
@@ -4903,6 +4950,24 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
                 ),
             )
 
+    def testWordBoundaryExpressions2(self):
+        from itertools import product
+        ws1 = pp.WordStart(pp.alphas)
+        ws2 = pp.WordStart(wordChars=pp.alphas)
+        ws3 = pp.WordStart(word_chars=pp.alphas)
+        we1 = pp.WordEnd(pp.alphas)
+        we2 = pp.WordEnd(wordChars=pp.alphas)
+        we3 = pp.WordEnd(word_chars=pp.alphas)
+
+        for i, (ws, we) in enumerate(product((ws1, ws2, ws3), (we1, we2, we3))):
+            try:
+                expr = ("(" + ws + pp.Word(pp.alphas) + we + ")")
+                expr.parseString("(abc)")
+            except pp.ParseException as pe:
+                self.fail(f"Test {i} failed: {pe}")
+            else:
+                pass
+
     def testRequiredEach(self):
 
         parser = pp.Keyword("bam") & pp.Keyword("boo")
@@ -6711,6 +6776,43 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
             "noop parse action changed ParseResults structure",
         )
 
+    def testParseActionWithDelimitedList(self):
+        class AnnotatedToken(object):
+            def __init__(self, kind, elements):
+                self.kind = kind
+                self.elements = elements
+
+            def __str__(self):
+                return 'AnnotatedToken(%r, %r)' % (self.kind, self.elements)
+
+            def __eq__(self, other):
+                return type(self) == type(other) and self.kind == other.kind and self.elements == other.elements
+
+            __repr__ = __str__
+
+        def annotate(name):
+            def _(t):
+                return AnnotatedToken(name, t.asList())
+            return _
+
+        identifier = pp.Word(pp.srange('[a-z0-9]'))
+        numeral = pp.Word(pp.nums)
+
+        named_number_value = pp.Suppress('(') + numeral + pp.Suppress(')')
+        named_number = identifier + named_number_value
+
+        named_number_list = (pp.Suppress('{') +
+                             pp.Group(pp.Optional(pp.delimitedList(named_number))) +
+                             pp.Suppress('}'))
+
+        # repro bug in #345 - delimitedList silently changes contents of named_number
+        named_number_value.setParseAction(annotate("val"))
+
+        test_string = "{ x1(1), x2(2) }"
+        expected = [['x1', AnnotatedToken("val", ['1']), 'x2', AnnotatedToken("val", ['2'])]]
+
+        self.assertParseAndCheckList(named_number_list, test_string, expected)
+
     def testParseResultsNameBelowUngroupedName(self):
 
         rule_num = pp.Regex("[0-9]+")("LIT_NUM*")
@@ -7845,6 +7947,25 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
             expr, source, [s.strip() for s in source.split(",")]
         )
 
+    def testDelimitedListMinMax(self):
+        source = "ABC, ABC,ABC"
+        with self.assertRaises(ValueError, msg="min must be greater than 0"):
+            pp.delimited_list("ABC", min=0)
+        with self.assertRaises(ValueError, msg="max must be greater than, or equal to min"):
+            pp.delimited_list("ABC", min=1, max=0)
+        with self.assertRaises(pp.ParseException):
+            pp.delimited_list("ABC", min=4).parse_string(source)
+
+        source_expr_pairs = [
+            ("ABC,  ABC", pp.delimited_list("ABC", max=2)),
+            (source, pp.delimited_list("ABC", min=2, max=4)),
+        ]
+        for source, expr in source_expr_pairs:
+            print(str(expr))
+            self.assertParseAndCheckList(
+                expr, source, [s.strip() for s in source.split(",")]
+            )
+
     def testEnableDebugOnNamedExpressions(self):
         """
         - enable_debug_on_named_expressions - flag to auto-enable debug on all subsequent
@@ -8148,28 +8269,39 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
                 "__diag__.{} not set to True".format(diag_name),
             )
 
-    def testWordInternalReRangesKnownSets(self):
-        self.assertEqual(
-            "[!-~]+",
-            pp.Word(pp.printables).reString,
-            "failed to generate correct internal re",
-        )
-        self.assertEqual(
-            "[0-9A-Za-z]+",
-            pp.Word(pp.alphanums).reString,
-            "failed to generate correct internal re",
-        )
-        self.assertEqual(
-            "[!-~¡-ÿ]+",
-            pp.Word(pp.pyparsing_unicode.Latin1.printables).reString,
-            "failed to generate correct internal re",
-        )
-        self.assertEqual(
-            "[À-ÖØ-öø-ÿ]+",
-            pp.Word(pp.alphas8bit).reString,
-            "failed to generate correct internal re",
+    def testWordInternalReRangeWithConsecutiveChars(self):
+        self.assertParseAndCheckList(
+            pp.Word("ABCDEMNXYZ"),
+            "ABCDEMNXYZABCDEMNXYZABCDEMNXYZ",
+            ["ABCDEMNXYZABCDEMNXYZABCDEMNXYZ"]
         )
 
+    def testWordInternalReRangesKnownSet(self):
+        tests = [
+            ("ABCDEMNXYZ", "[A-EMNX-Z]+"),
+            (pp.printables, "[!-~]+"),
+            (pp.alphanums, "[0-9A-Za-z]+"),
+            (pp.pyparsing_unicode.Latin1.printables, "[!-~¡-ÿ]+"),
+            (pp.pyparsing_unicode.Latin1.alphanums, "[0-9A-Za-zª²³µ¹ºÀ-ÖØ-öø-ÿ]+"),
+            (pp.alphas8bit, "[À-ÖØ-öø-ÿ]+"),
+        ]
+        failed = []
+        for word_string, expected_re in tests:
+            try:
+                msg = "failed to generate correct internal re for {!r}".format(word_string)
+                resultant_re = pp.Word(word_string).reString
+                self.assertEqual(
+                    expected_re,
+                    resultant_re,
+                    msg + "; expected {!r} got {!r}".format(expected_re, resultant_re)
+                )
+            except AssertionError:
+                failed.append(msg)
+
+        if failed:
+            print("Errors:\n{}".format("\n".join(failed)))
+            self.fail("failed to generate correct internal re's")
+
     def testWordInternalReRanges(self):
         import random
 
diff --git a/tox.ini b/tox.ini
index f811816349d7f3516af98cd8fac88b55cda27366..ea9c909dcc86a6734d90d38924ffa4d640c89758 100644 (file)
--- a/tox.ini
+++ b/tox.ini
@@ -15,6 +15,6 @@ deps=
     pytest
 commands=
     python -c "import shutil,os,stat;os.path.exists('packaging') and shutil.rmtree('packaging', onerror=lambda fn, path, _:os.chmod(path,stat.S_IWRITE) or fn(path))"
-    git clone --depth 1 https://github.com/pypa/packaging.git
+    git clone --depth 10 https://github.com/pypa/packaging.git
     python -m pytest packaging/tests
     python -c "import shutil,os,stat;shutil.rmtree('packaging', onerror=lambda fn, path, _:os.chmod(path,stat.S_IWRITE) or fn(path))"