Change Log
==========
+Version 3.0.7 -
+---------------
+- Fixed bug #345, in which delimitedList changed expressions in place
+ using expr.streamline(). Reported by Kim Gräsman, thanks!
+
+- Fixed bug #346, in which WordStart and WordEnd mishandled an explicitly
+  passed string of word characters (as opposed to using the default value).
+  Originally posted as a question by Parag on StackOverflow, good catch!
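+
+  For example, an explicit string of word characters now behaves the same
+  whether passed positionally or by keyword (a minimal sketch, mirroring
+  the new unit test):
+
+      from pyparsing import Word, WordStart, WordEnd, alphas
+
+      ws = WordStart(alphas)    # now equivalent to WordStart(word_chars=alphas)
+      we = WordEnd(alphas)      # now equivalent to WordEnd(word_chars=alphas)
+      expr = "(" + ws + Word(alphas) + we + ")"
+      print(expr.parse_string("(abc)"))    # -> ['(', 'abc', ')']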
+
+- Fixed bug #350, in which White expressions could fail to match due to
+ unintended whitespace-skipping. Reported by Fu Hanxi, thank you!
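+
+  For example, a leading White expression with a results name now matches
+  the leading whitespace rather than having it skipped (a minimal sketch,
+  based on the new unit test):
+
+      from pyparsing import White, Word, alphas
+
+      parser = White(" \t")("indent") + Word(alphas)("word")
+      result = parser.parse_string("   test")
+      print(result.dump())    # leading spaces captured under "indent"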
+
+- Fixed bug #355, in which a QuotedString defined with regex-significant
+  characters such as ., *, ?, [, ], etc. in its quoteChar or endQuoteChar
+  strings would fail to match.
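+
+  For example, quote strings built from regex-significant punctuation now
+  work (a minimal sketch, taken from the new unit tests):
+
+      from pyparsing import QuotedString
+
+      qs = QuotedString(quote_char=".[", end_quote_char="].")
+      print(qs.parse_string(".[hello]."))    # -> ['hello']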
+
+- Fixed bug in ParserElement.run_tests where comments would always be
+  displayed using with_line_numbers, regardless of the with_line_numbers
+  argument.
+
+- Added optional "min" and "max" arguments to `delimited_list`. PR
+ submitted by Marius, thanks!
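+
+  A minimal usage sketch (following the new unit tests added for this
+  feature):
+
+      from pyparsing import Word, alphas, delimited_list
+
+      # require at least 3 delimited items - fewer raises ParseException
+      print(delimited_list(Word(alphas), min=3).parse_string("xxyx,xy,y"))
+
+      # match at most 3 delimited items
+      print(delimited_list(Word(alphas), max=3).parse_string("a,b,c,d,e"))
+      # -> ['a', 'b', 'c']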
+
+- Added new API change note in `whats_new_in_pyparsing_3_0_0`, regarding
+ a bug fix in the `bool()` behavior of `ParseResults`.
+
+ Prior to pyparsing 3.0.x, the `ParseResults` class implementation of
+ `__bool__` would return `False` if the `ParseResults` item list was empty,
+ even if it contained named results. In 3.0.0 and later, `ParseResults` will
+ return `True` if either the item list is not empty *or* if the named
+ results dict is not empty.
+
+ # generate an empty ParseResults by parsing a blank string with
+ # a ZeroOrMore
+ result = Word(alphas)[...].parse_string("")
+ print(result.as_list())
+ print(result.as_dict())
+ print(bool(result))
+
+ # add a results name to the result
+ result["name"] = "empty result"
+ print(result.as_list())
+ print(result.as_dict())
+ print(bool(result))
+
+ Prints:
+
+ []
+ {}
+ False
+
+ []
+ {'name': 'empty result'}
+ True
+
+ In previous versions, the second call to `bool()` would return `False`.
+
+- Minor enhancement to Word's generation of its internal regular expression,
+  to emit two consecutive characters in a range, such as "ab", as "ab"
+  rather than "a-b".
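+
+  Illustrative sketch (based on the new unit test):
+
+      from pyparsing import Word
+
+      print(Word("ABCDEMNXYZ").reString)    # -> [A-EMNX-Z]+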
+
+- Fixed character ranges for search terms using non-Western characters
+ in booleansearchparser, PR submitted by tc-yu, nice work!
+
+- Additional type annotations on public methods.
+
+
Version 3.0.6 -
---------------
- Added `suppress_warning()` method to individually suppress a warning on a
Metadata-Version: 2.1
Name: pyparsing
-Version: 3.0.6
+Version: 3.0.7
Summary: Python parsing module
Home-page: https://github.com/pyparsing/pyparsing/
Author: Paul McGuire
See `CHANGES <https://github.com/pyparsing/pyparsing/blob/master/CHANGES>`__ file.
- .. |Build Status| image:: https://travis-ci.com/pyparsing/pyparsing.svg?branch=master
- :target: https://travis-ci.com/pyparsing/pyparsing
+ .. |Build Status| image:: https://github.com/pyparsing/pyparsing/actions/workflows/ci.yml/badge.svg
+ :target: https://github.com/pyparsing/pyparsing/actions/workflows/ci.yml
.. |Coverage| image:: https://codecov.io/gh/pyparsing/pyparsing/branch/master/graph/badge.svg
:target: https://codecov.io/gh/pyparsing/pyparsing
See `CHANGES <https://github.com/pyparsing/pyparsing/blob/master/CHANGES>`__ file.
-.. |Build Status| image:: https://travis-ci.com/pyparsing/pyparsing.svg?branch=master
- :target: https://travis-ci.com/pyparsing/pyparsing
+.. |Build Status| image:: https://github.com/pyparsing/pyparsing/actions/workflows/ci.yml/badge.svg
+ :target: https://github.com/pyparsing/pyparsing/actions/workflows/ci.yml
.. |Coverage| image:: https://codecov.io/gh/pyparsing/pyparsing/branch/master/graph/badge.svg
:target: https://codecov.io/gh/pyparsing/pyparsing
API Changes
===========
-- [Note added in pyparsing 3.0.4]
+- [Note added in pyparsing 3.0.7, reflecting a change in 3.0.0]
+ Fixed a bug in the `ParseResults` class implementation of `__bool__`, which
+ would formerly return `False` if the `ParseResults` item list was empty, even if it
+ contained named results. Now `ParseResults` will return `True` if either the item
+  list is not empty *or* if the named results dict is not empty.
+
+ # generate an empty ParseResults by parsing a blank string with a ZeroOrMore
+ result = Word(alphas)[...].parse_string("")
+ print(result.as_list())
+ print(result.as_dict())
+ print(bool(result))
+
+ # add a results name to the result
+ result["name"] = "empty result"
+ print(result.as_list())
+ print(result.as_dict())
+ print(bool(result))
+
+ Prints::
+
+ []
+ {}
+ False
+
+ []
+ {'name': 'empty result'}
+ True
+
+ In previous versions, the second call to `bool()` would return `False`.
+
+- [Note added in pyparsing 3.0.4, reflecting a change in 3.0.0]
The `ParseResults` class now uses `__slots__` to pre-define instance attributes. This
means that code written like this (which was allowed in pyparsing 2.4.7)::
#
# Michael Smedberg
#
+import sys
from pyparsing import ParserElement, Suppress, Forward, CaselessKeyword
from pyparsing import MatchFirst, alphas, alphanums, Combine, Word
-from pyparsing import QuotedString, CharsNotIn, Optional, Group, ZeroOrMore
+from pyparsing import QuotedString, CharsNotIn, Optional, Group
from pyparsing import oneOf, delimitedList, restOfLine, cStyleComment
from pyparsing import infixNotation, opAssoc, Regex, nums
+sys.setrecursionlimit(3000)
+
ParserElement.enablePackrat()
BigQueryViewParser._with_aliases.clear()
BigQueryViewParser._get_parser().parseString(sql_stmt)
- return (BigQueryViewParser._table_identifiers, BigQueryViewParser._with_aliases)
+ return BigQueryViewParser._table_identifiers, BigQueryViewParser._with_aliases
@classmethod
def lowercase_of_tuple(cls, tuple_to_lowercase):
ParserElement.enablePackrat()
LPAR, RPAR, COMMA, LBRACKET, RBRACKET, LT, GT = map(Suppress, "(),[]<>")
+ QUOT, APOS, ACC, DOT = map(Suppress, "\"'`.")
ungrouped_select_stmt = Forward().setName("select statement")
+ QUOTED_QUOT = QuotedString('"')
+ QUOTED_APOS = QuotedString("'")
+ QUOTED_ACC = QuotedString("`")
+
+ # fmt: off
# keywords
(
- UNION,
- ALL,
- AND,
- INTERSECT,
- EXCEPT,
- COLLATE,
- ASC,
- DESC,
- ON,
- USING,
- NATURAL,
- INNER,
- CROSS,
- LEFT,
- RIGHT,
- OUTER,
- FULL,
- JOIN,
- AS,
- INDEXED,
- NOT,
- SELECT,
- DISTINCT,
- FROM,
- WHERE,
- GROUP,
- BY,
- HAVING,
- ORDER,
- BY,
- LIMIT,
- OFFSET,
- OR,
- CAST,
- ISNULL,
- NOTNULL,
- NULL,
- IS,
- BETWEEN,
- ELSE,
- END,
- CASE,
- WHEN,
- THEN,
- EXISTS,
- COLLATE,
- IN,
- LIKE,
- GLOB,
- REGEXP,
- MATCH,
- ESCAPE,
- CURRENT_TIME,
- CURRENT_DATE,
- CURRENT_TIMESTAMP,
- WITH,
- EXTRACT,
- PARTITION,
- ROWS,
- RANGE,
- UNBOUNDED,
- PRECEDING,
- CURRENT,
- ROW,
- FOLLOWING,
- OVER,
- INTERVAL,
- DATE_ADD,
- DATE_SUB,
- ADDDATE,
- SUBDATE,
- REGEXP_EXTRACT,
- SPLIT,
- ORDINAL,
- FIRST_VALUE,
- LAST_VALUE,
- NTH_VALUE,
- LEAD,
- LAG,
- PERCENTILE_CONT,
- PRECENTILE_DISC,
- RANK,
- DENSE_RANK,
- PERCENT_RANK,
- CUME_DIST,
- NTILE,
- ROW_NUMBER,
- DATE,
- TIME,
- DATETIME,
- TIMESTAMP,
- UNNEST,
- INT64,
- NUMERIC,
- FLOAT64,
- BOOL,
- BYTES,
- GEOGRAPHY,
- ARRAY,
- STRUCT,
- SAFE_CAST,
- ANY_VALUE,
- ARRAY_AGG,
- ARRAY_CONCAT_AGG,
- AVG,
- BIT_AND,
- BIT_OR,
- BIT_XOR,
- COUNT,
- COUNTIF,
- LOGICAL_AND,
- LOGICAL_OR,
- MAX,
- MIN,
- STRING_AGG,
- SUM,
- CORR,
- COVAR_POP,
- COVAR_SAMP,
- STDDEV_POP,
- STDDEV_SAMP,
- STDDEV,
- VAR_POP,
- VAR_SAMP,
- VARIANCE,
- TIMESTAMP_ADD,
- TIMESTAMP_SUB,
- GENERATE_ARRAY,
- GENERATE_DATE_ARRAY,
- GENERATE_TIMESTAMP_ARRAY,
- FOR,
- SYSTEMTIME,
- AS,
- OF,
- WINDOW,
- RESPECT,
- IGNORE,
- NULLS,
+ UNION, ALL, AND, INTERSECT, EXCEPT, COLLATE, ASC, DESC, ON, USING, NATURAL,
+ INNER, CROSS, LEFT, RIGHT, OUTER, FULL, JOIN, AS, INDEXED, NOT, SELECT,
+ DISTINCT, FROM, WHERE, GROUP, BY, HAVING, ORDER, BY, LIMIT, OFFSET, OR,
+ CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE, END, CASE, WHEN, THEN,
+ EXISTS, COLLATE, IN, LIKE, GLOB, REGEXP, MATCH, ESCAPE, CURRENT_TIME,
+ CURRENT_DATE, CURRENT_TIMESTAMP, WITH, EXTRACT, PARTITION, ROWS, RANGE,
+ UNBOUNDED, PRECEDING, CURRENT, ROW, FOLLOWING, OVER, INTERVAL, DATE_ADD,
+ DATE_SUB, ADDDATE, SUBDATE, REGEXP_EXTRACT, SPLIT, ORDINAL, FIRST_VALUE,
+ LAST_VALUE, NTH_VALUE, LEAD, LAG, PERCENTILE_CONT, PRECENTILE_DISC, RANK,
+ DENSE_RANK, PERCENT_RANK, CUME_DIST, NTILE, ROW_NUMBER, DATE, TIME, DATETIME,
+ TIMESTAMP, UNNEST, INT64, NUMERIC, FLOAT64, BOOL, BYTES, GEOGRAPHY, ARRAY,
+ STRUCT, SAFE_CAST, ANY_VALUE, ARRAY_AGG, ARRAY_CONCAT_AGG, AVG, BIT_AND,
+ BIT_OR, BIT_XOR, COUNT, COUNTIF, LOGICAL_AND, LOGICAL_OR, MAX, MIN,
+ STRING_AGG, SUM, CORR, COVAR_POP, COVAR_SAMP, STDDEV_POP, STDDEV_SAMP,
+ STDDEV, VAR_POP, VAR_SAMP, VARIANCE, TIMESTAMP_ADD, TIMESTAMP_SUB,
+ GENERATE_ARRAY, GENERATE_DATE_ARRAY, GENERATE_TIMESTAMP_ARRAY, FOR,
+ SYSTEMTIME, AS, OF, WINDOW, RESPECT, IGNORE, NULLS,
) = map(
CaselessKeyword,
"""
- UNION, ALL, AND, INTERSECT, EXCEPT, COLLATE, ASC, DESC, ON, USING,
- NATURAL, INNER, CROSS, LEFT, RIGHT, OUTER, FULL, JOIN, AS, INDEXED,
- NOT, SELECT, DISTINCT, FROM, WHERE, GROUP, BY, HAVING, ORDER, BY,
- LIMIT, OFFSET, OR, CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE,
- END, CASE, WHEN, THEN, EXISTS, COLLATE, IN, LIKE, GLOB, REGEXP,
- MATCH, ESCAPE, CURRENT_TIME, CURRENT_DATE, CURRENT_TIMESTAMP, WITH,
- EXTRACT, PARTITION, ROWS, RANGE, UNBOUNDED, PRECEDING, CURRENT,
- ROW, FOLLOWING, OVER, INTERVAL, DATE_ADD, DATE_SUB, ADDDATE,
- SUBDATE, REGEXP_EXTRACT, SPLIT, ORDINAL, FIRST_VALUE, LAST_VALUE,
- NTH_VALUE, LEAD, LAG, PERCENTILE_CONT, PRECENTILE_DISC, RANK,
- DENSE_RANK, PERCENT_RANK, CUME_DIST, NTILE, ROW_NUMBER, DATE, TIME,
- DATETIME, TIMESTAMP, UNNEST, INT64, NUMERIC, FLOAT64, BOOL, BYTES,
- GEOGRAPHY, ARRAY, STRUCT, SAFE_CAST, ANY_VALUE, ARRAY_AGG,
- ARRAY_CONCAT_AGG, AVG, BIT_AND, BIT_OR, BIT_XOR, COUNT, COUNTIF,
- LOGICAL_AND, LOGICAL_OR, MAX, MIN, STRING_AGG, SUM, CORR,
- COVAR_POP, COVAR_SAMP, STDDEV_POP, STDDEV_SAMP, STDDEV, VAR_POP,
- VAR_SAMP, VARIANCE, TIMESTAMP_ADD, TIMESTAMP_SUB, GENERATE_ARRAY,
- GENERATE_DATE_ARRAY, GENERATE_TIMESTAMP_ARRAY, FOR, SYSTEMTIME, AS,
- OF, WINDOW, RESPECT, IGNORE, NULLS
- """.replace(
- ",", ""
- ).split(),
+ UNION, ALL, AND, INTERSECT, EXCEPT, COLLATE, ASC, DESC, ON, USING, NATURAL,
+ INNER, CROSS, LEFT, RIGHT, OUTER, FULL, JOIN, AS, INDEXED, NOT, SELECT,
+ DISTINCT, FROM, WHERE, GROUP, BY, HAVING, ORDER, BY, LIMIT, OFFSET, OR,
+ CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE, END, CASE, WHEN, THEN,
+ EXISTS, COLLATE, IN, LIKE, GLOB, REGEXP, MATCH, ESCAPE, CURRENT_TIME,
+ CURRENT_DATE, CURRENT_TIMESTAMP, WITH, EXTRACT, PARTITION, ROWS, RANGE,
+ UNBOUNDED, PRECEDING, CURRENT, ROW, FOLLOWING, OVER, INTERVAL, DATE_ADD,
+ DATE_SUB, ADDDATE, SUBDATE, REGEXP_EXTRACT, SPLIT, ORDINAL, FIRST_VALUE,
+ LAST_VALUE, NTH_VALUE, LEAD, LAG, PERCENTILE_CONT, PRECENTILE_DISC, RANK,
+ DENSE_RANK, PERCENT_RANK, CUME_DIST, NTILE, ROW_NUMBER, DATE, TIME, DATETIME,
+ TIMESTAMP, UNNEST, INT64, NUMERIC, FLOAT64, BOOL, BYTES, GEOGRAPHY, ARRAY,
+ STRUCT, SAFE_CAST, ANY_VALUE, ARRAY_AGG, ARRAY_CONCAT_AGG, AVG, BIT_AND,
+ BIT_OR, BIT_XOR, COUNT, COUNTIF, LOGICAL_AND, LOGICAL_OR, MAX, MIN,
+ STRING_AGG, SUM, CORR, COVAR_POP, COVAR_SAMP, STDDEV_POP, STDDEV_SAMP,
+ STDDEV, VAR_POP, VAR_SAMP, VARIANCE, TIMESTAMP_ADD, TIMESTAMP_SUB,
+ GENERATE_ARRAY, GENERATE_DATE_ARRAY, GENERATE_TIMESTAMP_ARRAY, FOR,
+ SYSTEMTIME, AS, OF, WINDOW, RESPECT, IGNORE, NULLS,
+ """.replace(",", "").split(),
)
keyword_nonfunctions = MatchFirst(
- (
- UNION,
- ALL,
- INTERSECT,
- EXCEPT,
- COLLATE,
- ASC,
- DESC,
- ON,
- USING,
- NATURAL,
- INNER,
- CROSS,
- LEFT,
- RIGHT,
- OUTER,
- FULL,
- JOIN,
- AS,
- INDEXED,
- NOT,
- SELECT,
- DISTINCT,
- FROM,
- WHERE,
- GROUP,
- BY,
- HAVING,
- ORDER,
- BY,
- LIMIT,
- OFFSET,
- CAST,
- ISNULL,
- NOTNULL,
- NULL,
- IS,
- BETWEEN,
- ELSE,
- END,
- CASE,
- WHEN,
- THEN,
- EXISTS,
- COLLATE,
- IN,
- LIKE,
- GLOB,
- REGEXP,
- MATCH,
- STRUCT,
- WINDOW,
- )
+ (UNION, ALL, INTERSECT, EXCEPT, COLLATE, ASC, DESC, ON, USING,
+ NATURAL, INNER, CROSS, LEFT, RIGHT, OUTER, FULL, JOIN, AS, INDEXED,
+ NOT, SELECT, DISTINCT, FROM, WHERE, GROUP, BY, HAVING, ORDER, BY,
+ LIMIT, OFFSET, CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE, END,
+ CASE, WHEN, THEN, EXISTS, COLLATE, IN, LIKE, GLOB, REGEXP, MATCH,
+ STRUCT, WINDOW,
+ )
)
keyword = keyword_nonfunctions | MatchFirst(
- (
- ESCAPE,
- CURRENT_TIME,
- CURRENT_DATE,
- CURRENT_TIMESTAMP,
- DATE_ADD,
- DATE_SUB,
- ADDDATE,
- SUBDATE,
- INTERVAL,
- STRING_AGG,
- REGEXP_EXTRACT,
- SPLIT,
- ORDINAL,
- UNNEST,
- SAFE_CAST,
- PARTITION,
- TIMESTAMP_ADD,
- TIMESTAMP_SUB,
- ARRAY,
- GENERATE_ARRAY,
- GENERATE_DATE_ARRAY,
- GENERATE_TIMESTAMP_ARRAY,
- )
+ (ESCAPE, CURRENT_TIME, CURRENT_DATE, CURRENT_TIMESTAMP, DATE_ADD,
+ DATE_SUB, ADDDATE, SUBDATE, INTERVAL, STRING_AGG, REGEXP_EXTRACT,
+ SPLIT, ORDINAL, UNNEST, SAFE_CAST, PARTITION, TIMESTAMP_ADD,
+ TIMESTAMP_SUB, ARRAY, GENERATE_ARRAY, GENERATE_DATE_ARRAY,
+ GENERATE_TIMESTAMP_ARRAY,
+ )
)
+ # fmt: on
identifier_word = Word(alphas + "_@#", alphanums + "@$#_")
identifier = ~keyword + identifier_word.copy()
# NOTE: Column names can be keywords. Doc says they cannot, but in practice it seems to work.
column_name = identifier_word.copy()
qualified_column_name = Combine(
- column_name
- + (ZeroOrMore(" ") + "." + ZeroOrMore(" ") + column_name) * (0, 6)
+ column_name + ("." + column_name)[..., 6], adjacent=False
)
# NOTE: As with column names, column aliases can be keywords, e.g. functions like `current_time`. Other
# keywords, e.g. `from` make parsing pretty difficult (e.g. "SELECT a from from b" is confusing.)
# NOTE: The expression in a CASE statement can be an integer. E.g. this is valid SQL:
# select CASE 1 WHEN 1 THEN -1 ELSE -2 END from test_table
unquoted_case_identifier = ~keyword + Word(alphanums + "$_")
- quoted_case_identifier = ~keyword + (
- QuotedString('"') ^ Suppress("`") + CharsNotIn("`") + Suppress("`")
- )
+ quoted_case_identifier = QUOTED_QUOT | QUOTED_ACC
case_identifier = quoted_case_identifier | unquoted_case_identifier
case_expr = (
- Optional(case_identifier + Suppress("."))
- + Optional(case_identifier + Suppress("."))
+ Optional(case_identifier + DOT)
+ + Optional(case_identifier + DOT)
+ case_identifier
)
integer = Regex(r"[+-]?\d+")
numeric_literal = Regex(r"[+-]?\d*\.?\d+([eE][+-]?\d+)?")
- string_literal = QuotedString("'") | QuotedString('"') | QuotedString("`")
+ string_literal = QUOTED_APOS | QUOTED_QUOT | QUOTED_ACC
regex_literal = "r" + string_literal
blob_literal = Regex(r"[xX]'[0-9A-Fa-f]+'")
date_or_time_literal = (DATE | TIME | DATETIME | TIMESTAMP) + string_literal
MINUTE_MICROSECOND MINUTE_SECOND MONTH QUARTER SECOND
SECOND_MICROSECOND WEEK YEAR YEAR_MONTH""",
caseless=True,
+ as_keyword=True,
)
datetime_operators = (
DATE_ADD | DATE_SUB | ADDDATE | SUBDATE | TIMESTAMP_ADD | TIMESTAMP_SUB
case_when = WHEN + expr.copy()("when")
case_then = THEN + expr.copy()("then")
- case_clauses = Group(ZeroOrMore(case_when + case_then))
+ case_clauses = Group((case_when + case_then)[...])
case_else = ELSE + expr.copy()("else")
case_stmt = (
CASE
struct_term = LPAR + delimitedList(expr_term) + RPAR
UNARY, BINARY, TERNARY = 1, 2, 3
- expr << infixNotation(
+ expr <<= infixNotation(
(expr_term | struct_term),
[
(oneOf("- + ~") | NOT, UNARY, opAssoc.RIGHT),
],
)
quoted_expr = (
- expr
- ^ Suppress('"') + expr + Suppress('"')
- ^ Suppress("'") + expr + Suppress("'")
- ^ Suppress("`") + expr + Suppress("`")
+ expr | QUOT + expr + QUOT | APOS + expr + APOS | ACC + expr + ACC
)("quoted_expr")
compound_operator = (
cls._table_identifiers.add(tuple(padded_list))
standard_table_part = ~keyword + Word(alphanums + "_")
- quoted_project_part = (
- Suppress('"') + CharsNotIn('"') + Suppress('"')
- | Suppress("'") + CharsNotIn("'") + Suppress("'")
- | Suppress("`") + CharsNotIn("`") + Suppress("`")
- )
+ quoted_project_part = QUOTED_QUOT | QUOTED_APOS | QUOTED_ACC
quoted_table_part = (
- Suppress('"') + CharsNotIn('".') + Suppress('"')
- | Suppress("'") + CharsNotIn("'.") + Suppress("'")
- | Suppress("`") + CharsNotIn("`.") + Suppress("`")
+ QUOT + CharsNotIn('".') + QUOT
+ | APOS + CharsNotIn("'.") + APOS
+ | ACC + CharsNotIn("`.") + ACC
)
quoted_table_parts_identifier = (
Optional(
- (quoted_project_part("project") | standard_table_part("project"))
- + Suppress(".")
+ (quoted_project_part("project") | standard_table_part("project")) + DOT
)
+ Optional(
- (quoted_table_part("dataset") | standard_table_part("dataset"))
- + Suppress(".")
+ (quoted_table_part("dataset") | standard_table_part("dataset")) + DOT
)
+ (quoted_table_part("table") | standard_table_part("table"))
).setParseAction(record_table_identifier)
def record_quoted_table_identifier(t):
- identifier_list = t.asList()[0].split(".")
- first = ".".join(identifier_list[0:-2]) or None
- second = identifier_list[-2]
- third = identifier_list[-1]
+ identifier_list = t[0].split(".")
+ *first, second, third = identifier_list
+ first = ".".join(first) or None
identifier_list = [first, second, third]
padded_list = [None] * (3 - len(identifier_list)) + identifier_list
cls._table_identifiers.add(tuple(padded_list))
quotable_table_parts_identifier = (
- Suppress('"') + CharsNotIn('"') + Suppress('"')
- | Suppress("'") + CharsNotIn("'") + Suppress("'")
- | Suppress("`") + CharsNotIn("`") + Suppress("`")
+ QUOTED_QUOT | QUOTED_APOS | QUOTED_ACC
).setParseAction(record_quoted_table_identifier)
table_identifier = (
| (UNNEST + LPAR + expr + RPAR)
) + Optional(Optional(AS) + table_alias)
- join_source << single_source + ZeroOrMore(
- join_op + single_source + join_constraint
- )
+ join_source <<= single_source + (join_op + single_source + join_constraint)[...]
over_partition = (PARTITION + BY + delimitedList(partition_expression_list))(
"over_partition"
select_core = Optional(with_stmt) + select_no_with
grouped_select_core = select_core | (LPAR + select_core + RPAR)
- ungrouped_select_stmt << (
+ ungrouped_select_stmt <<= (
grouped_select_core
- + ZeroOrMore(compound_operator + grouped_select_core)
+ + (compound_operator + grouped_select_core)[...]
+ Optional(
LIMIT
+ (Group(expr + OFFSET + expr) | Group(expr + COMMA + expr) | expr)(
+ select_stmt
+ RPAR
)
- with_stmt << (WITH + delimitedList(with_clause))
+ with_stmt <<= WITH + delimitedList(with_clause)
with_stmt.ignore(sql_comment)
cls._parser = select_stmt
if __name__ == "__main__":
+ # fmt: off
TEST_CASES = [
[
"""
""",
[
(None, "y", "a"),
- (
- None,
- None,
- "b",
- ),
+ (None, None, "b"),
],
],
[
select * from xyzzy
""",
[
- (
- None,
- None,
- "xyzzy",
- ),
+ (None, None, "xyzzy"),
],
],
[
select z.* from xyzzy
""",
[
- (
- None,
- None,
- "xyzzy",
- ),
+ (None, None, "xyzzy"),
],
],
[
FROM a
""",
[
- (
- None,
- None,
- "a",
- ),
+ (None, None, "a"),
],
],
[
FROM T
""",
[
- (
- None,
- None,
- "T",
- ),
+ (None, None, "T"),
],
],
[
FROM d
""",
[
- (
- None,
- None,
- "d",
- ),
+ (None, None, "d"),
],
],
[
FROM i
""",
[
- (
- None,
- None,
- "i",
- ),
+ (None, None, "i"),
],
],
[
FROM m
""",
[
- (
- None,
- None,
- "m",
- ),
+            (None, None, "m"),
],
],
[
FROM r
""",
[
- (
- None,
- None,
- "r",
- ),
+ (None, None, "r"),
],
],
[
FROM w
""",
[
- (
- None,
- None,
- "w",
- ),
+ (None, None, "w"),
],
],
[
FROM ac
""",
[
- (
- None,
- None,
- "ac",
- ),
+ (None, None, "ac"),
],
],
[
FROM ah
""",
[
- (
- None,
- None,
- "ah",
- ),
+ (None, None, "ah"),
],
],
[
FROM an
""",
[
- (
- None,
- None,
- "an",
- ),
+ (None, None, "an"),
],
],
[
SELECT y FROM onE JOIN TWo
""",
[
- (
- None,
- None,
- "y",
- ),
- (
- None,
- None,
- "b",
- ),
+ (None, None, "y"),
+ (None, None, "b"),
],
],
[
FROM OnE
""",
[
- (
- None,
- None,
- "oNE",
- ),
- (
- None,
- None,
- "OnE",
- ),
+ (None, None, "oNE"),
+ (None, None, "OnE"),
],
],
[
)
SELECT y FROM z
""",
- [(None, None, "b"), (None, None, "z")],
+ [
+ (None, None, "b"),
+ (None, None, "z")
+ ],
],
[
"""
FIRST_VALUE(x IGNORE NULLS) OVER (PARTITION BY y)
FROM z
""",
- [(None, None, "z")],
+ [
+ (None, None, "z")
+ ],
],
[
"""
SELECT a . b . c
FROM d
""",
- [(None, None, "d")],
+ [
+ (None, None, "d")
+ ],
],
[
"""
)
SELECT h FROM a
""",
- [(None, None, "c"), (None, None, "f")],
+ [
+ (None, None, "c"),
+ (None, None, "f")
+ ],
],
[
"""
)
(SELECT h FROM a)
""",
- [(None, None, "c"), (None, None, "f")],
+ [
+ (None, None, "c"),
+ (None, None, "f")
+ ],
],
[
"""
SELECT * FROM a.b.`c`
""",
- [("a", "b", "c")],
+ [
+ ("a", "b", "c"),
+ ],
],
[
"""
SELECT * FROM 'a'.b.`c`
""",
- [("a", "b", "c")],
+ [
+ ("a", "b", "c"),
+ ],
],
]
+ # fmt: on
parser = BigQueryViewParser()
for test_index, test_case in enumerate(TEST_CASES):
from pyparsing import (
Word,
alphanums,
- Keyword,
+ CaselessKeyword,
Group,
Forward,
Suppress,
OneOrMore,
- oneOf,
+ one_of,
)
import re
+# Updated on 02 Dec 2021 according to ftp://ftp.unicode.org/Public/UNIDATA/Blocks.txt
alphabet_ranges = [
- ##CYRILIC: https://en.wikipedia.org/wiki/Cyrillic_(Unicode_block)
+    # CYRILLIC: https://en.wikipedia.org/wiki/Cyrillic_(Unicode_block)
[int("0400", 16), int("04FF", 16)],
- ##THAI: https://en.wikipedia.org/wiki/Thai_(Unicode_block)
- [int("0E00", 16), int("0E7F", 16)],
- ##ARABIC: https://en.wikipedia.org/wiki/Arabic_(Unicode_block) (Arabic (0600–06FF)+ Syriac (0700–074F)+ Arabic Supplement (0750–077F) )
+ # ARABIC: https://en.wikipedia.org/wiki/Arabic_(Unicode_block) (Arabic (0600–06FF)+ Syriac (0700–074F)+ Arabic Supplement (0750–077F))
[int("0600", 16), int("07FF", 16)],
- ##CHINESE: https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
- [int("0400", 16), int("09FF", 16)],
- # JAPANESE : https://en.wikipedia.org/wiki/Japanese_writing_system
+ # THAI: https://en.wikipedia.org/wiki/Thai_(Unicode_block)
+ [int("0E00", 16), int("0E7F", 16)],
+ # JAPANESE : https://en.wikipedia.org/wiki/Japanese_writing_system (Hiragana (3040–309F) + Katakana (30A0–30FF))
[int("3040", 16), int("30FF", 16)],
+ # Enclosed CJK Letters and Months
+ [int("3200", 16), int("32FF", 16)],
+ # CHINESE: https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
+ [int("4E00", 16), int("9FFF", 16)],
# KOREAN : https://en.wikipedia.org/wiki/Hangul
- [int("AC00", 16), int("D7AF", 16)],
[int("1100", 16), int("11FF", 16)],
[int("3130", 16), int("318F", 16)],
- [int("3200", 16), int("32FF", 16)],
[int("A960", 16), int("A97F", 16)],
+ [int("AC00", 16), int("D7AF", 16)],
[int("D7B0", 16), int("D7FF", 16)],
+ # Halfwidth and Fullwidth Forms
[int("FF00", 16), int("FFEF", 16)],
]
alphabet = alphanums
# support for non-western alphabets
- for r in alphabet_ranges:
- alphabet += "".join(chr(c) for c in range(*r) if not chr(c).isspace())
+ for lo, hi in alphabet_ranges:
+ alphabet += "".join(chr(c) for c in range(lo, hi + 1) if not chr(c).isspace())
- operatorWord = Group(Word(alphabet + "*")).setResultsName("word*")
+ operatorWord = Group(Word(alphabet + "*")).set_results_name("word*")
operatorQuotesContent = Forward()
operatorQuotesContent << ((operatorWord + operatorQuotesContent) | operatorWord)
operatorQuotes = (
- Group(Suppress('"') + operatorQuotesContent + Suppress('"')).setResultsName(
+ Group(Suppress('"') + operatorQuotesContent + Suppress('"')).set_results_name(
"quotes"
)
| operatorWord
)
operatorParenthesis = (
- Group(Suppress("(") + operatorOr + Suppress(")")).setResultsName(
+ Group(Suppress("(") + operatorOr + Suppress(")")).set_results_name(
"parenthesis"
)
| operatorQuotes
operatorNot = Forward()
operatorNot << (
- Group(Suppress(Keyword("not", caseless=True)) + operatorNot).setResultsName(
+ Group(Suppress(CaselessKeyword("not")) + operatorNot).set_results_name(
"not"
)
| operatorParenthesis
operatorAnd = Forward()
operatorAnd << (
Group(
- operatorNot + Suppress(Keyword("and", caseless=True)) + operatorAnd
- ).setResultsName("and")
+ operatorNot + Suppress(CaselessKeyword("and")) + operatorAnd
+ ).set_results_name("and")
| Group(
- operatorNot + OneOrMore(~oneOf("and or") + operatorAnd)
- ).setResultsName("and")
+ operatorNot + OneOrMore(~one_of("and or") + operatorAnd)
+ ).set_results_name("and")
| operatorNot
)
operatorOr << (
Group(
- operatorAnd + Suppress(Keyword("or", caseless=True)) + operatorOr
- ).setResultsName("or")
+ operatorAnd + Suppress(CaselessKeyword("or")) + operatorOr
+ ).set_results_name("or")
| operatorAnd
)
- return operatorOr.parseString
+ return operatorOr.parse_string
def evaluateAnd(self, argument):
return all(self.evaluate(arg) for arg in argument)
def evaluateQuotes(self, argument):
"""Evaluate quoted strings
- First is does an 'and' on the indidual search terms, then it asks the
+    First it does an 'and' on the individual search terms, then it asks the
function GetQuoted to only return the subset of ID's that contain the
literal string.
"""
all_ok = all_ok and test_passed
+ # Tests for non western characters, should fail with
+ # pyparsing.exceptions.ParseException under the previous
+ # configuration
+ non_western_exprs = {
+ "0": "*",
+ "1": "ヿ", # Edge character
+ "2": "亀", # Character in CJK block
+ "3": "ヿ or 亀",
+ "4": "ヿ and 亀",
+ "5": "not ヿ"
+ }
+
+ non_western_texts_matcheswith = {
+ "안녕하세요, 당신은 어떠세요?": ["0", "5"],
+ "ヿ": ["0", "1", "3"],
+ "亀": ["0", "2", "3", "5"],
+ "亀 ヿ": ["0", "1", "2", "3", "4"],
+ }
+
+ for text, matches in non_western_texts_matcheswith.items():
+ _matches = []
+ for _id, expr in non_western_exprs.items():
+ if self.match(text, expr):
+ _matches.append(_id)
+
+ test_passed = sorted(matches) == sorted(_matches)
+ if not test_passed:
+ print("Failed", repr(text), "expected", matches, "matched", _matches)
+
+ all_ok = all_ok and test_passed
+
return all_ok
Metadata-Version: 2.1
Name: pyparsing
-Version: 3.0.6
+Version: 3.0.7
Summary: Python parsing module
Home-page: https://github.com/pyparsing/pyparsing/
Author: Paul McGuire
See `CHANGES <https://github.com/pyparsing/pyparsing/blob/master/CHANGES>`__ file.
- .. |Build Status| image:: https://travis-ci.com/pyparsing/pyparsing.svg?branch=master
- :target: https://travis-ci.com/pyparsing/pyparsing
+ .. |Build Status| image:: https://github.com/pyparsing/pyparsing/actions/workflows/ci.yml/badge.svg
+ :target: https://github.com/pyparsing/pyparsing/actions/workflows/ci.yml
.. |Coverage| image:: https://codecov.io/gh/pyparsing/pyparsing/branch/master/graph/badge.svg
:target: https://codecov.io/gh/pyparsing/pyparsing
)
-__version_info__ = version_info(3, 0, 6, "final", 0)
-__version_time__ = "12 Nov 2021 16:06 UTC"
+__version_info__ = version_info(3, 0, 7, "final", 0)
+__version_time__ = "15 Jan 2022 04:10 UTC"
__version__ = __version_info__.__version__
__versionTime__ = __version_time__
__author__ = "Paul McGuire <ptmcg.gm+pyparsing@gmail.com>"
from typing import (
Optional as OptionalType,
Iterable as IterableType,
+ NamedTuple,
Union,
Callable,
Any,
TextIO,
Set,
Dict as DictType,
+ Sequence,
)
from abc import ABC, abstractmethod
from enum import Enum
_debug_names = [name for name in _all_names if name.startswith("enable_debug")]
@classmethod
- def enable_all_warnings(cls):
+ def enable_all_warnings(cls) -> None:
for name in cls._warning_names:
cls.enable(name)
enable_debug_on_named_expressions = 7
-def enable_diag(diag_enum):
+def enable_diag(diag_enum: Diagnostics) -> None:
"""
Enable a global pyparsing diagnostic flag (see :class:`Diagnostics`).
"""
__diag__.enable(diag_enum.name)
-def disable_diag(diag_enum):
+def disable_diag(diag_enum: Diagnostics) -> None:
"""
Disable a global pyparsing diagnostic flag (see :class:`Diagnostics`).
"""
__diag__.disable(diag_enum.name)
-def enable_all_warnings():
+def enable_all_warnings() -> None:
"""
Enable all global pyparsing diagnostic warnings (see :class:`Diagnostics`).
"""
def _should_enable_warnings(
- cmd_line_warn_options: List[str], warn_env_var: OptionalType[str]
+ cmd_line_warn_options: IterableType[str], warn_env_var: OptionalType[str]
) -> bool:
enable = bool(warn_env_var)
for warn_opt in cmd_line_warn_options:
nums = "0123456789"
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
-printables = "".join(c for c in string.printable if c not in string.whitespace)
+printables = "".join([c for c in string.printable if c not in string.whitespace])
_trim_arity_call_line = None
def condition_as_parse_action(
fn: ParseCondition, message: str = None, fatal: bool = False
-):
+) -> ParseAction:
"""
Function to convert a simple predicate function that returns ``True`` or ``False``
into a parse action. Can be used in places when a parse action is required
_literalStringClass: OptionalType[type] = None
@staticmethod
- def set_default_whitespace_chars(chars: str):
+ def set_default_whitespace_chars(chars: str) -> None:
r"""
Overrides the default whitespace chars
expr.whiteChars = set(chars)
@staticmethod
- def inline_literals_using(cls: type):
+ def inline_literals_using(cls: type) -> None:
"""
Set class to be used for inclusion of string literals into a parser.
"""
ParserElement._literalStringClass = cls
+ class DebugActions(NamedTuple):
+ debug_try: OptionalType[DebugStartAction]
+ debug_match: OptionalType[DebugSuccessAction]
+ debug_fail: OptionalType[DebugExceptionAction]
+
def __init__(self, savelist: bool = False):
self.parseAction: List[ParseAction] = list()
self.failAction: OptionalType[ParseFailAction] = None
# mark results names as modal (report only last) or cumulative (list all)
self.modalResults = True
# custom debug actions
- self.debugActions: Tuple[
- OptionalType[DebugStartAction],
- OptionalType[DebugSuccessAction],
- OptionalType[DebugExceptionAction],
- ] = (None, None, None)
+ self.debugActions = self.DebugActions(None, None, None)
self.re = None
# avoid redundant calls to preParse
self.callPreparse = True
self.callDuringTry = False
- self.suppress_warnings_ = []
+ self.suppress_warnings_: List[Diagnostics] = []
- def suppress_warning(self, warning_type: Diagnostics):
+ def suppress_warning(self, warning_type: Diagnostics) -> "ParserElement":
"""
Suppress warnings emitted for a particular diagnostic on this expression.
self._parse = self._parse._originalParseMethod
return self
- def set_parse_action(
- self, *fns: ParseAction, **kwargs
- ) -> OptionalType["ParserElement"]:
+ def set_parse_action(self, *fns: ParseAction, **kwargs) -> "ParserElement":
"""
Define one or more actions to perform when successfully matching parse element definition.
return int(toks[0])
# use a parse action to verify that the date is a valid date
- def is_valid_date(toks):
+ def is_valid_date(instring, loc, toks):
from datetime import date
year, month, day = toks[::2]
try:
date(year, month, day)
except ValueError:
- raise ParseException("invalid date given")
+ raise ParseException(instring, loc, "invalid date given")
integer = Word(nums)
date_str = integer + '/' + integer + '/' + integer
else:
if not all(callable(fn) for fn in fns):
raise TypeError("parse actions must be callable")
- self.parseAction = list(map(_trim_arity, list(fns)))
+ self.parseAction = [_trim_arity(fn) for fn in fns]
self.callDuringTry = kwargs.get(
"call_during_try", kwargs.get("callDuringTry", False)
)
See examples in :class:`copy`.
"""
- self.parseAction += list(map(_trim_arity, list(fns)))
+ self.parseAction += [_trim_arity(fn) for fn in fns]
self.callDuringTry = self.callDuringTry or kwargs.get(
"call_during_try", kwargs.get("callDuringTry", False)
)
else:
pre_loc = loc
tokens_start = pre_loc
- if self.debugActions[TRY]:
- self.debugActions[TRY](instring, tokens_start, self)
+ if self.debugActions.debug_try:
+ self.debugActions.debug_try(instring, tokens_start, self, False)
if self.mayIndexError or pre_loc >= len_instring:
try:
loc, tokens = self.parseImpl(instring, pre_loc, doActions)
loc, tokens = self.parseImpl(instring, pre_loc, doActions)
except Exception as err:
# print("Exception raised:", err)
- if self.debugActions[FAIL]:
- self.debugActions[FAIL](instring, tokens_start, self, err)
+ if self.debugActions.debug_fail:
+ self.debugActions.debug_fail(
+ instring, tokens_start, self, err, False
+ )
if self.failAction:
self.failAction(instring, tokens_start, self, err)
raise
)
except Exception as err:
# print "Exception raised in user parse action:", err
- if self.debugActions[FAIL]:
- self.debugActions[FAIL](instring, tokens_start, self, err)
+ if self.debugActions.debug_fail:
+ self.debugActions.debug_fail(
+ instring, tokens_start, self, err, False
+ )
raise
else:
for fn in self.parseAction:
)
if debugging:
# print("Matched", self, "->", ret_tokens.as_list())
- if self.debugActions[MATCH]:
- self.debugActions[MATCH](instring, tokens_start, loc, self, ret_tokens)
+ if self.debugActions.debug_match:
+ self.debugActions.debug_match(
+ instring, tokens_start, loc, self, ret_tokens, False
+ )
return loc, ret_tokens
return value
else:
ParserElement.packrat_cache_stats[HIT] += 1
- if self.debug and self.debugActions[TRY]:
+ if self.debug and self.debugActions.debug_try:
try:
- self.debugActions[TRY](instring, loc, self, cache_hit=True)
+ self.debugActions.debug_try(instring, loc, self, cache_hit=True)
except TypeError:
pass
if isinstance(value, Exception):
- if self.debug and self.debugActions[FAIL]:
+ if self.debug and self.debugActions.debug_fail:
try:
- self.debugActions[FAIL](
+ self.debugActions.debug_fail(
instring, loc, self, value, cache_hit=True
)
except TypeError:
raise value
loc_, result, endloc = value[0], value[1].copy(), value[2]
- if self.debug and self.debugActions[MATCH]:
+ if self.debug and self.debugActions.debug_match:
try:
- self.debugActions[MATCH](
+ self.debugActions.debug_match(
instring, loc_, endloc, self, result, cache_hit=True
)
except TypeError:
Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
"""
- out = []
+ out: List[str] = []
lastE = 0
# force preservation of <TAB>s, to minimize unwanted transformation of string, and to
# keep string locs straight between transform_string and scan_string
if isinstance(t, ParseResults):
out += t.as_list()
elif isinstance(t, Iterable) and not isinstance(t, str_type):
- out += list(t)
+ out.extend(t)
else:
out.append(t)
lastE = e
out.append(instring[lastE:])
out = [o for o in out if o]
- return "".join(map(str, _flatten(out)))
+ return "".join([str(s) for s in _flatten(out)])
except ParseBaseException as exc:
if ParserElement.verbose_stacktrace:
raise
- ``exception_action`` - method to be called when expression fails to parse;
should have the signature ``fn(input_string: str, location: int, expression: ParserElement, exception: Exception, cache_hit: bool)``
"""
- self.debugActions = (
+ self.debugActions = self.DebugActions(
start_action or _default_start_debug_action,
success_action or _default_success_debug_action,
exception_action or _default_exception_debug_action,
self.debug = True
return self
- def set_debug(self, flag=True) -> "ParserElement":
+ def set_debug(self, flag: bool = True) -> "ParserElement":
"""
Enable display of debugging messages while doing pattern matching.
Set ``flag`` to ``True`` to enable, ``False`` to disable.
self._defaultName = None
return self
- def recurse(self):
+ def recurse(self) -> Sequence["ParserElement"]:
return []
def _checkRecursion(self, parseElementList):
for e in self.recurse():
e._checkRecursion(subRecCheckList)
- def validate(self, validateTrace=None):
+ def validate(self, validateTrace=None) -> None:
"""
Check defined expressions for valid structure, check for infinite recursive definitions.
"""
printResults: bool = True,
failureTests: bool = False,
postParse: Callable[[str, ParseResults], str] = None,
- ):
+ ) -> Tuple[bool, List[Tuple[str, Union[ParseResults, Exception]]]]:
"""
Execute the parse expression on a series of test strings, showing each
test, the parsed results or where the parse failed. Quick and easy way to
failureTests = failureTests or failure_tests
postParse = postParse or post_parse
if isinstance(tests, str_type):
- tests = list(map(type(tests).strip, tests.rstrip().splitlines()))
+ line_strip = type(tests).strip
+ tests = [line_strip(test_line) for test_line in tests.rstrip().splitlines()]
if isinstance(comment, str_type):
comment = Literal(comment)
if file is None:
BOM = "\ufeff"
for t in tests:
if comment is not None and comment.matches(t, False) or comments and not t:
- comments.append(pyparsing_test.with_line_numbers(t))
+ comments.append(
+ pyparsing_test.with_line_numbers(t) if with_line_numbers else t
+ )
continue
if not t:
continue
raise ParseException(instring, errloc, errmsg, self)
@staticmethod
- def set_default_keyword_chars(chars):
+ def set_default_keyword_chars(chars) -> None:
"""
Overrides the default characters used by :class:`Keyword` expressions.
"""
ret = result
return loc, ret
- def sub(self, repl):
+ def sub(self, repl: str) -> ParserElement:
r"""
Return :class:`Regex` with an attached parse action to transform the parsed
result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.
+ "|".join(
"(?:{}(?!{}))".format(
re.escape(self.endQuoteChar[:i]),
- _escape_regex_range_chars(self.endQuoteChar[i:]),
+ re.escape(self.endQuoteChar[i:]),
)
for i in range(len(self.endQuoteChar) - 1, 0, -1)
)
super().__init__()
self.matchWhite = ws
self.set_whitespace_chars(
- "".join(c for c in self.whiteChars if c not in self.matchWhite),
+ "".join(c for c in self.whiteStrs if c not in self.matchWhite),
copy_defaults=True,
)
# self.leave_whitespace()
"""
def __init__(self, word_chars: str = printables, *, wordChars: str = printables):
- wordChars = word_chars if wordChars != printables else wordChars
+ wordChars = word_chars if wordChars == printables else wordChars
super().__init__()
self.wordChars = set(wordChars)
self.errmsg = "Not at the start of a word"
"""
def __init__(self, word_chars: str = printables, *, wordChars: str = printables):
- wordChars = word_chars if wordChars != printables else wordChars
+ wordChars = word_chars if wordChars == printables else wordChars
super().__init__()
self.wordChars = set(wordChars)
self.skipWhitespace = False
self.exprs = [exprs]
self.callPreparse = False
- def recurse(self):
+ def recurse(self) -> Sequence[ParserElement]:
return self.exprs[:]
- def append(self, other):
+ def append(self, other) -> ParserElement:
self.exprs.append(other)
self._defaultName = None
return self
- def leave_whitespace(self, recursive=True):
+ def leave_whitespace(self, recursive: bool = True) -> ParserElement:
"""
Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on
all contained expressions.
e.leave_whitespace(recursive)
return self
- def ignore_whitespace(self, recursive=True):
+ def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
"""
Extends ``ignore_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on
all contained expressions.
e.ignore_whitespace(recursive)
return self
- def ignore(self, other):
+ def ignore(self, other) -> ParserElement:
if isinstance(other, Suppress):
if other not in self.ignoreExprs:
super().ignore(other)
def _generateDefaultName(self):
return "{}:({})".format(self.__class__.__name__, str(self.exprs))
- def streamline(self):
+ def streamline(self) -> ParserElement:
if self.streamlined:
return self
return self
- def validate(self, validateTrace=None):
+ def validate(self, validateTrace=None) -> None:
tmp = (validateTrace if validateTrace is not None else [])[:] + [self]
for e in self.exprs:
e.validate(tmp)
self._checkRecursion([])
- def copy(self):
+ def copy(self) -> ParserElement:
ret = super().copy()
ret.exprs = [e.copy() for e in self.exprs]
return ret
super().__init__(exprs, savelist)
if self.exprs:
self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
- self.set_whitespace_chars(
- self.exprs[0].whiteChars,
- copy_defaults=self.exprs[0].copyDefaultWhiteChars,
- )
- self.skipWhitespace = self.exprs[0].skipWhitespace
+ if not isinstance(self.exprs[0], White):
+ self.set_whitespace_chars(
+ self.exprs[0].whiteChars,
+ copy_defaults=self.exprs[0].copyDefaultWhiteChars,
+ )
+ self.skipWhitespace = self.exprs[0].skipWhitespace
+ else:
+ self.skipWhitespace = False
else:
self.mayReturnEmpty = True
self.callPreparse = True
seen.add(id(cur))
if isinstance(cur, IndentedBlock):
prev.add_parse_action(
- lambda s, l, t: setattr(cur, "parent_anchor", col(l, s))
+ lambda s, l, t, cur_=cur: setattr(cur_, "parent_anchor", col(l, s))
)
break
subs = cur.recurse()
if self.exprs:
self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
self.saveAsList = any(e.saveAsList for e in self.exprs)
- self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)
+ self.skipWhitespace = all(
+ e.skipWhitespace and not isinstance(e, White) for e in self.exprs
+ )
else:
self.saveAsList = False
return self
warnings.warn(
"{}: setting results name {!r} on {} expression "
"will return a list of all parsed tokens in an And alternative, "
- "in prior versions only the first token was returned; enclose"
+ "in prior versions only the first token was returned; enclose "
"contained argument in Group".format(
"warn_multiple_tokens_in_named_alternation",
name,
if self.exprs:
self.saveAsList = any(e.saveAsList for e in self.exprs)
self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
- self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)
+ self.skipWhitespace = all(
+ e.skipWhitespace and not isinstance(e, White) for e in self.exprs
+ )
else:
self.saveAsList = False
self.mayReturnEmpty = True
warnings.warn(
"{}: setting results name {!r} on {} expression "
"will return a list of all parsed tokens in an And alternative, "
- "in prior versions only the first token was returned; enclose"
+ "in prior versions only the first token was returned; enclose "
"contained argument in Group".format(
"warn_multiple_tokens_in_named_alternation",
name,
raise max_fatal
if tmpReqd:
- missing = ", ".join(str(e) for e in tmpReqd)
+ missing = ", ".join([str(e) for e in tmpReqd])
raise ParseException(
instring,
loc,
self.callPreparse = expr.callPreparse
self.ignoreExprs.extend(expr.ignoreExprs)
- def recurse(self):
+ def recurse(self) -> Sequence[ParserElement]:
return [self.expr] if self.expr is not None else []
def parseImpl(self, instring, loc, doActions=True):
if self.expr is not None:
return self.expr._parse(instring, loc, doActions, callPreParse=False)
else:
- raise ParseException("", loc, self.errmsg, self)
+ raise ParseException(instring, loc, "No expression defined", self)
- def leave_whitespace(self, recursive=True):
+ def leave_whitespace(self, recursive: bool = True) -> ParserElement:
super().leave_whitespace(recursive)
if recursive:
self.expr.leave_whitespace(recursive)
return self
- def ignore_whitespace(self, recursive=True):
+ def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
super().ignore_whitespace(recursive)
if recursive:
self.expr.ignore_whitespace(recursive)
return self
- def ignore(self, other):
+ def ignore(self, other) -> ParserElement:
if isinstance(other, Suppress):
if other not in self.ignoreExprs:
super().ignore(other)
self.expr.ignore(self.ignoreExprs[-1])
return self
- def streamline(self):
+ def streamline(self) -> ParserElement:
super().streamline()
if self.expr is not None:
self.expr.streamline()
if self.expr is not None:
self.expr._checkRecursion(subRecCheckList)
- def validate(self, validateTrace=None):
+ def validate(self, validateTrace=None) -> None:
if validateTrace is None:
validateTrace = []
tmp = validateTrace[:] + [self]
ender = self._literalStringClass(ender)
self.stopOn(ender)
- def stopOn(self, ender):
+ def stopOn(self, ender) -> ParserElement:
if isinstance(ender, str_type):
ender = self._literalStringClass(ender)
self.not_ender = ~ender if ender is not None else None
raise
prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek
- def leave_whitespace(self, recursive=True):
+ def leave_whitespace(self, recursive: bool = True) -> ParserElement:
self.skipWhitespace = False
return self
- def ignore_whitespace(self, recursive=True):
+ def ignore_whitespace(self, recursive: bool = True) -> ParserElement:
self.skipWhitespace = True
return self
- def streamline(self):
+ def streamline(self) -> ParserElement:
if not self.streamlined:
self.streamlined = True
if self.expr is not None:
self.expr.streamline()
return self
- def validate(self, validateTrace=None):
+ def validate(self, validateTrace=None) -> None:
if validateTrace is None:
validateTrace = []
finally:
return self.__class__.__name__ + ": " + retString
- def copy(self):
+ def copy(self) -> ParserElement:
if self.expr is not None:
return super().copy()
else:
self.joinString = joinString
self.callPreparse = True
- def ignore(self, other):
+ def ignore(self, other) -> ParserElement:
if self.adjacent:
ParserElement.ignore(self, other)
else:
def postParse(self, instring, loc, tokenlist):
return []
- def suppress(self):
+ def suppress(self) -> ParserElement:
return self
-def trace_parse_action(f: ParseAction):
+def trace_parse_action(f: ParseAction) -> ParseAction:
"""Decorator for debugging parse actions.
When the parse action is called, this decorator will print
)
-def srange(s):
+def srange(s: str) -> str:
r"""Helper to easily define string ranges for use in :class:`Word`
construction. Borrows syntax from regexp ``'[]'`` string range
definitions::
return ""
-def token_map(func, *args):
+def token_map(func, *args) -> ParseAction:
"""Helper to define a parse action by mapping a function to all
elements of a :class:`ParseResults` list. If any additional args are passed,
they are forwarded to the given function as additional arguments
return pa
-def autoname_elements():
+def autoname_elements() -> None:
"""
Utility to simplify mass-naming of parser elements, for
generating railroad diagram with named subdiagrams.
expr: Union[str, ParserElement],
delim: Union[str, ParserElement] = ",",
combine: bool = False,
+ min: OptionalType[int] = None,
+ max: OptionalType[int] = None,
*,
allow_trailing_delim: bool = False,
) -> ParserElement:
expr = ParserElement._literalStringClass(expr)
dlName = "{expr} [{delim} {expr}]...{end}".format(
- expr=str(expr.streamline()),
+ expr=str(expr.copy().streamline()),
delim=str(delim),
end=" [{}]".format(str(delim)) if allow_trailing_delim else "",
)
if not combine:
delim = Suppress(delim)
- delimited_list_expr = expr + ZeroOrMore(delim + expr)
+ if min is not None:
+ if min < 1:
+ raise ValueError("min must be greater than 0")
+ min -= 1
+ if max is not None:
+ if min is not None and max <= min:
+ raise ValueError("max must be greater than, or equal to min")
+ max -= 1
+ delimited_list_expr = expr + (delim + expr)[min, max]
if allow_trailing_delim:
delimited_list_expr += Opt(delim)
def must_match_these_tokens(s, l, t):
theseTokens = _flatten(t.as_list())
if theseTokens != matchTokens:
- raise ParseException("", 0, "")
+            raise ParseException(s, l, "Expected {}, found {}".format(matchTokens, theseTokens))
rep.set_parse_action(must_match_these_tokens, callDuringTry=True)
masks = lambda a, b: b.startswith(a)
parseElementClass = Keyword if asKeyword else Literal
- symbols = []
+ symbols: List[str] = []
if isinstance(strs, str_type):
symbols = strs.split()
elif isinstance(strs, Iterable):
return (
"["
+ ", ".join(
- str(i) if isinstance(i, ParseResults) else repr(i)
- for i in self._toklist
+ [
+ str(i) if isinstance(i, ParseResults) else repr(i)
+ for i in self._toklist
+ ]
)
+ "]"
)
cls.identchars
+ "0123456789"
+ "".join(
- c for c in cls._chars_for_ranges if ("_" + c).isidentifier()
+ [c for c in cls._chars_for_ranges if ("_" + c).isidentifier()]
)
)
)
if first == last:
ret.append(escape_re_range_char(first))
else:
+ sep = "" if ord(last) == ord(first) + 1 else "-"
ret.append(
- "{}-{}".format(
- escape_re_range_char(first), escape_re_range_char(last)
+ "{}{}{}".format(
+ escape_re_range_char(first), sep, escape_re_range_char(last)
)
)
else:
text="xxyx,xy,y,xxyx,yxx, xy,",
expected_list=["xxyx", "xy", "y", "xxyx", "yxx", "xy"],
),
+ PpTestSpec(
+ desc="Using delimited_list (comma is the default delimiter) with minimum size",
+ expr=pp.delimited_list(pp.Word(pp.alphas), min=3),
+ text="xxyx,xy",
+ expected_fail_locn=7,
+ ),
+ PpTestSpec(
+ desc="Using delimited_list (comma is the default delimiter) with maximum size",
+ expr=pp.delimited_list(pp.Word(pp.alphas), max=3),
+ text="xxyx,xy,y,xxyx,yxx, xy,",
+ expected_list=["xxyx", "xy", "y"],
+ ),
PpTestSpec(
desc="Using delimited_list, with ':' delimiter",
expr=pp.delimited_list(
msg="scanString with overlaps failed",
)
+ def testCombineWithResultsNames(self):
+ # test case reproducing Issue #350
+ from pyparsing import White, alphas, Word
+
+ parser = White(' \t').set_results_name('indent') + Word(alphas).set_results_name('word')
+ result = parser.parse_string(' test')
+ print(result.dump())
+ self.assertParseResultsEquals(result, [' ', 'test'], {'indent': ' ', 'word': 'test'})
+
+ parser = White(' \t') + Word(alphas).set_results_name('word')
+ result = parser.parse_string(' test')
+ print(result.dump())
+ self.assertParseResultsEquals(result, [' ', 'test'], {'word': 'test'})
+
def testTransformString(self):
make_int_with_commas = ppc.integer().addParseAction(
lambda t: "{:,}".format(t[0])
with self.assertRaises(ValueError):
pp.QuotedString("", "\\")
+ def testCustomQuotes2(self):
+
+ qs = pp.QuotedString(quote_char=".[", end_quote_char="].")
+ print(qs.reString)
+ self.assertParseAndCheckList(qs, ".[...].", ['...'])
+ self.assertParseAndCheckList(qs, ".[].", [''])
+ self.assertParseAndCheckList(qs, ".[]].", [']'])
+ self.assertParseAndCheckList(qs, ".[]]].", [']]'])
+
+ qs = pp.QuotedString(quote_char="+*", end_quote_char="*+")
+ print(qs.reString)
+ self.assertParseAndCheckList(qs, "+*...*+", ['...'])
+ self.assertParseAndCheckList(qs, "+**+", [''])
+ self.assertParseAndCheckList(qs, "+***+", ['*'])
+ self.assertParseAndCheckList(qs, "+****+", ['**'])
+
+ qs = pp.QuotedString(quote_char="*/", end_quote_char="/*")
+ print(qs.reString)
+ self.assertParseAndCheckList(qs, "*/.../*", ['...'])
+ self.assertParseAndCheckList(qs, "*//*", [''])
+ self.assertParseAndCheckList(qs, "*///*", ['/'])
+ self.assertParseAndCheckList(qs, "*////*", ['//'])
+
def testRepeater(self):
if ParserElement._packratEnabled or ParserElement._left_recursion_enabled:
print("skipping this test, not compatible with memoization")
except RecursionError:
self.fail("fail getting named result when empty")
+ def testParseResultsBool(self):
+ result = pp.Word(pp.alphas)[...].parseString("AAA")
+ self.assertTrue(result, "non-empty ParseResults evaluated as False")
+
+ result = pp.Word(pp.alphas)[...].parseString("")
+ self.assertFalse(result, "empty ParseResults evaluated as True")
+
+ result["A"] = 0
+ self.assertTrue(result, "ParseResults with empty list but containing a results name evaluated as False")
+
def testIgnoreString(self):
"""test ParserElement.ignore() passed a string arg"""
),
)
+ def testWordBoundaryExpressions2(self):
+ from itertools import product
+ ws1 = pp.WordStart(pp.alphas)
+ ws2 = pp.WordStart(wordChars=pp.alphas)
+ ws3 = pp.WordStart(word_chars=pp.alphas)
+ we1 = pp.WordEnd(pp.alphas)
+ we2 = pp.WordEnd(wordChars=pp.alphas)
+ we3 = pp.WordEnd(word_chars=pp.alphas)
+
+ for i, (ws, we) in enumerate(product((ws1, ws2, ws3), (we1, we2, we3))):
+ try:
+ expr = ("(" + ws + pp.Word(pp.alphas) + we + ")")
+ expr.parseString("(abc)")
+ except pp.ParseException as pe:
+ self.fail(f"Test {i} failed: {pe}")
+ else:
+ pass
+
def testRequiredEach(self):
parser = pp.Keyword("bam") & pp.Keyword("boo")
"noop parse action changed ParseResults structure",
)
+ def testParseActionWithDelimitedList(self):
+ class AnnotatedToken(object):
+ def __init__(self, kind, elements):
+ self.kind = kind
+ self.elements = elements
+
+ def __str__(self):
+ return 'AnnotatedToken(%r, %r)' % (self.kind, self.elements)
+
+ def __eq__(self, other):
+ return type(self) == type(other) and self.kind == other.kind and self.elements == other.elements
+
+ __repr__ = __str__
+
+ def annotate(name):
+ def _(t):
+ return AnnotatedToken(name, t.asList())
+ return _
+
+ identifier = pp.Word(pp.srange('[a-z0-9]'))
+ numeral = pp.Word(pp.nums)
+
+ named_number_value = pp.Suppress('(') + numeral + pp.Suppress(')')
+ named_number = identifier + named_number_value
+
+ named_number_list = (pp.Suppress('{') +
+ pp.Group(pp.Optional(pp.delimitedList(named_number))) +
+ pp.Suppress('}'))
+
+            # repro of bug #345 - delimitedList silently changes contents of named_number
+ named_number_value.setParseAction(annotate("val"))
+
+ test_string = "{ x1(1), x2(2) }"
+ expected = [['x1', AnnotatedToken("val", ['1']), 'x2', AnnotatedToken("val", ['2'])]]
+
+ self.assertParseAndCheckList(named_number_list, test_string, expected)
+
def testParseResultsNameBelowUngroupedName(self):
rule_num = pp.Regex("[0-9]+")("LIT_NUM*")
expr, source, [s.strip() for s in source.split(",")]
)
+ def testDelimitedListMinMax(self):
+ source = "ABC, ABC,ABC"
+ with self.assertRaises(ValueError, msg="min must be greater than 0"):
+ pp.delimited_list("ABC", min=0)
+ with self.assertRaises(ValueError, msg="max must be greater than, or equal to min"):
+ pp.delimited_list("ABC", min=1, max=0)
+ with self.assertRaises(pp.ParseException):
+ pp.delimited_list("ABC", min=4).parse_string(source)
+
+ source_expr_pairs = [
+ ("ABC, ABC", pp.delimited_list("ABC", max=2)),
+ (source, pp.delimited_list("ABC", min=2, max=4)),
+ ]
+ for source, expr in source_expr_pairs:
+ print(str(expr))
+ self.assertParseAndCheckList(
+ expr, source, [s.strip() for s in source.split(",")]
+ )
+
def testEnableDebugOnNamedExpressions(self):
"""
- enable_debug_on_named_expressions - flag to auto-enable debug on all subsequent
"__diag__.{} not set to True".format(diag_name),
)
- def testWordInternalReRangesKnownSets(self):
- self.assertEqual(
- "[!-~]+",
- pp.Word(pp.printables).reString,
- "failed to generate correct internal re",
- )
- self.assertEqual(
- "[0-9A-Za-z]+",
- pp.Word(pp.alphanums).reString,
- "failed to generate correct internal re",
- )
- self.assertEqual(
- "[!-~¡-ÿ]+",
- pp.Word(pp.pyparsing_unicode.Latin1.printables).reString,
- "failed to generate correct internal re",
- )
- self.assertEqual(
- "[À-ÖØ-öø-ÿ]+",
- pp.Word(pp.alphas8bit).reString,
- "failed to generate correct internal re",
+ def testWordInternalReRangeWithConsecutiveChars(self):
+ self.assertParseAndCheckList(
+ pp.Word("ABCDEMNXYZ"),
+ "ABCDEMNXYZABCDEMNXYZABCDEMNXYZ",
+ ["ABCDEMNXYZABCDEMNXYZABCDEMNXYZ"]
)
+ def testWordInternalReRangesKnownSet(self):
+ tests = [
+ ("ABCDEMNXYZ", "[A-EMNX-Z]+"),
+ (pp.printables, "[!-~]+"),
+ (pp.alphanums, "[0-9A-Za-z]+"),
+ (pp.pyparsing_unicode.Latin1.printables, "[!-~¡-ÿ]+"),
+ (pp.pyparsing_unicode.Latin1.alphanums, "[0-9A-Za-zª²³µ¹ºÀ-ÖØ-öø-ÿ]+"),
+ (pp.alphas8bit, "[À-ÖØ-öø-ÿ]+"),
+ ]
+ failed = []
+ for word_string, expected_re in tests:
+ try:
+ msg = "failed to generate correct internal re for {!r}".format(word_string)
+ resultant_re = pp.Word(word_string).reString
+ self.assertEqual(
+ expected_re,
+ resultant_re,
+ msg + "; expected {!r} got {!r}".format(expected_re, resultant_re)
+ )
+ except AssertionError:
+ failed.append(msg)
+
+ if failed:
+ print("Errors:\n{}".format("\n".join(failed)))
+ self.fail("failed to generate correct internal re's")
+
def testWordInternalReRanges(self):
import random
pytest
commands=
python -c "import shutil,os,stat;os.path.exists('packaging') and shutil.rmtree('packaging', onerror=lambda fn, path, _:os.chmod(path,stat.S_IWRITE) or fn(path))"
- git clone --depth 1 https://github.com/pypa/packaging.git
+ git clone --depth 10 https://github.com/pypa/packaging.git
python -m pytest packaging/tests
python -c "import shutil,os,stat;shutil.rmtree('packaging', onerror=lambda fn, path, _:os.chmod(path,stat.S_IWRITE) or fn(path))"