-------------
(not released yet)
+Version 2.7.3
+-------------
+(released December 6, 2020)
+
+- Updated lexers:
+
+ * Ada (PR#1581)
+ * HTML (PR#1615, #1614)
+ * Java (PR#1594, #1586)
+ * JavaScript (PR#1605, PR#1589, #1588)
+ * JSON (PR#1569 -- this is a complete rewrite)
+ * Lean (PR#1601)
+ * LLVM (PR#1612)
+ * Mason (PR#1592)
+ * MySQL (PR#1555, #1551)
+ * Rust (PR#1608)
+ * Turtle (PR#1590, #1553)
+
+- Deprecated JsonBareObjectLexer, which is now identical to JsonLexer (#1600)
+- The ``ImageFormatter`` now calculates the exact character width, which fixes some issues with overlapping text (#1213, PR#1611)
+- Documentation fixes (PR#1609, #1599, #1598)
+- Fixed duplicated Juttle language alias (#1604, PR#1606)
+- Added support for Kotlin scripts (PR#1587)
+- Removed CSS rule which forced margin to 0
+
Version 2.7.2
-------------
* LLVM (PR#1565)
* SPARQL (PR#1559)
-- Fixed Python console/traceback lexer problems with
- custom exceptions without messages (#1548)
+- Fixed Python console/traceback lexer problems with custom exceptions without messages (#1548)
- Allow loading ttc fonts on Mac/image formatter (#1223)
-- Improve ``analyze_text`` across a variety of lexers (PR#1549)
+- Improved ``analyze_text`` across a variety of lexers (PR#1549)
- Removed CSS rule which forced the vertical padding to 0 for line numbers (PR#1583, #1579)
-- Fix ``TNTLexer`` crashing on unexpected EOL (#1568, PR#1570)
+- Fixed ``TNTLexer`` crashing on unexpected EOL (#1568, PR#1570)
- ``regexlint`` can be now run locally as part of ``tox`` tests (PR#1557)
-- Fix typos (PR#1550, #1562)
-- Add Python 3.9 as a supported version (PR#1554)
+- Fixed typos (PR#1550, #1562)
+- Added Python 3.9 as a supported version (PR#1554)
Version 2.7.1
Metadata-Version: 1.2
Name: Pygments
-Version: 2.7.2
+Version: 2.7.3
Summary: Pygments is a syntax highlighting package written in Python.
Home-page: https://pygments.org/
Author: Georg Brandl
Metadata-Version: 1.2
Name: Pygments
-Version: 2.7.2
+Version: 2.7.3
Summary: Pygments is a syntax highlighting package written in Python.
Home-page: https://pygments.org/
Author: Georg Brandl
tests/test_hdl.py
tests/test_html_formatter.py
tests/test_html_formatter_linenos_elements.py
+tests/test_html_lexer.py
tests/test_idris.py
tests/test_inherit.py
tests/test_irc_formatter.py
tests/test_python.py
tests/test_qbasiclexer.py
tests/test_r.py
+tests/test_rdf.py
tests/test_regexlexer.py
tests/test_regexopt.py
tests/test_rtf_formatter.py
tests/test_shell.py
tests/test_smarty.py
tests/test_sql.py
+tests/test_templates.py
tests/test_terminal_formatter.py
tests/test_textfmts.py
tests/test_tnt.py
import sys
from io import StringIO, BytesIO
-__version__ = '2.7.2'
+__version__ = '2.7.3'
__docformat__ = 'restructuredtext'
__all__ = ['lex', 'format', 'highlight']
:license: BSD, see LICENSE for details.
"""
+import functools
import os
import sys
import os.path
self.tagurlformat = self._decodeifneeded(options.get('tagurlformat', ''))
self.filename = self._decodeifneeded(options.get('filename', ''))
self.wrapcode = get_bool_opt(options, 'wrapcode', False)
+ self.span_element_openers = {}
if self.tagsfile:
if not ctags:
return ''
def _get_css_classes(self, ttype):
- """Return the css classes of this token type prefixed with
- the classprefix option."""
+ """Generate the opening <span> tag for a given token type using CSS classes."""
cls = self._get_css_class(ttype)
while ttype not in STANDARD_TYPES:
ttype = ttype.parent
cls = self._get_css_class(ttype) + ' ' + cls
- return cls
+ return cls and '<span class="%s">' % cls or ''
+
+ def _get_css_inline_styles(self, ttype):
+ """Generate the opening <span> tag for a given token type using inline CSS styles."""
+ cclass = self.ttype2class.get(ttype)
+ while cclass is None:
+ ttype = ttype.parent
+ cclass = self.ttype2class.get(ttype)
+ return cclass and '<span style="%s">' % self.class2style[cclass][0] or ''
def _create_stylesheet(self):
t2c = self.ttype2class = {Token: ''}
@property
def _pre_style(self):
- return 'line-height: 125%; margin: 0;'
+ return 'line-height: 125%;'
@property
def _linenos_style(self):
yield from inner
yield 0, '</code>'
+ @functools.lru_cache(maxsize=100)
+ def _translate_parts(self, value):
+ """HTML-escape a value and split it by newlines."""
+ return value.translate(_escape_html_table).split('\n')
+
def _format_lines(self, tokensource):
"""
Just format the tokens, without any wrapping tags.
"""
nocls = self.noclasses
lsep = self.lineseparator
- # for <span style=""> lookup only
- getcls = self.ttype2class.get
- c2s = self.class2style
- escape_table = _escape_html_table
tagsfile = self.tagsfile
lspan = ''
line = []
for ttype, value in tokensource:
- if nocls:
- cclass = getcls(ttype)
- while cclass is None:
- ttype = ttype.parent
- cclass = getcls(ttype)
- cspan = cclass and '<span style="%s">' % c2s[cclass][0] or ''
- else:
- cls = self._get_css_classes(ttype)
- cspan = cls and '<span class="%s">' % cls or ''
+ try:
+ cspan = self.span_element_openers[ttype]
+ except KeyError:
+ if nocls:
+ cspan = self.span_element_openers[ttype] = self._get_css_inline_styles(ttype)
+ else:
+ cspan = self.span_element_openers[ttype] = self._get_css_classes(ttype)
- parts = value.translate(escape_table).split('\n')
+ parts = self._translate_parts(value)
if tagsfile and ttype in Token.Name:
filename, linenumber = self._lookup_ctag(value)
"""
return self.fonts['NORMAL'].getsize('M')
+ def get_text_size(self, text):
+ """
+ Get the text size(width, height).
+ """
+ return self.fonts['NORMAL'].getsize(text)
+
def get_font(self, bold, oblique):
"""
Get the font based on bold and italic flags.
"""
return self.fontw
- def _get_char_x(self, charno):
+ def _get_char_x(self, linelength):
"""
Get the X coordinate of a character position.
"""
- return charno * self.fontw + self.image_pad + self.line_number_width
+ return linelength + self.image_pad + self.line_number_width
- def _get_text_pos(self, charno, lineno):
+ def _get_text_pos(self, linelength, lineno):
"""
Get the actual position for a character and line position.
"""
- return self._get_char_x(charno), self._get_line_y(lineno)
+ return self._get_char_x(linelength), self._get_line_y(lineno)
def _get_linenumber_pos(self, lineno):
"""
"""
return self.fonts.get_font(style['bold'], style['italic'])
- def _get_image_size(self, maxcharno, maxlineno):
+ def _get_image_size(self, maxlinelength, maxlineno):
"""
Get the required image size.
"""
- return (self._get_char_x(maxcharno) + self.image_pad,
+ return (self._get_char_x(maxlinelength) + self.image_pad,
self._get_line_y(maxlineno + 0) + self.image_pad)
def _draw_linenumber(self, posno, lineno):
Create drawables for the token content.
"""
lineno = charno = maxcharno = 0
+ maxlinelength = linelength = 0
for ttype, value in tokensource:
while ttype not in self.styles:
ttype = ttype.parent
temp = line.rstrip('\n')
if temp:
self._draw_text(
- self._get_text_pos(charno, lineno),
+ self._get_text_pos(linelength, lineno),
temp,
font = self._get_style_font(style),
fill = self._get_text_color(style)
)
+ temp_width, temp_height = self.fonts.get_text_size(temp)
+ linelength += temp_width
+ maxlinelength = max(maxlinelength, linelength)
charno += len(temp)
maxcharno = max(maxcharno, charno)
if line.endswith('\n'):
# add a line for each extra line in the value
+ linelength = 0
charno = 0
lineno += 1
+ self.maxlinelength = maxlinelength
self.maxcharno = maxcharno
self.maxlineno = lineno
self._draw_line_numbers()
im = Image.new(
'RGB',
- self._get_image_size(self.maxcharno, self.maxlineno),
+ self._get_image_size(self.maxlinelength, self.maxlineno),
self.background_color
)
self._paint_line_number_bg(im)
'JavascriptSmartyLexer': ('pygments.lexers.templates', 'JavaScript+Smarty', ('js+smarty', 'javascript+smarty'), (), ('application/x-javascript+smarty', 'text/x-javascript+smarty', 'text/javascript+smarty')),
'JclLexer': ('pygments.lexers.scripting', 'JCL', ('jcl',), ('*.jcl',), ('text/x-jcl',)),
'JsgfLexer': ('pygments.lexers.grammar_notation', 'JSGF', ('jsgf',), ('*.jsgf',), ('application/jsgf', 'application/x-jsgf', 'text/jsgf')),
- 'JsonBareObjectLexer': ('pygments.lexers.data', 'JSONBareObject', ('json-object',), (), ('application/json-object',)),
+ 'JsonBareObjectLexer': ('pygments.lexers.data', 'JSONBareObject', (), (), ()),
'JsonLdLexer': ('pygments.lexers.data', 'JSON-LD', ('jsonld', 'json-ld'), ('*.jsonld',), ('application/ld+json',)),
- 'JsonLexer': ('pygments.lexers.data', 'JSON', ('json',), ('*.json', 'Pipfile.lock'), ('application/json',)),
+ 'JsonLexer': ('pygments.lexers.data', 'JSON', ('json', 'json-object'), ('*.json', 'Pipfile.lock'), ('application/json', 'application/json-object')),
'JspLexer': ('pygments.lexers.templates', 'Java Server Page', ('jsp',), ('*.jsp',), ('application/x-jsp',)),
'JuliaConsoleLexer': ('pygments.lexers.julia', 'Julia console', ('jlcon',), (), ()),
'JuliaLexer': ('pygments.lexers.julia', 'Julia', ('julia', 'jl'), ('*.jl',), ('text/x-julia', 'application/x-julia')),
- 'JuttleLexer': ('pygments.lexers.javascript', 'Juttle', ('juttle', 'juttle'), ('*.juttle',), ('application/juttle', 'application/x-juttle', 'text/x-juttle', 'text/juttle')),
+ 'JuttleLexer': ('pygments.lexers.javascript', 'Juttle', ('juttle',), ('*.juttle',), ('application/juttle', 'application/x-juttle', 'text/x-juttle', 'text/juttle')),
'KalLexer': ('pygments.lexers.javascript', 'Kal', ('kal',), ('*.kal',), ('text/kal', 'application/kal')),
'KconfigLexer': ('pygments.lexers.configs', 'Kconfig', ('kconfig', 'menuconfig', 'linux-config', 'kernel-config'), ('Kconfig*', '*Config.in*', 'external.in*', 'standard-modules.in'), ('text/x-kconfig',)),
'KernelLogLexer': ('pygments.lexers.textfmts', 'Kernel log', ('kmsg', 'dmesg'), ('*.kmsg', '*.dmesg'), ()),
'KokaLexer': ('pygments.lexers.haskell', 'Koka', ('koka',), ('*.kk', '*.kki'), ('text/x-koka',)),
- 'KotlinLexer': ('pygments.lexers.jvm', 'Kotlin', ('kotlin',), ('*.kt',), ('text/x-kotlin',)),
+ 'KotlinLexer': ('pygments.lexers.jvm', 'Kotlin', ('kotlin',), ('*.kt', '*.kts'), ('text/x-kotlin',)),
'LSLLexer': ('pygments.lexers.scripting', 'LSL', ('lsl',), ('*.lsl',), ('text/x-lsl',)),
'LassoCssLexer': ('pygments.lexers.templates', 'CSS+Lasso', ('css+lasso',), (), ('text/css+lasso',)),
'LassoHtmlLexer': ('pygments.lexers.templates', 'HTML+Lasso', ('html+lasso',), (), ('text/html+lasso', 'application/x-httpd-lasso', 'application/x-httpd-lasso[89]')),
'nonlazybind', 'nonnull', 'norecurse', 'noRecurse', 'noredzone', 'noreturn',
'notail', 'notEligibleToImport', 'nounwind', 'nsw', 'nsz', 'null', 'nuw', 'oeq',
'offset', 'oge', 'ogt', 'ole', 'olt', 'one', 'opaque', 'optforfuzzing',
- 'optnone', 'optsize', 'or', 'ord', 'path', 'personality', 'phi',
+ 'optnone', 'optsize', 'or', 'ord', 'path', 'personality', 'phi', 'poison',
'prefix', 'preserve_allcc', 'preserve_mostcc', 'private', 'prologue',
'protected', 'ptrtoint', 'ptx_device', 'ptx_kernel', 'readnone', 'readNone',
'readonly', 'readOnly', 'reassoc', 'refs', 'relbf', 'release', 'resByArg',
import re
-from pygments.lexer import RegexLexer, ExtendedRegexLexer, LexerContext, \
+from pygments.lexer import Lexer, RegexLexer, ExtendedRegexLexer, LexerContext, \
include, bygroups, inherit
from pygments.token import Text, Comment, Keyword, Name, String, Number, \
Punctuation, Literal, Error
return super().get_tokens_unprocessed(text, context)
-class JsonLexer(RegexLexer):
+class JsonLexer(Lexer):
"""
For JSON data structures.
"""
name = 'JSON'
- aliases = ['json']
+ aliases = ['json', 'json-object']
filenames = ['*.json', 'Pipfile.lock']
- mimetypes = ['application/json']
+ mimetypes = ['application/json', 'application/json-object']
+
+ # No validation of integers, floats, or constants is done.
+ # As long as the characters are members of the following
+ # sets, the token will be considered valid. For example,
+ #
+ # "--1--" is parsed as an integer
+ # "1...eee" is parsed as a float
+ # "trustful" is parsed as a constant
+ #
+ integers = set('-0123456789')
+ floats = set('.eE+')
+ constants = set('truefalsenull') # true|false|null
+ hexadecimals = set('0123456789abcdefABCDEF')
+ punctuations = set('{}[],')
+ whitespaces = {'\u0020', '\u000a', '\u000d', '\u0009'}
+
+ def get_tokens_unprocessed(self, text):
+ """Parse JSON data."""
+
+ in_string = False
+ in_escape = False
+ in_unicode_escape = 0
+ in_whitespace = False
+ in_constant = False
+ in_number = False
+ in_float = False
+ in_punctuation = False
+
+ start = 0
+
+ # The queue is used to store data that may need to be tokenized
+ # differently based on what follows. In particular, JSON object
+ # keys are tokenized differently than string values, but cannot
+ # be distinguished until punctuation is encountered outside the
+ # string.
+ #
+ # A ":" character after the string indicates that the string is
+ # an object key; any other character indicates the string is a
+ # regular string value.
+ #
+ # The queue holds tuples that contain the following data:
+ #
+ # (start_index, token_type, text)
+ #
+ # By default the token type of text in double quotes is
+ # String.Double. The token type will be replaced if a colon
+ # is encountered after the string closes.
+ #
+ queue = []
+
+ for stop, character in enumerate(text):
+ if in_string:
+ if in_unicode_escape:
+ if character in self.hexadecimals:
+ in_unicode_escape -= 1
+ if not in_unicode_escape:
+ in_escape = False
+ else:
+ in_unicode_escape = 0
+ in_escape = False
+
+ elif in_escape:
+ if character == 'u':
+ in_unicode_escape = 4
+ else:
+ in_escape = False
+
+ elif character == '\\':
+ in_escape = True
+
+ elif character == '"':
+ queue.append((start, String.Double, text[start:stop + 1]))
+ in_string = False
+ in_escape = False
+ in_unicode_escape = 0
+
+ continue
+
+ elif in_whitespace:
+ if character in self.whitespaces:
+ continue
+
+ if queue:
+ queue.append((start, Text, text[start:stop]))
+ else:
+ yield start, Text, text[start:stop]
+ in_whitespace = False
+ # Fall through so the new character can be evaluated.
+
+ elif in_constant:
+ if character in self.constants:
+ continue
+
+ yield start, Keyword.Constant, text[start:stop]
+ in_constant = False
+ # Fall through so the new character can be evaluated.
+
+ elif in_number:
+ if character in self.integers:
+ continue
+ elif character in self.floats:
+ in_float = True
+ continue
+
+ if in_float:
+ yield start, Number.Float, text[start:stop]
+ else:
+ yield start, Number.Integer, text[start:stop]
+ in_number = False
+ in_float = False
+ # Fall through so the new character can be evaluated.
+
+ elif in_punctuation:
+ if character in self.punctuations:
+ continue
+
+ yield start, Punctuation, text[start:stop]
+ in_punctuation = False
+ # Fall through so the new character can be evaluated.
+
+ start = stop
+
+ if character == '"':
+ in_string = True
+
+ elif character in self.whitespaces:
+ in_whitespace = True
+
+ elif character in {'f', 'n', 't'}: # The first letters of true|false|null
+ # Exhaust the queue. Accept the existing token types.
+ yield from queue
+ queue.clear()
+
+ in_constant = True
+
+ elif character in self.integers:
+ # Exhaust the queue. Accept the existing token types.
+ yield from queue
+ queue.clear()
+
+ in_number = True
+
+ elif character == ':':
+ # Yield from the queue. Replace string token types.
+ for _start, _token, _text in queue:
+ if _token is Text:
+ yield _start, _token, _text
+ elif _token is String.Double:
+ yield _start, Name.Tag, _text
+ else:
+ yield _start, Error, _text
+ queue.clear()
+
+ in_punctuation = True
+
+ elif character in self.punctuations:
+ # Exhaust the queue. Accept the existing token types.
+ yield from queue
+ queue.clear()
+
+ in_punctuation = True
- flags = re.DOTALL
-
- # integer part of a number
- int_part = r'-?(0|[1-9]\d*)'
-
- # fractional part of a number
- frac_part = r'\.\d+'
-
- # exponential part of a number
- exp_part = r'[eE](\+|-)?\d+'
-
- tokens = {
- 'whitespace': [
- (r'\s+', Text),
- ],
-
- # represents a simple terminal value
- 'simplevalue': [
- (r'(true|false|null)\b', Keyword.Constant),
- (('%(int_part)s(%(frac_part)s%(exp_part)s|'
- '%(exp_part)s|%(frac_part)s)') % vars(),
- Number.Float),
- (int_part, Number.Integer),
- (r'"(\\(["\\/bfnrt]|u[a-fA-F0-9]{4})|[^\\"])*"', String.Double),
- ],
-
-
- # the right hand side of an object, after the attribute name
- 'objectattribute': [
- include('value'),
- (r':', Punctuation),
- # comma terminates the attribute but expects more
- (r',', Punctuation, '#pop'),
- # a closing bracket terminates the entire object, so pop twice
- (r'\}', Punctuation, '#pop:2'),
- ],
-
- # a json object - { attr, attr, ... }
- 'objectvalue': [
- include('whitespace'),
- (r'"(\\(["\\/bfnrt]|u[a-fA-F0-9]{4})|[^\\"])*"', Name.Tag, 'objectattribute'),
- (r'\}', Punctuation, '#pop'),
- ],
-
- # json array - [ value, value, ... }
- 'arrayvalue': [
- include('whitespace'),
- include('value'),
- (r',', Punctuation),
- (r'\]', Punctuation, '#pop'),
- ],
-
- # a json value - either a simple value or a complex value (object or array)
- 'value': [
- include('whitespace'),
- include('simplevalue'),
- (r'\{', Punctuation, 'objectvalue'),
- (r'\[', Punctuation, 'arrayvalue'),
- ],
-
- # the root of a json document whould be a value
- 'root': [
- include('value'),
- ],
- }
+ else:
+ # Exhaust the queue. Accept the existing token types.
+ yield from queue
+ queue.clear()
+
+ yield start, Error, character
+
+ # Yield any remaining text.
+ yield from queue
+ if in_string:
+ yield start, Error, text[start:]
+ elif in_float:
+ yield start, Number.Float, text[start:]
+ elif in_number:
+ yield start, Number.Integer, text[start:]
+ elif in_constant:
+ yield start, Keyword.Constant, text[start:]
+ elif in_whitespace:
+ yield start, Text, text[start:]
+ elif in_punctuation:
+ yield start, Punctuation, text[start:]
class JsonBareObjectLexer(JsonLexer):
For JSON data structures (with missing object curly braces).
.. versionadded:: 2.2
+
+ .. deprecated:: 2.7.3
+
+ Behaves the same as `JsonLexer` now.
"""
name = 'JSONBareObject'
- aliases = ['json-object']
+ aliases = []
filenames = []
- mimetypes = ['application/json-object']
-
- tokens = {
- 'root': [
- (r'\}', Error),
- include('objectvalue'),
- ],
- 'objectattribute': [
- (r'\}', Error),
- inherit,
- ],
- }
+ mimetypes = []
class JsonLdLexer(JsonLexer):
"""
- For `JSON-LD <http://json-ld.org/>`_ linked data.
+ For `JSON-LD <https://json-ld.org/>`_ linked data.
.. versionadded:: 2.0
"""
filenames = ['*.jsonld']
mimetypes = ['application/ld+json']
- tokens = {
- 'objectvalue': [
- (r'"@(context|id|value|language|type|container|list|set|'
- r'reverse|index|base|vocab|graph)"', Name.Decorator,
- 'objectattribute'),
- inherit,
- ],
+ json_ld_keywords = {
+ '"@%s"' % keyword
+ for keyword in (
+ 'base',
+ 'container',
+ 'context',
+ 'direction',
+ 'graph',
+ 'id',
+ 'import',
+ 'included',
+ 'index',
+ 'json',
+ 'language',
+ 'list',
+ 'nest',
+ 'none',
+ 'prefix',
+ 'propagate',
+ 'protected',
+ 'reverse',
+ 'set',
+ 'type',
+ 'value',
+ 'version',
+ 'vocab',
+ )
}
+
+ def get_tokens_unprocessed(self, text):
+ for start, token, value in super(JsonLdLexer, self).get_tokens_unprocessed(text):
+ if token is Name.Tag and value in self.json_ld_keywords:
+ yield start, Name.Decorator, value
+ else:
+ yield start, token, value
class ECLLexer(RegexLexer):
"""
Lexer for the declarative big-data `ECL
- <http://hpccsystems.com/community/docs/ecl-language-reference/html>`_
+ <https://hpccsystems.com/training/documentation/ecl-language-reference/html>`_
language.
.. versionadded:: 1.5
bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text,
Punctuation), '#pop'),
(r'.+?(?=<\s*/\s*script\s*>)', using(JavascriptLexer)),
+ # fallback cases for when there is no closing script tag
+ # first look for newline and then go back into root state
+ # if that fails just read the rest of the file
+ # this is similar to the error handling logic in lexer.py
+ (r'.+?\n', using(JavascriptLexer), '#pop'),
+ (r'.+', using(JavascriptLexer), '#pop'),
],
'style-content': [
(r'(<)(\s*)(/)(\s*)(style)(\s*)(>)',
bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text,
Punctuation),'#pop'),
(r'.+?(?=<\s*/\s*style\s*>)', using(CssLexer)),
+ # fallback cases for when there is no closing style tag
+ # first look for newline and then go back into root state
+ # if that fails just read the rest of the file
+ # this is similar to the error handling logic in lexer.py
+ (r'.+?\n', using(CssLexer), '#pop'),
+ (r'.+', using(CssLexer), '#pop'),
],
'attr': [
('".*?"', String, '#pop'),
(r'[{(\[;,]', Punctuation, 'slashstartsregex'),
(r'[})\].]', Punctuation),
(r'(for|in|while|do|break|return|continue|switch|case|default|if|else|'
- r'throw|try|catch|finally|new|delete|typeof|instanceof|void|yield|'
+ r'throw|try|catch|finally|new|delete|typeof|instanceof|void|yield|await|async|'
r'this|of)\b', Keyword, 'slashstartsregex'),
(r'(var|let|with|function)\b', Keyword.Declaration, 'slashstartsregex'),
(r'(abstract|boolean|byte|char|class|const|debugger|double|enum|export|'
"""
name = 'Juttle'
- aliases = ['juttle', 'juttle']
+ aliases = ['juttle']
filenames = ['*.juttle']
mimetypes = ['application/juttle', 'application/x-juttle',
'text/x-juttle', 'text/juttle']
'var'),
(r'(import(?:\s+static)?)(\s+)', bygroups(Keyword.Namespace, Text),
'import'),
- (r'"(\\\\|\\"|[^"])*"', String),
+ (r'"', String, 'string'),
(r"'\\.'|'[^\\]'|'\\u[0-9a-fA-F]{4}'", String.Char),
(r'(\.)((?:[^\W\d]|\$)[\w$]*)', bygroups(Punctuation,
Name.Attribute)),
'import': [
(r'[\w.]+\*?', Name.Namespace, '#pop')
],
+ 'string': [
+ (r'[^\\"]+', String),
+ (r'\\\\', String), # Escaped backslash
+ (r'\\"', String), # Escaped quote
+ (r'\\', String), # Bare backslash
+ (r'"', String, '#pop'), # Closing quote
+ ],
}
name = 'Kotlin'
aliases = ['kotlin']
- filenames = ['*.kt']
+ filenames = ['*.kt', '*.kts']
mimetypes = ['text/x-kotlin']
flags = re.MULTILINE | re.DOTALL | re.UNICODE
(r'\s+', Text),
(r'\\\n', Text), # line continuation
(r'//.*?\n', Comment.Single),
+ (r'^#!/.+?\n', Comment.Single), # shebang for kotlin scripts
(r'/[*].*?[*]/', Comment.Multiline),
(r'""".*?"""', String),
(r'\n', Text),
'exception', 'exit', 'interface', 'for', 'goto', 'if', 'is', 'limited',
'loop', 'new', 'null', 'of', 'or', 'others', 'out', 'overriding',
'pragma', 'protected', 'raise', 'range', 'record', 'renames', 'requeue',
- 'return', 'reverse', 'select', 'separate', 'subtype', 'synchronized',
- 'task', 'tagged', 'terminate', 'then', 'type', 'until', 'when',
- 'while', 'xor'), prefix=r'\b', suffix=r'\b'),
+ 'return', 'reverse', 'select', 'separate', 'some', 'subtype',
+ 'synchronized', 'task', 'tagged', 'terminate', 'then', 'type', 'until',
+ 'when', 'while', 'xor'), prefix=r'\b', suffix=r'\b'),
Keyword.Reserved),
(r'"[^"]*"', String),
include('attribute'),
filenames = ['*.ttl']
mimetypes = ['text/turtle', 'application/x-turtle']
- flags = re.IGNORECASE
+ # character group definitions ::
+ PN_CHARS_BASE_GRP = ('a-zA-Z'
+ '\u00c0-\u00d6'
+ '\u00d8-\u00f6'
+ '\u00f8-\u02ff'
+ '\u0370-\u037d'
+ '\u037f-\u1fff'
+ '\u200c-\u200d'
+ '\u2070-\u218f'
+ '\u2c00-\u2fef'
+ '\u3001-\ud7ff'
+ '\uf900-\ufdcf'
+ '\ufdf0-\ufffd')
+
+ PN_CHARS_U_GRP = (PN_CHARS_BASE_GRP + '_')
+
+ PN_CHARS_GRP = (PN_CHARS_U_GRP +
+ r'\-' +
+ r'0-9' +
+ '\u00b7' +
+ '\u0300-\u036f' +
+ '\u203f-\u2040')
+
+ PN_CHARS = '[' + PN_CHARS_GRP + ']'
+
+ PN_CHARS_BASE = '[' + PN_CHARS_BASE_GRP + ']'
+
+ PN_PREFIX = PN_CHARS_BASE + '(?:[' + PN_CHARS_GRP + '.]*' + PN_CHARS + ')?'
+
+ HEX_GRP = '0-9A-Fa-f'
+
+ HEX = '[' + HEX_GRP + ']'
+
+ PERCENT = '%' + HEX + HEX
+
+ PN_LOCAL_ESC_CHARS_GRP = r' _~.\-!$&"()*+,;=/?#@%'
+
+ PN_LOCAL_ESC_CHARS = '[' + PN_LOCAL_ESC_CHARS_GRP + ']'
+
+ PN_LOCAL_ESC = r'\\' + PN_LOCAL_ESC_CHARS
+
+ PLX = '(?:' + PERCENT + ')|(?:' + PN_LOCAL_ESC + ')'
+
+ PN_LOCAL = ('(?:[' + PN_CHARS_U_GRP + ':0-9' + ']|' + PLX + ')' +
+ '(?:(?:[' + PN_CHARS_GRP + '.:]|' + PLX + ')*(?:[' +
+ PN_CHARS_GRP + ':]|' + PLX + '))?')
patterns = {
- 'PNAME_NS': r'((?:[a-z][\w-]*)?\:)', # Simplified character range
+ 'PNAME_NS': r'((?:[a-zA-Z][\w-]*)?\:)', # Simplified character range
'IRIREF': r'(<[^<>"{}|^`\\\x00-\x20]*>)'
}
- # PNAME_NS PN_LOCAL (with simplified character range)
- patterns['PrefixedName'] = r'%(PNAME_NS)s([a-z][\w-]*)' % patterns
-
tokens = {
'root': [
- (r'\s+', Whitespace),
+ (r'\s+', Text),
# Base / prefix
(r'(@base|BASE)(\s+)%(IRIREF)s(\s*)(\.?)' % patterns,
(r'%(IRIREF)s' % patterns, Name.Variable),
# PrefixedName
- (r'%(PrefixedName)s' % patterns,
- bygroups(Name.Namespace, Name.Tag)),
+ (r'(' + PN_PREFIX + r')?(\:)(' + PN_LOCAL + r')?',
+ bygroups(Name.Namespace, Punctuation, Name.Tag)),
# Comment
(r'#[^\n]+', Comment),
(r'.', String, '#pop'),
],
'end-of-string': [
- (r'(@)([a-z]+(:?-[a-z0-9]+)*)',
+ (r'(@)([a-zA-Z]+(?:-[a-zA-Z0-9]+)*)',
bygroups(Operator, Generic.Emph), '#pop:2'),
(r'(\^\^)%(IRIREF)s' % patterns, bygroups(Operator, Generic.Emph), '#pop:2'),
- (r'(\^\^)%(PrefixedName)s' % patterns,
- bygroups(Operator, Generic.Emph, Generic.Emph), '#pop:2'),
default('#pop:2'),
class RustLexer(RegexLexer):
"""
- Lexer for the Rust programming language (version 1.40).
+ Lexer for the Rust programming language (version 1.47).
.. versionadded:: 1.6
"""
'usize', 'isize', 'f32', 'f64', 'char', 'str', 'bool',
), suffix=r'\b'), Keyword.Type)
- builtin_types = (words((
- 'Send', 'Sized', 'Sync', 'Unpin',
- 'Drop', 'Fn', 'FnMut', 'FnOnce',
- 'AsRef', 'AsMut', 'Into', 'From',
+ builtin_funcs_types = (words((
+ 'Copy', 'Send', 'Sized', 'Sync', 'Unpin',
+ 'Drop', 'Fn', 'FnMut', 'FnOnce', 'drop',
+ 'Box', 'ToOwned', 'Clone',
+ 'PartialEq', 'PartialOrd', 'Eq', 'Ord',
+ 'AsRef', 'AsMut', 'Into', 'From', 'Default',
'Iterator', 'Extend', 'IntoIterator', 'DoubleEndedIterator',
- 'ExactSizeIterator', 'Option', 'Result',
- 'Box', 'ToOwned', 'String', 'ToString', 'Vec',
- 'Clone', 'Copy', 'Default', 'Eq', 'Hash', 'Ord', 'PartialEq',
- 'PartialOrd', 'Ord',
+ 'ExactSizeIterator',
+ 'Option', 'Some', 'None',
+ 'Result', 'Ok', 'Err',
+ 'String', 'ToString', 'Vec',
), suffix=r'\b'), Name.Builtin)
- builtin_funcs_macros = (words((
- 'drop', 'Some', 'None', 'Ok', 'Err',
- 'asm!', 'assert!', 'assert_eq!', 'assert_ne!', 'cfg!', 'column!',
- 'compile_error!', 'concat!', 'concat_idents!', 'dbg!', 'debug_assert!',
- 'debug_assert_eq!', 'debug_assert_ne!', 'env!', 'eprint!', 'eprintln!',
- 'file!', 'format_args!', 'format_args_nl!', 'global_asm!', 'include!',
- 'include_bytes!', 'include_str!', 'line!', 'log_syntax!',
- 'module_path!', 'option_env!', 'panic!', 'print!', 'println!',
- 'stringify!', 'thread_local!', 'todo!', 'trace_macros!',
- 'unimplemented!', 'unreachable!', 'vec!', 'write!', 'writeln!',
- ), suffix=r'\b'), Name.Builtin)
+ builtin_macros = (words((
+ 'asm', 'assert', 'assert_eq', 'assert_ne', 'cfg', 'column',
+ 'compile_error', 'concat', 'concat_idents', 'dbg', 'debug_assert',
+ 'debug_assert_eq', 'debug_assert_ne', 'env', 'eprint', 'eprintln',
+ 'file', 'format', 'format_args', 'format_args_nl', 'global_asm',
+ 'include', 'include_bytes', 'include_str',
+ 'is_aarch64_feature_detected',
+ 'is_arm_feature_detected',
+ 'is_mips64_feature_detected',
+ 'is_mips_feature_detected',
+ 'is_powerpc64_feature_detected',
+ 'is_powerpc_feature_detected',
+ 'is_x86_feature_detected',
+ 'line', 'llvm_asm', 'log_syntax', 'macro_rules', 'matches',
+ 'module_path', 'option_env', 'panic', 'print', 'println', 'stringify',
+ 'thread_local', 'todo', 'trace_macros', 'unimplemented', 'unreachable',
+ 'vec', 'write', 'writeln',
+ ), suffix=r'!'), Name.Function.Magic)
tokens = {
'root': [
# Macro parameters
(r"""\$([a-zA-Z_]\w*|\(,?|\),?|,?)""", Comment.Preproc),
# Keywords
- (words((
- 'as', 'async', 'await', 'box', 'const', 'crate', 'dyn', 'else',
- 'extern', 'for', 'if', 'impl', 'in', 'loop', 'match', 'move',
- 'mut', 'pub', 'ref', 'return', 'static', 'super', 'trait',
- 'try', 'unsafe', 'use', 'where', 'while', 'macro_rules!',
- ), suffix=r'\b'), Keyword),
- (words(('abstract', 'alignof', 'become', 'do', 'final', 'macro',
- 'offsetof', 'override', 'priv', 'proc', 'pure', 'sizeof',
- 'typeof', 'unsized', 'virtual', 'yield'), suffix=r'\b'),
- Keyword.Reserved),
+ (words(('as', 'async', 'await', 'box', 'const', 'crate', 'dyn',
+ 'else', 'extern', 'for', 'if', 'impl', 'in', 'loop',
+ 'match', 'move', 'mut', 'pub', 'ref', 'return', 'static',
+ 'super', 'trait', 'unsafe', 'use', 'where', 'while'),
+ suffix=r'\b'), Keyword),
+ (words(('abstract', 'become', 'do', 'final', 'macro', 'override',
+ 'priv', 'typeof', 'try', 'unsized', 'virtual', 'yield'),
+ suffix=r'\b'), Keyword.Reserved),
(r'(true|false)\b', Keyword.Constant),
+ (r'self\b', Name.Builtin.Pseudo),
(r'mod\b', Keyword, 'modname'),
(r'let\b', Keyword.Declaration),
(r'fn\b', Keyword, 'funcname'),
keyword_types,
(r'[sS]elf\b', Name.Builtin.Pseudo),
# Prelude (taken from Rust's src/libstd/prelude.rs)
- builtin_types,
- builtin_funcs_macros,
+ builtin_funcs_types,
+ builtin_macros,
# Path seperators, so types don't catch them.
(r'::\b', Text),
# Types in positions.
(r'\s+', Text),
(r'&', Keyword.Pseudo),
(r"'", Operator, 'lifetime'),
- builtin_types,
+ builtin_funcs_types,
keyword_types,
(r'[a-zA-Z_]\w*', Name.Class, '#pop'),
default('#pop'),
else:
yield index, token, value
+
class ShellSessionBaseLexer(Lexer):
"""
- Base lexer for simplistic shell sessions.
+ Base lexer for shell sessions.
.. versionadded:: 2.1
"""
class BashSessionLexer(ShellSessionBaseLexer):
"""
- Lexer for simplistic shell sessions.
+ Lexer for Bash shell sessions, i.e. command lines, including a
+ prompt, interspersed with output.
.. versionadded:: 1.1
"""
class MSDOSSessionLexer(ShellSessionBaseLexer):
"""
- Lexer for simplistic MSDOS sessions.
+ Lexer for MS DOS shell sessions, i.e. command lines, including a
+ prompt, interspersed with output.
.. versionadded:: 2.1
"""
class TcshSessionLexer(ShellSessionBaseLexer):
"""
- Lexer for Tcsh sessions.
+ Lexer for Tcsh sessions, i.e. command lines, including a
+ prompt, interspersed with output.
.. versionadded:: 2.1
"""
class PowerShellSessionLexer(ShellSessionBaseLexer):
"""
- Lexer for simplistic Windows PowerShell sessions.
+ Lexer for PowerShell sessions, i.e. command lines, including a
+ prompt, interspersed with output.
.. versionadded:: 2.1
"""
(r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float), # Mandatory integer, optional fraction and exponent
(r'[0-9]*\.[0-9]+(e[+-]?[0-9]+)?', Number.Float), # Mandatory fraction, optional integer and exponent
(r'[0-9]+e[+-]?[0-9]+', Number.Float), # Exponents with integer significands are still floats
- (r'[0-9]+', Number.Integer),
+ (r'[0-9]+(?=[^0-9a-z$_\u0080-\uffff])', Number.Integer), # Integers that are not in a schema object name
# Date literals
(r"\{\s*d\s*(?P<quote>['\"])\s*\d{2}(\d{2})?.?\d{2}.?\d{2}\s*(?P=quote)\s*\}",
# numeric literals have already been handled above.
#
('[0-9a-z$_\u0080-\uffff]+', Name),
- (r'`', Name, 'schema-object-name'),
+ (r'`', Name.Quoted, 'schema-object-name'),
# Punctuation
(r'[(),.;]', Punctuation),
# Schema object name substates
# ----------------------------
#
- # Backtick-quoted schema object names support escape characters.
- # It may be desirable to tokenize escape sequences differently,
- # but currently Pygments does not have an obvious token type for
- # this unique situation (for example, "Name.Escape").
+ # "Name.Quoted" and "Name.Quoted.Escape" are non-standard but
+ # formatters will style them as "Name" by default but add
+ # additional styles based on the token name. This gives users
+ # flexibility to add custom styles as desired.
#
'schema-object-name': [
- (r'[^`\\]+', Name),
- (r'(?:\\\\|\\`|``)', Name), # This could be an escaped name token type.
- (r'`', Name, '#pop'),
+ (r'[^`]+', Name.Quoted),
+ (r'``', Name.Quoted.Escape),
+ (r'`', Name.Quoted, '#pop'),
],
}
(r'[^{]+', Other),
(r'\{\{', Comment.Preproc, 'var'),
# jinja/django comments
- (r'\{[*#].*?[*#]\}', Comment),
+ (r'\{#.*?#\}', Comment),
# django comments
(r'(\{%)(-?\s*)(comment)(\s*-?)(%\})(.*?)'
r'(\{%)(-?\s*)(endcomment)(\s*-?)(%\})',
(r'(?s)(<%(?:def|method))(\s*)(.*?)(>)(.*?)(</%\2\s*>)',
bygroups(Name.Tag, Text, Name.Function, Name.Tag,
using(this), Name.Tag)),
- (r'(?s)(<%\w+)(.*?)(>)(.*?)(</%\2\s*>)',
- bygroups(Name.Tag, Name.Function, Name.Tag,
- using(PerlLexer), Name.Tag)),
+ (r'(?s)(<%(\w+)(.*?)(>))(.*?)(</%\2\s*>)',
+ bygroups(Name.Tag, None, None, None, using(PerlLexer), Name.Tag)),
(r'(?s)(<&[^|])(.*?)(,.*?)?(&>)',
bygroups(Name.Tag, Name.Function, using(PerlLexer), Name.Tag)),
(r'(?s)(<&\|)(.*?)(,.*?)?(&>)',
def analyse_text(text):
result = 0.0
- if re.search(r'</%(class|doc|init)%>', text) is not None:
+ if re.search(r'</%(class|doc|init)>', text) is not None:
result = 1.0
elif re.search(r'<&.+&>', text, re.DOTALL) is not None:
result = 0.11
'universe', 'universes',
'inductive', 'coinductive', 'structure', 'extends',
'class', 'instance',
+ 'abbreviation',
'noncomputable theory',
'let', 'if', 'else', 'then', 'in', 'with', 'calc', 'match',
'do'
), prefix=r'\b', suffix=r'\b'), Keyword),
+ (words(('sorry', 'admit'), prefix=r'\b', suffix=r'\b'), Generic.Error),
(words(('Sort', 'Prop', 'Type'), prefix=r'\b', suffix=r'\b'), Keyword.Type),
(words((
'#eval', '#check', '#reduce', '#exit',
@prefix dcterms: <http://purl.org/dc/terms/>. @prefix xs: <http://www.w3.org/2001/XMLSchema> .
@prefix mads: <http://www.loc.gov/mads/rdf/v1#> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
-@PREFIX dc: <http://purl.org/dc/elements/1.1/> # SPARQL-like syntax is OK
+PREFIX dc: <http://purl.org/dc/elements/1.1/> # SPARQL-like syntax is OK
@prefix : <http://xmlns.com/foaf/0.1/> . # empty prefix is OK
<http://example.org/#spiderman> <http://www.perceive.net/schemas/relationship/enemyOf> <http://example.org/#green-goblin> .
-<#doc1> a <#document>
+<#doc1> a <#document>;
dc:creator "Smith", "Jones";
- :knows <http://getopenid.com/jsmith>
+ :knows <http://getopenid.com/jsmith>;
dcterms:hasPart [ # A comment
dc:title "Some title", "Some other title";
dc:creator "برشت، برتولد"@ar;
<http://data.ub.uio.no/realfagstermer/006839> a mads:Topic,
skos:Concept ;
- dcterms:created "2014-08-25"^^xsd:date ;
- dcterms:modified "2014-11-12"^^xsd:date ;
+ dcterms:created "2014-08-25"^^xs:date ;
+ dcterms:modified "2014-11-12"^^xs:date ;
dcterms:identifier "REAL006839" ;
skos:prefLabel "Flerbørstemarker"@nb,
"Polychaeta"@la ;
"Mangebørsteormer"@nb,
"Havbørsteormer"@nb,
"Havbørstemarker"@nb,
- "Polycheter"@nb.
+ "Polycheter"@nb ;
skos:inScheme <http://data.ub.uio.no/realfagstermer/> ;
skos:narrower <http://data.ub.uio.no/realfagstermer/018529>,
<http://data.ub.uio.no/realfagstermer/024538>,
SELECT e1.`apple` AS a, `example2`.b\r
FROM example1 AS e1\r
JOIN example2 e2\r
-ON `example1`.`id` = e2.id;\r
+ON `example1`.`a``b` = e2.`123`;\r
\r
\r
-- Operators\r
<div class="highlight" style="background: #f8f8f8">
- <pre style="line-height: 125%; margin: 0;"><span></span><span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;">1</span><span style="color: #408080; font-style: italic"># a</span>
+ <pre style="line-height: 125%;"><span></span><span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;">1</span><span style="color: #408080; font-style: italic"># a</span>
<span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;">2</span><span style="color: #408080; font-style: italic"># b</span>
<span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;">3</span><span style="color: #408080; font-style: italic"># c</span>
</pre>
<div class="highlight" style="background: #f8f8f8">
- <pre style="line-height: 125%; margin: 0;"><span></span><span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;">1</span><span style="color: #408080; font-style: italic"># a</span>
+ <pre style="line-height: 125%;"><span></span><span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;">1</span><span style="color: #408080; font-style: italic"># a</span>
<span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;">2</span><span style="color: #408080; font-style: italic"># b</span>
<span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;">3</span><span style="color: #408080; font-style: italic"># c</span>
</pre>
<div class="highlight" style="background: #f8f8f8">
- <pre style="line-height: 125%; margin: 0;"><span></span><span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;">1</span><span style="color: #408080; font-style: italic"># a</span>
+ <pre style="line-height: 125%;"><span></span><span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;">1</span><span style="color: #408080; font-style: italic"># a</span>
<span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;">2</span><span style="color: #408080; font-style: italic"># b</span>
<span style="color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px;">3</span><span style="color: #408080; font-style: italic"># c</span>
</pre>
<div class="highlight" style="background: #f8f8f8">
- <pre style="line-height: 125%; margin: 0;"><span></span><span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;">1</span><span style="color: #408080; font-style: italic"># a</span>
+ <pre style="line-height: 125%;"><span></span><span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;">1</span><span style="color: #408080; font-style: italic"># a</span>
<span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;">2</span><span style="color: #408080; font-style: italic"># b</span>
<span style="color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px;">3</span><span style="color: #408080; font-style: italic"># c</span>
</pre>
<div class="highlight" style="background: #f8f8f8">
- <pre style="line-height: 125%; margin: 0;"><span></span><span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;"> 8</span><span style="color: #408080; font-style: italic"># a</span>
+ <pre style="line-height: 125%;"><span></span><span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;"> 8</span><span style="color: #408080; font-style: italic"># a</span>
<span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;"> 9</span><span style="color: #408080; font-style: italic"># b</span>
<span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;">10</span><span style="color: #408080; font-style: italic"># c</span>
</pre>
<div class="highlight" style="background: #f8f8f8">
- <pre style="line-height: 125%; margin: 0;"><span></span><span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;"> 8</span><span style="color: #408080; font-style: italic"># a</span>
+ <pre style="line-height: 125%;"><span></span><span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;"> 8</span><span style="color: #408080; font-style: italic"># a</span>
<span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;"> 9</span><span style="color: #408080; font-style: italic"># b</span>
<span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;">10</span><span style="color: #408080; font-style: italic"># c</span>
</pre>
<div class="highlight" style="background: #f8f8f8">
- <pre style="line-height: 125%; margin: 0;"><span></span><span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;"> 8</span><span style="color: #408080; font-style: italic"># a</span>
+ <pre style="line-height: 125%;"><span></span><span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;"> 8</span><span style="color: #408080; font-style: italic"># a</span>
<span style="color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px;"> 9</span><span style="color: #408080; font-style: italic"># b</span>
<span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;">10</span><span style="color: #408080; font-style: italic"># c</span>
</pre>
<div class="highlight" style="background: #f8f8f8">
- <pre style="line-height: 125%; margin: 0;"><span></span><span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;"> 8</span><span style="color: #408080; font-style: italic"># a</span>
+ <pre style="line-height: 125%;"><span></span><span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;"> 8</span><span style="color: #408080; font-style: italic"># a</span>
<span style="color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px;"> 9</span><span style="color: #408080; font-style: italic"># b</span>
<span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;">10</span><span style="color: #408080; font-style: italic"># c</span>
</pre>
<div class="highlight" style="background: #f8f8f8">
- <pre style="line-height: 125%; margin: 0;"><span></span><span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;"> </span><span style="color: #408080; font-style: italic"># a</span>
+ <pre style="line-height: 125%;"><span></span><span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;"> </span><span style="color: #408080; font-style: italic"># a</span>
<span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;">2</span><span style="color: #408080; font-style: italic"># b</span>
<span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;"> </span><span style="color: #408080; font-style: italic"># c</span>
</pre>
<div class="highlight" style="background: #f8f8f8">
- <pre style="line-height: 125%; margin: 0;"><span></span><span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;"> </span><span style="color: #408080; font-style: italic"># a</span>
+ <pre style="line-height: 125%;"><span></span><span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;"> </span><span style="color: #408080; font-style: italic"># a</span>
<span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;">2</span><span style="color: #408080; font-style: italic"># b</span>
<span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;"> </span><span style="color: #408080; font-style: italic"># c</span>
</pre>
<div class="highlight" style="background: #f8f8f8">
- <pre style="line-height: 125%; margin: 0;"><span></span><span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;"> </span><span style="color: #408080; font-style: italic"># a</span>
+ <pre style="line-height: 125%;"><span></span><span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;"> </span><span style="color: #408080; font-style: italic"># a</span>
<span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;">2</span><span style="color: #408080; font-style: italic"># b</span>
<span style="color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px;"> </span><span style="color: #408080; font-style: italic"># c</span>
</pre>
<div class="highlight" style="background: #f8f8f8">
- <pre style="line-height: 125%; margin: 0;"><span></span><span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;"> </span><span style="color: #408080; font-style: italic"># a</span>
+ <pre style="line-height: 125%;"><span></span><span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;"> </span><span style="color: #408080; font-style: italic"># a</span>
<span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;">2</span><span style="color: #408080; font-style: italic"># b</span>
<span style="color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px;"> </span><span style="color: #408080; font-style: italic"># c</span>
</pre>
<div class="highlight" style="background: #f8f8f8">
- <pre style="line-height: 125%; margin: 0;"><span></span><span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;"> 8</span><span style="color: #408080; font-style: italic"># a</span>
+ <pre style="line-height: 125%;"><span></span><span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;"> 8</span><span style="color: #408080; font-style: italic"># a</span>
<span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;"> </span><span style="color: #408080; font-style: italic"># b</span>
<span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;">10</span><span style="color: #408080; font-style: italic"># c</span>
</pre>
<div class="highlight" style="background: #f8f8f8">
- <pre style="line-height: 125%; margin: 0;"><span></span><span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;"> 8</span><span style="color: #408080; font-style: italic"># a</span>
+ <pre style="line-height: 125%;"><span></span><span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;"> 8</span><span style="color: #408080; font-style: italic"># a</span>
<span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;"> </span><span style="color: #408080; font-style: italic"># b</span>
<span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;">10</span><span style="color: #408080; font-style: italic"># c</span>
</pre>
<div class="highlight" style="background: #f8f8f8">
- <pre style="line-height: 125%; margin: 0;"><span></span><span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;"> 8</span><span style="color: #408080; font-style: italic"># a</span>
+ <pre style="line-height: 125%;"><span></span><span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;"> 8</span><span style="color: #408080; font-style: italic"># a</span>
<span style="color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px;"> </span><span style="color: #408080; font-style: italic"># b</span>
<span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;">10</span><span style="color: #408080; font-style: italic"># c</span>
</pre>
<div class="highlight" style="background: #f8f8f8">
- <pre style="line-height: 125%; margin: 0;"><span></span><span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;"> 8</span><span style="color: #408080; font-style: italic"># a</span>
+ <pre style="line-height: 125%;"><span></span><span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;"> 8</span><span style="color: #408080; font-style: italic"># a</span>
<span style="color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px;"> </span><span style="color: #408080; font-style: italic"># b</span>
<span style="color: #000000; background-color: #f0f0f0; padding-left: 5px; padding-right: 5px;">10</span><span style="color: #408080; font-style: italic"># c</span>
</pre>
</td>
<td class="code">
<div class="highlight" style="background: #f8f8f8">
- <pre style="line-height: 125%; margin: 0;"><span></span><span style="color: #408080; font-style: italic"># a</span>
+ <pre style="line-height: 125%;"><span></span><span style="color: #408080; font-style: italic"># a</span>
<span style="color: #408080; font-style: italic"># b</span>
<span style="color: #408080; font-style: italic"># c</span>
</pre>
</td>
<td class="code">
<div class="highlight" style="background: #f8f8f8">
- <pre style="line-height: 125%; margin: 0;"><span></span><span style="color: #408080; font-style: italic"># a</span>
+ <pre style="line-height: 125%;"><span></span><span style="color: #408080; font-style: italic"># a</span>
<span style="color: #408080; font-style: italic"># b</span>
<span style="color: #408080; font-style: italic"># c</span>
</pre>
</td>
<td class="code">
<div class="highlight" style="background: #f8f8f8">
- <pre style="line-height: 125%; margin: 0;"><span></span><span style="color: #408080; font-style: italic"># a</span>
+ <pre style="line-height: 125%;"><span></span><span style="color: #408080; font-style: italic"># a</span>
<span style="color: #408080; font-style: italic"># b</span>
<span style="color: #408080; font-style: italic"># c</span>
</pre>
</td>
<td class="code">
<div class="highlight" style="background: #f8f8f8">
- <pre style="line-height: 125%; margin: 0;"><span></span><span style="color: #408080; font-style: italic"># a</span>
+ <pre style="line-height: 125%;"><span></span><span style="color: #408080; font-style: italic"># a</span>
<span style="color: #408080; font-style: italic"># b</span>
<span style="color: #408080; font-style: italic"># c</span>
</pre>
</td>
<td class="code">
<div class="highlight" style="background: #f8f8f8">
- <pre style="line-height: 125%; margin: 0;"><span></span><span style="color: #408080; font-style: italic"># a</span>
+ <pre style="line-height: 125%;"><span></span><span style="color: #408080; font-style: italic"># a</span>
<span style="color: #408080; font-style: italic"># b</span>
<span style="color: #408080; font-style: italic"># c</span>
</pre>
</td>
<td class="code">
<div class="highlight" style="background: #f8f8f8">
- <pre style="line-height: 125%; margin: 0;"><span></span><span style="color: #408080; font-style: italic"># a</span>
+ <pre style="line-height: 125%;"><span></span><span style="color: #408080; font-style: italic"># a</span>
<span style="color: #408080; font-style: italic"># b</span>
<span style="color: #408080; font-style: italic"># c</span>
</pre>
</td>
<td class="code">
<div class="highlight" style="background: #f8f8f8">
- <pre style="line-height: 125%; margin: 0;"><span></span><span style="color: #408080; font-style: italic"># a</span>
+ <pre style="line-height: 125%;"><span></span><span style="color: #408080; font-style: italic"># a</span>
<span style="color: #408080; font-style: italic"># b</span>
<span style="color: #408080; font-style: italic"># c</span>
</pre>
</td>
<td class="code">
<div class="highlight" style="background: #f8f8f8">
- <pre style="line-height: 125%; margin: 0;"><span></span><span style="color: #408080; font-style: italic"># a</span>
+ <pre style="line-height: 125%;"><span></span><span style="color: #408080; font-style: italic"># a</span>
<span style="color: #408080; font-style: italic"># b</span>
<span style="color: #408080; font-style: italic"># c</span>
</pre>
</td>
<td class="code">
<div class="highlight" style="background: #f8f8f8">
- <pre style="line-height: 125%; margin: 0;"><span></span><span style="color: #408080; font-style: italic"># a</span>
+ <pre style="line-height: 125%;"><span></span><span style="color: #408080; font-style: italic"># a</span>
<span style="color: #408080; font-style: italic"># b</span>
<span style="color: #408080; font-style: italic"># c</span>
</pre>
</td>
<td class="code">
<div class="highlight" style="background: #f8f8f8">
- <pre style="line-height: 125%; margin: 0;"><span></span><span style="color: #408080; font-style: italic"># a</span>
+ <pre style="line-height: 125%;"><span></span><span style="color: #408080; font-style: italic"># a</span>
<span style="color: #408080; font-style: italic"># b</span>
<span style="color: #408080; font-style: italic"># c</span>
</pre>
</td>
<td class="code">
<div class="highlight" style="background: #f8f8f8">
- <pre style="line-height: 125%; margin: 0;"><span></span><span style="color: #408080; font-style: italic"># a</span>
+ <pre style="line-height: 125%;"><span></span><span style="color: #408080; font-style: italic"># a</span>
<span style="color: #408080; font-style: italic"># b</span>
<span style="color: #408080; font-style: italic"># c</span>
</pre>
</td>
<td class="code">
<div class="highlight" style="background: #f8f8f8">
- <pre style="line-height: 125%; margin: 0;"><span></span><span style="color: #408080; font-style: italic"># a</span>
+ <pre style="line-height: 125%;"><span></span><span style="color: #408080; font-style: italic"># a</span>
<span style="color: #408080; font-style: italic"># b</span>
<span style="color: #408080; font-style: italic"># c</span>
</pre>
</td>
<td class="code">
<div class="highlight" style="background: #f8f8f8">
- <pre style="line-height: 125%; margin: 0;"><span></span><span style="color: #408080; font-style: italic"># a</span>
+ <pre style="line-height: 125%;"><span></span><span style="color: #408080; font-style: italic"># a</span>
<span style="color: #408080; font-style: italic"># b</span>
<span style="color: #408080; font-style: italic"># c</span>
</pre>
</td>
<td class="code">
<div class="highlight" style="background: #f8f8f8">
- <pre style="line-height: 125%; margin: 0;"><span></span><span style="color: #408080; font-style: italic"># a</span>
+ <pre style="line-height: 125%;"><span></span><span style="color: #408080; font-style: italic"># a</span>
<span style="color: #408080; font-style: italic"># b</span>
<span style="color: #408080; font-style: italic"># c</span>
</pre>
</td>
<td class="code">
<div class="highlight" style="background: #f8f8f8">
- <pre style="line-height: 125%; margin: 0;"><span></span><span style="color: #408080; font-style: italic"># a</span>
+ <pre style="line-height: 125%;"><span></span><span style="color: #408080; font-style: italic"># a</span>
<span style="color: #408080; font-style: italic"># b</span>
<span style="color: #408080; font-style: italic"># c</span>
</pre>
</td>
<td class="code">
<div class="highlight" style="background: #f8f8f8">
- <pre style="line-height: 125%; margin: 0;"><span></span><span style="color: #408080; font-style: italic"># a</span>
+ <pre style="line-height: 125%;"><span></span><span style="color: #408080; font-style: italic"># a</span>
<span style="color: #408080; font-style: italic"># b</span>
<span style="color: #408080; font-style: italic"># c</span>
</pre>
:license: BSD, see LICENSE for details.
"""
+import time
+
import pytest
-from pygments.lexers import JsonLexer, JsonBareObjectLexer, YamlLexer
-from pygments.token import Token
+from pygments.lexers.data import JsonLexer, JsonBareObjectLexer, JsonLdLexer, YamlLexer
+from pygments.token import Token, Punctuation, Text, Number, String, Keyword, Name
@pytest.fixture(scope='module')
yield JsonBareObjectLexer()
+@pytest.fixture(scope='module')
+def lexer_json_ld():
+ yield JsonLdLexer()
+
+
@pytest.fixture(scope='module')
def lexer_yaml():
yield YamlLexer()
+@pytest.mark.parametrize(
+ 'text, expected_token_types',
+ (
+ # Integers
+ ('0', (Number.Integer,)),
+ ('-1', (Number.Integer,)),
+ ('1234567890', (Number.Integer,)),
+ ('-1234567890', (Number.Integer,)),
+
+ # Floats, including scientific notation
+ ('123456789.0123456789', (Number.Float,)),
+ ('-123456789.0123456789', (Number.Float,)),
+ ('1e10', (Number.Float,)),
+ ('-1E10', (Number.Float,)),
+ ('1e-10', (Number.Float,)),
+ ('-1E+10', (Number.Float,)),
+ ('1.0e10', (Number.Float,)),
+ ('-1.0E10', (Number.Float,)),
+ ('1.0e-10', (Number.Float,)),
+ ('-1.0E+10', (Number.Float,)),
+
+ # Strings (escapes are tested elsewhere)
+ ('""', (String.Double,)),
+ ('"abc"', (String.Double,)),
+ ('"ひらがな"', (String.Double,)),
+ ('"123"', (String.Double,)),
+ ('"[]"', (String.Double,)),
+ ('"{}"', (String.Double,)),
+ ('"true"', (String.Double,)),
+ ('"false"', (String.Double,)),
+ ('"null"', (String.Double,)),
+ ('":,"', (String.Double,)),
+
+ # Constants
+ ('true', (Keyword.Constant, )),
+ ('false', (Keyword.Constant, )),
+ ('null', (Keyword.Constant, )),
+
+ # Whitespace
+ ('\u0020', (Text,)), # space
+ ('\u000a', (Text,)), # newline
+ ('\u000d', (Text,)), # carriage return
+ ('\u0009', (Text,)), # tab
+
+ # Arrays
+ ('[]', (Punctuation,)),
+ ('["a", "b"]', (Punctuation, String.Double, Punctuation, Text, String.Double, Punctuation)),
+
+ # Objects
+ ('{}', (Punctuation,)),
+ ('{"a": "b"}', (Punctuation, Name.Tag, Punctuation, Text, String.Double, Punctuation)),
+ )
+)
+def test_json_literals_positive_match(lexer_json, text, expected_token_types):
+ """Validate that syntactically-correct JSON literals are parsed correctly."""
+
+ tokens = list(lexer_json.get_tokens_unprocessed(text))
+ assert len(tokens) == len(expected_token_types)
+ assert all(token[1] is expected_token for token, expected_token in zip(tokens, expected_token_types))
+ assert ''.join(token[2] for token in tokens) == text
+
+
+@pytest.mark.parametrize(
+ 'text',
+ (
+ '"', '\\', '/', 'b', 'f', 'n', 'r', 't',
+ 'u0123', 'u4567', 'u89ab', 'ucdef', 'uABCD', 'uEF01',
+ )
+)
+def test_json_object_key_escapes_positive_match(lexer_json, text):
+ """Validate that escape sequences in JSON object keys are parsed correctly."""
+
+ tokens = list(lexer_json.get_tokens_unprocessed('{"\\%s": 1}' % text))
+ assert len(tokens) == 6
+ assert tokens[1][1] is Name.Tag
+ assert tokens[1][2] == '"\\%s"' % text
+
+
+@pytest.mark.parametrize(
+ 'text',
+ (
+ '"', '\\', '/', 'b', 'f', 'n', 'r', 't',
+ 'u0123', 'u4567', 'u89ab', 'ucdef', 'uABCD', 'uEF01',
+ )
+)
+def test_json_string_escapes_positive_match(lexer_json, text):
+ """Validate that escape sequences in JSON string values are parsed correctly."""
+
+ text = '"\\%s"' % text
+ tokens = list(lexer_json.get_tokens_unprocessed(text))
+ assert len(tokens) == 1
+ assert tokens[0][1] is String.Double
+ assert tokens[0][2] == text
+
+
+@pytest.mark.parametrize('text', ('+\n', '0\n', '""0\n', 'a\nb\n',))
+def test_json_round_trip_errors(lexer_json, text):
+ """Validate that past round-trip errors never crop up again."""
+
+ tokens = list(lexer_json.get_tokens_unprocessed(text))
+ assert ''.join(t[2] for t in tokens) == text
+
+
+def test_json_escape_backtracking(lexer_json):
+ """Confirm that there is no catastrophic backtracking in the lexer.
+
+ This no longer applies because the JSON lexer doesn't use regular expressions,
+ but the test is included to ensure no loss of functionality now or in the future.
+ """
+
+ fragment = r'{"\u00D0000\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\63CD'
+ start_time = time.time()
+ list(lexer_json.get_tokens(fragment))
+ assert time.time() - start_time < 1, 'The JSON lexer may have catastrophic backtracking'
+
+
+@pytest.mark.parametrize(
+ 'keyword',
+ (
+ 'base',
+ 'container',
+ 'context',
+ 'direction',
+ 'graph',
+ 'id',
+ 'import',
+ 'included',
+ 'index',
+ 'json',
+ 'language',
+ 'list',
+ 'nest',
+ 'none',
+ 'prefix',
+ 'propagate',
+ 'protected',
+ 'reverse',
+ 'set',
+ 'type',
+ 'value',
+ 'version',
+ 'vocab',
+ )
+)
+def test_json_ld_keywords_positive_match(lexer_json_ld, keyword):
+ """Validate that JSON-LD keywords are parsed correctly."""
+
+ tokens = list(lexer_json_ld.get_tokens_unprocessed('{"@%s": ""}' % keyword))
+ assert len(tokens) == 6
+ assert tokens[1][1] is Token.Name.Decorator
+ assert tokens[1][2] == '"@%s"' % keyword
+
+
+@pytest.mark.parametrize(
+ 'keyword',
+ (
+ '@bogus', # "@" does not guarantee a keyword match
+ '@bases', # Begins with the keyword "@base"
+ 'container', # Matches "container" but has no leading "@"
+ )
+)
+def test_json_ld_keywords_negative_match(lexer_json_ld, keyword):
+ """Validate that JSON-LD non-keywords are parsed correctly."""
+
+ tokens = list(lexer_json_ld.get_tokens_unprocessed('{"%s": ""}' % keyword))
+ assert len(tokens) == 6
+ assert tokens[1][1] is Token.Name.Tag
+ assert tokens[1][2] == '"%s"' % keyword
+
+
def test_basic_json(lexer_json):
fragment = '{"foo": "bar", "foo2": [1, 2, 3], "\\u0123": "\\u0123"}\n'
tokens = [
(Token.Punctuation, ','),
(Token.Text, ' '),
(Token.Literal.Number.Integer, '3'),
- (Token.Punctuation, ']'),
- (Token.Punctuation, ','),
+ (Token.Punctuation, '],'),
(Token.Text, ' '),
(Token.Name.Tag, '"\\u0123"'),
(Token.Punctuation, ':'),
assert list(lexer_json.get_tokens(fragment)) == tokens
-def test_json_escape_backtracking(lexer_json):
- # This tests that an (invalid) sequence of escapes doesn't cause the lexer
- # to fall into catastrophic backtracking. unfortunately, if it's broken
- # this test will hang and that's how we know it's broken :(
- fragment = r'{"\u00D0000\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\63CD'
- tokens = [
- (Token.Punctuation, '{'),
- (Token.Error, r'"'),
- (Token.Error, '\\'),
- (Token.Error, r'u'),
- (Token.Error, r'0'),
- (Token.Error, r'0'),
- (Token.Error, r'D'),
- (Token.Error, r'0'),
- (Token.Error, r'0'),
- (Token.Error, r'0'),
- (Token.Error, r'0')
- ] + [(Token.Error, '\\')] * 178 + [
- (Token.Error, r'6'),
- (Token.Error, r'3'),
- (Token.Error, r'C'),
- (Token.Error, r'D'),
- (Token.Text, '\n')]
-
- assert list(lexer_json.get_tokens(fragment)) == tokens
-
-
def test_basic_bare(lexer_bare):
# This is the same as testBasic for JsonLexer above, except the
# enclosing curly braces are removed.
assert list(lexer_bare.get_tokens(fragment)) == tokens
-def test_closing_curly(lexer_bare):
- # This can be an Error token, but should not be a can't-pop-from-stack
- # exception.
- fragment = '}"a"\n'
- tokens = [
- (Token.Error, '}'),
- (Token.Name.Tag, '"a"'),
- (Token.Text, '\n'),
- ]
- assert list(lexer_bare.get_tokens(fragment)) == tokens
-
-
-def test_closing_curly_in_value(lexer_bare):
- fragment = '"": ""}\n'
- tokens = [
- (Token.Name.Tag, '""'),
- (Token.Punctuation, ':'),
- (Token.Text, ' '),
- (Token.Literal.String.Double, '""'),
- (Token.Error, '}'),
- (Token.Text, '\n'),
- ]
- assert list(lexer_bare.get_tokens(fragment)) == tokens
-
-
def test_yaml(lexer_yaml):
# Bug #1528: This previously parsed 'token # innocent' as a tag
fragment = 'here: token # innocent: comment\n'
def test_get_style_defs_contains_pre_style():
style_defs = HtmlFormatter().get_style_defs().splitlines()
- assert style_defs[0] == 'pre { line-height: 125%; margin: 0; }'
+ assert style_defs[0] == 'pre { line-height: 125%; }'
def test_get_style_defs_contains_default_line_numbers_styles():
--- /dev/null
+# -*- coding: utf-8 -*-
+"""
+ HTML Lexer Tests
+ ~~~~~~~~~~~~~~~~
+
+ :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+
+import time
+
+import pytest
+
+from pygments.lexers.html import HtmlLexer
+from pygments.token import Token
+
+@pytest.fixture(scope='module')
+def lexer_html():
+ yield HtmlLexer()
+
+def test_simple_html(lexer_html):
+ """ extremely basic happy-path case
+
+ more tests are in test_examplefiles """
+
+ fragment = "<html>\n\t<body>\n\t\thello world\n\t</body>\n</html>"
+ tokens = list(lexer_html.get_tokens(fragment))
+ assert all(x[0] != Token.Error for x in tokens)
+
+def test_happy_javascript_fragment(lexer_html):
+ """ valid, even long Javascript fragments should still get parsed ok """
+
+ fragment = "<script type=\"text/javascript\">"+"alert(\"hi\");"*2000+"</script>"
+ start_time = time.time()
+ tokens = list(lexer_html.get_tokens(fragment))
+ assert all(x[0] != Token.Error for x in tokens)
+ assert time.time() - start_time < 5, 'The HTML lexer might have an expensive happy-path script case'
+
+def test_happy_css_fragment(lexer_html):
+ """ valid, even long CSS fragments should still get parsed ok """
+
+ fragment = "<style>"+".ui-helper-hidden{display:none}"*2000+"</style>"
+ start_time = time.time()
+ tokens = list(lexer_html.get_tokens(fragment))
+ assert all(x[0] != Token.Error for x in tokens)
+ assert time.time() - start_time < 5, 'The HTML lexer might have an expensive happy-path style case'
+
+def test_long_unclosed_javascript_fragment(lexer_html):
+ """ unclosed, long Javascript fragments should parse quickly """
+
+ reps = 2000
+ fragment = "<script type=\"text/javascript\">"+"alert(\"hi\");"*reps
+ start_time = time.time()
+ tokens = list(lexer_html.get_tokens(fragment))
+ assert time.time() - start_time < 5, 'The HTML lexer might have an expensive error script case'
+ tokens_intro = [
+ (Token.Punctuation, '<'),
+ (Token.Name.Tag, 'script'),
+ (Token.Text, ' '),
+ (Token.Name.Attribute, 'type'),
+ (Token.Operator, '='),
+ (Token.Literal.String, '"text/javascript"'),
+ (Token.Punctuation, '>'),
+ ]
+ tokens_body = [
+ (Token.Name.Other, 'alert'),
+ (Token.Punctuation, '('),
+ (Token.Literal.String.Double, '"hi"'),
+ (Token.Punctuation, ')'),
+ (Token.Punctuation, ';'),
+ ]
+
+ # make sure we get the right opening tokens
+ assert tokens[:len(tokens_intro)] == tokens_intro
+ # and make sure we get the right body tokens even though the script is
+ # unclosed
+ assert tokens[len(tokens_intro):-1] == tokens_body * reps
+ # and of course, the newline we get for free from get_tokens
+ assert tokens[-1] == (Token.Text, "\n")
+
+def test_long_unclosed_css_fragment(lexer_html):
+ """ unclosed, long CSS fragments should parse quickly """
+
+ reps = 2000
+ fragment = "<style>"+".ui-helper-hidden{display:none}"*reps
+ start_time = time.time()
+ tokens = list(lexer_html.get_tokens(fragment))
+ assert time.time() - start_time < 5, 'The HTML lexer might have an expensive error style case'
+
+ tokens_intro = [
+ (Token.Punctuation, '<'),
+ (Token.Name.Tag, 'style'),
+ (Token.Punctuation, '>'),
+ ]
+ tokens_body = [
+ (Token.Punctuation, '.'),
+ (Token.Name.Class, 'ui-helper-hidden'),
+ (Token.Punctuation, '{'),
+ (Token.Keyword, 'display'),
+ (Token.Punctuation, ':'),
+ (Token.Keyword.Constant, 'none'),
+ (Token.Punctuation, '}'),
+ ]
+
+ # make sure we get the right opening tokens
+ assert tokens[:len(tokens_intro)] == tokens_intro
+ # and make sure we get the right body tokens even though the style block is
+ # unclosed
+ assert tokens[len(tokens_intro):-1] == tokens_body * reps
+ # and of course, the newline we get for free from get_tokens
+ assert tokens[-1] == (Token.Text, "\n")
+
+def test_unclosed_fragment_with_newline_recovery(lexer_html):
+ """ unclosed Javascript fragments should recover on the next line """
+
+ fragment = "<script type=\"text/javascript\">"+"alert(\"hi\");"*20+"\n<div>hi</div>"
+ tokens = list(lexer_html.get_tokens(fragment))
+ recovery_tokens = [
+ (Token.Punctuation, '<'),
+ (Token.Name.Tag, 'div'),
+ (Token.Punctuation, '>'),
+ (Token.Text, 'hi'),
+ (Token.Punctuation, '<'),
+ (Token.Punctuation, '/'),
+ (Token.Name.Tag, 'div'),
+ (Token.Punctuation, '>'),
+ (Token.Text, '\n')]
+ assert tokens[-1*len(recovery_tokens):] == recovery_tokens
+
:license: BSD, see LICENSE for details.
"""
+import time
+
import pytest
-from pygments.token import Text, Name, Punctuation, Keyword, Number
+from pygments.token import Keyword, Name, Number, Punctuation, String, Text
from pygments.lexers import JavaLexer
(Text, '\n')
]
assert list(lexer.get_tokens(fragment)) == tokens
+
+
+@pytest.mark.parametrize(
+ 'text',
+ (
+ '""', '"abc"', '"ひらがな"', '"123"',
+ '"\\\\"', '"\\t"', '"\\""',
+ ),
+)
+def test_string_literals_positive_match(lexer, text):
+ """Test positive matches for string literals."""
+ tokens = list(lexer.get_tokens_unprocessed(text))
+ assert all([token is String for _, token, _ in tokens])
+ assert ''.join([value for _, _, value in tokens]) == text
+
+
+def test_string_literals_backtracking(lexer):
+ """Test catastrophic backtracking for string literals."""
+ start_time = time.time()
+ list(lexer.get_tokens_unprocessed('"' + '\\' * 100))
+ assert time.time() - start_time < 1, 'possible backtracking bug'
yield MySqlLexer()
-@pytest.mark.parametrize('text', ('123',))
-def test_integer_literals(lexer, text):
- assert list(lexer.get_tokens(text))[0] == (Number.Integer, text)
+@pytest.mark.parametrize('text', ('1', '22', '22 333', '22 a', '22+', '22)', '22\n333', '22\r\n333'))
+def test_integer_literals_positive_match(lexer, text):
+ """Validate that integer literals are tokenized as integers."""
+ token = list(lexer.get_tokens(text))[0]
+ assert token[0] == Number.Integer
+ assert token[1] in {'1', '22'}
+
+
+@pytest.mark.parametrize('text', ('1a', '1A', '1.', '1ひ', '1$', '1_', '1\u0080', '1\uffff'))
+def test_integer_literals_negative_match(lexer, text):
+ """Validate that non-integer texts are not matched as integers."""
+ assert list(lexer.get_tokens(text))[0][0] != Number.Integer
@pytest.mark.parametrize(
@pytest.mark.parametrize(
'text',
(
- 'abc_$123', '上市年限', 'ひらがな',
- '`a`', '`上市年限`', '`ひらがな`', '`select`', '`concat(`',
- '````', r'`\``', r'`\\`',
- '`-- `', '`/*`', '`#`',
+ 'abc_$123', '上市年限', 'ひらがな', '123_$abc', '123ひらがな',
),
)
-def test_schema_object_names(lexer, text):
+def test_schema_object_names_unquoted(lexer, text):
tokens = list(lexer.get_tokens(text))[:-1]
assert all(token[0] == Name for token in tokens)
assert ''.join(token[1] for token in tokens) == text
+@pytest.mark.parametrize(
+ 'text',
+ (
+ '`a`', '`1`', '`上市年限`', '`ひらがな`', '`select`', '`concat(`',
+ '`-- `', '`/*`', '`#`',
+ ),
+)
+def test_schema_object_names_quoted(lexer, text):
+ tokens = list(lexer.get_tokens(text))[:-1]
+ assert tokens[0] == (Name.Quoted, '`')
+ assert tokens[1] == (Name.Quoted, text[1:-1])
+ assert tokens[2] == (Name.Quoted, '`')
+ assert ''.join(token[1] for token in tokens) == text
+
+
+@pytest.mark.parametrize('text', ('````', ))
+def test_schema_object_names_quoted_escaped(lexer, text):
+ """Test quoted schema object names with escape sequences."""
+ tokens = list(lexer.get_tokens(text))[:-1]
+ assert tokens[0] == (Name.Quoted, '`')
+ assert tokens[1] == (Name.Quoted.Escape, text[1:-1])
+ assert tokens[2] == (Name.Quoted, '`')
+ assert ''.join(token[1] for token in tokens) == text
+
+
@pytest.mark.parametrize(
'text',
('+', '*', '/', '%', '&&', ':=', '!', '<', '->>', '^', '|', '~'),
--- /dev/null
+# -*- coding: utf-8 -*-
+"""
+ Basic TurtleLexer and ShExCLexer Tests
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+
+import pytest
+
+from pygments.token import Name, Punctuation, Text
+from pygments.lexers import TurtleLexer, ShExCLexer
+
+
+@pytest.fixture(scope='module')
+def turtle_lexer():
+ yield TurtleLexer()
+
+@pytest.fixture(scope='module')
+def shexc_lexer():
+ yield ShExCLexer()
+
+def test_turtle_prefixed_name_starting_with_number(turtle_lexer):
+ fragment = 'alice:6f6e4241-75a2-4780-9b2a-40da53082e54\n'
+ tokens = [
+ (Name.Namespace, 'alice'),
+ (Punctuation, ':'),
+ (Name.Tag, '6f6e4241-75a2-4780-9b2a-40da53082e54'),
+ (Text, '\n'),
+ ]
+ assert list(turtle_lexer.get_tokens(fragment)) == tokens
+
+def test_shexc_prefixed_name_starting_with_number(shexc_lexer):
+ fragment = 'alice:6f6e4241-75a2-4780-9b2a-40da53082e54\n'
+ tokens = [
+ (Name.Namespace, 'alice'),
+ (Punctuation, ':'),
+ (Name.Tag, '6f6e4241-75a2-4780-9b2a-40da53082e54'),
+ (Text, '\n'),
+ ]
+ assert list(shexc_lexer.get_tokens(fragment)) == tokens
--- /dev/null
+import pytest
+
+from pygments.lexers.templates import JavascriptDjangoLexer, MasonLexer
+from pygments.token import Comment, Token
+
+
+@pytest.fixture(scope="module")
+def lexer():
+ yield JavascriptDjangoLexer()
+
+@pytest.fixture(scope='module')
+def lexerMason():
+ yield MasonLexer()
+
+def test_do_not_mistake_JSDoc_for_django_comment(lexer):
+ """
+ Test to make sure the lexer doesn't mistake
+ {* ... *} to be a django comment
+ """
+ text = """/**
+ * @param {*} cool
+ */
+ func = function(cool) {
+ };
+
+ /**
+ * @param {*} stuff
+ */
+ fun = function(stuff) {
+ };"""
+ tokens = lexer.get_tokens(text)
+ assert not any(t[0] == Comment for t in tokens)
+
+def test_mason_unnamed_block(lexerMason):
+ text = """
+ <%class>
+ has 'foo';
+ has 'bar' => (required => 1);
+ has 'baz' => (isa => 'Int', default => 17);
+ </%class>
+ """
+ res = lexerMason.analyse_text(text)
+ assert res == 1.0
+
+def test_mason_handles_tags_correctly(lexerMason):
+ fragment = "<%class>\nhas 'foo';\nhas 'bar' => (required => 1);\nhas 'baz' => (isa => 'Int', default => 17);\n</%class>\n"
+ tokens = [
+ (Token.Name.Tag, '<%class>'),
+ (Token.Text, '\n'),
+ (Token.Name, ''),
+ (Token.Name, 'has'),
+ (Token.Text, ' '),
+ (Token.Literal.String, "'foo'"),
+ (Token.Punctuation, ';'),
+ (Token.Text, '\n'),
+ (Token.Name, ''),
+ (Token.Name, 'has'),
+ (Token.Text, ' '),
+ (Token.Literal.String, "'bar'"),
+ (Token.Text, ' '),
+ (Token.Operator, '='),
+ (Token.Operator, '>'),
+ (Token.Text, ' '),
+ (Token.Punctuation, '('),
+ (Token.Name, ''),
+ (Token.Name, 'required'),
+ (Token.Text, ' '),
+ (Token.Operator, '='),
+ (Token.Operator, '>'),
+ (Token.Text, ' '),
+ (Token.Literal.Number.Integer, '1'),
+ (Token.Punctuation, ')'),
+ (Token.Punctuation, ';'),
+ (Token.Text, '\n'),
+ (Token.Name, ''),
+ (Token.Name, 'has'),
+ (Token.Text, ' '),
+ (Token.Literal.String, "'baz'"),
+ (Token.Text, ' '),
+ (Token.Operator, '='),
+ (Token.Operator, '>'),
+ (Token.Text, ' '),
+ (Token.Punctuation, '('),
+ (Token.Name, ''),
+ (Token.Name, 'isa'),
+ (Token.Text, ' '),
+ (Token.Operator, '='),
+ (Token.Operator, '>'),
+ (Token.Text, ' '),
+ (Token.Literal.String, "'Int'"),
+ (Token.Punctuation, ','),
+ (Token.Text, ' '),
+ (Token.Name, ''),
+ (Token.Name, 'default'),
+ (Token.Text, ' '),
+ (Token.Operator, '='),
+ (Token.Operator, '>'),
+ (Token.Text, ' '),
+ (Token.Literal.Number.Integer, '17'),
+ (Token.Punctuation, ')'),
+ (Token.Punctuation, ';'),
+ (Token.Text, '\n'),
+ (Token.Name.Tag, '</%class>'),
+ (Token.Text, '\n'),
+ ]
+ assert list(lexerMason.get_tokens(fragment)) == tokens
\ No newline at end of file