From 55a4c08a47fc1860bcc5217290b9179e528bdea0 Mon Sep 17 00:00:00 2001 From: Hyunjee Kim Date: Thu, 14 May 2020 13:33:09 +0900 Subject: [PATCH] Imported Upstream version 3.2.2 Change-Id: I5bd2b4438a0954128de440924562a79b06987c13 Signed-off-by: Hyunjee Kim --- Markdown.egg-info/PKG-INFO | 4 +- Markdown.egg-info/SOURCES.txt | 2 + Markdown.egg-info/requires.txt | 4 +- PKG-INFO | 4 +- docs/authors.md | 4 +- docs/change_log/index.md | 16 +- docs/change_log/release-2.2.md | 2 +- docs/change_log/release-2.3.md | 2 +- docs/change_log/release-3.2.md | 2 +- docs/cli.md | 2 +- docs/contributing.md | 5 +- docs/extensions/admonition.md | 2 +- docs/extensions/api.md | 1039 +++++++++-------- docs/extensions/attr_list.md | 4 +- docs/extensions/fenced_code_blocks.md | 4 + docs/reference.md | 31 +- docs/test_tools.md | 2 +- markdown/__meta__.py | 30 +- markdown/core.py | 8 +- markdown/extensions/__init__.py | 21 +- markdown/extensions/codehilite.py | 20 +- markdown/extensions/toc.py | 17 +- markdown/test_tools.py | 2 +- markdown/util.py | 9 +- pyproject.toml | 4 + setup.cfg | 3 - setup.py | 2 +- .../basic/markdown-documentation-basics.html | 2 +- tests/basic/markdown-documentation-basics.txt | 2 +- tests/basic/markdown-syntax.html | 4 +- tests/basic/markdown-syntax.txt | 4 +- tests/extensions/extra/markdown-syntax.html | 4 +- tests/extensions/extra/markdown-syntax.txt | 4 +- tests/extensions/toc.html | 4 +- tests/extensions/toc.txt | 4 +- .../Markdown Documentation - Basics.html | 2 +- .../Markdown Documentation - Syntax.html | 4 +- .../Markdown Documentation - Basics.html | 2 +- .../Markdown Documentation - Syntax.html | 4 +- tests/test_apis.py | 4 +- tests/test_meta.py | 24 + tests/test_syntax/extensions/test_toc.py | 22 + tox.ini | 7 +- 43 files changed, 739 insertions(+), 603 deletions(-) create mode 100644 pyproject.toml create mode 100644 tests/test_meta.py diff --git a/Markdown.egg-info/PKG-INFO b/Markdown.egg-info/PKG-INFO index 9526d2b..9ba70df 100644 --- 
a/Markdown.egg-info/PKG-INFO +++ b/Markdown.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: Markdown -Version: 3.2.1 +Version: 3.2.2 Summary: Python implementation of Markdown. Home-page: https://Python-Markdown.github.io/ Author: Manfred Stienstra, Yuri takhteyev and Waylan limberg @@ -8,7 +8,7 @@ Author-email: waylan.limberg@icloud.com Maintainer: Waylan Limberg Maintainer-email: waylan.limberg@icloud.com License: BSD License -Download-URL: http://pypi.python.org/packages/source/M/Markdown/Markdown-3.2.1-py2.py3-none-any.whl +Download-URL: http://pypi.python.org/packages/source/M/Markdown/Markdown-3.2.2-py2.py3-none-any.whl Description: This is a Python implementation of John Gruber's Markdown_. It is almost completely compliant with the reference implementation, diff --git a/Markdown.egg-info/SOURCES.txt b/Markdown.egg-info/SOURCES.txt index 04a8401..2c02d87 100644 --- a/Markdown.egg-info/SOURCES.txt +++ b/Markdown.egg-info/SOURCES.txt @@ -5,6 +5,7 @@ README.md doc-requirements.txt makefile mkdocs.yml +pyproject.toml setup.cfg setup.py tox.ini @@ -91,6 +92,7 @@ tests/__init__.py tests/test_apis.py tests/test_extensions.py tests/test_legacy.py +tests/test_meta.py tests/basic/amps-and-angle-encoding.html tests/basic/amps-and-angle-encoding.txt tests/basic/angle-links-and-img.html diff --git a/Markdown.egg-info/requires.txt b/Markdown.egg-info/requires.txt index ae0fe90..1c52704 100644 --- a/Markdown.egg-info/requires.txt +++ b/Markdown.egg-info/requires.txt @@ -1,4 +1,6 @@ -setuptools>=36 + +[:python_version < "3.8"] +importlib_metadata [testing] coverage diff --git a/PKG-INFO b/PKG-INFO index 9526d2b..9ba70df 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: Markdown -Version: 3.2.1 +Version: 3.2.2 Summary: Python implementation of Markdown. 
Home-page: https://Python-Markdown.github.io/ Author: Manfred Stienstra, Yuri takhteyev and Waylan limberg @@ -8,7 +8,7 @@ Author-email: waylan.limberg@icloud.com Maintainer: Waylan Limberg Maintainer-email: waylan.limberg@icloud.com License: BSD License -Download-URL: http://pypi.python.org/packages/source/M/Markdown/Markdown-3.2.1-py2.py3-none-any.whl +Download-URL: http://pypi.python.org/packages/source/M/Markdown/Markdown-3.2.2-py2.py3-none-any.whl Description: This is a Python implementation of John Gruber's Markdown_. It is almost completely compliant with the reference implementation, diff --git a/docs/authors.md b/docs/authors.md index ba674c3..acf78b0 100644 --- a/docs/authors.md +++ b/docs/authors.md @@ -6,7 +6,7 @@ Primary Authors * __[Waylan Limberg](https://github.com/waylan)__ @waylan is the current maintainer of the code and has written much of the - current code base, included a complete refactor of the core for version 2.0. + current code base, including a complete refactor of the core for version 2.0. He started out by authoring many of the available extensions and later was asked to join Yuri, where he began fixing numerous bugs, adding documentation and making general improvements to the existing code base. @@ -28,7 +28,7 @@ Primary Authors Ph.D. Various pieces of his code still exist, most notably the basic structure. -* __[Manfed Stienstra](http://www.dwerg.net/)__ +* __Manfed Stienstra__ Manfed wrote the original version of the script and is responsible for various parts of the existing code base. diff --git a/docs/change_log/index.md b/docs/change_log/index.md index 62fe4b2..a5873cc 100644 --- a/docs/change_log/index.md +++ b/docs/change_log/index.md @@ -3,7 +3,19 @@ title: Change Log Python-Markdown Change Log ========================= -Feb 12, 2020: Released version 3.2.1 (a bug-fix release). +May 8, 2020: version 3.2.2 (a bug-fix release). + +* Add `checklinks` tox environment to ensure all links in documentation are good. 
+* Refactor extension API documentation (#729). +* Load entry_points (for extensions) only once using `importlib.metadata`. +* Do not double escape entities in TOC. +* Correctly report if an extension raises a `TypeError` (#939). +* Raise a `KeyError` when attempting to delete a nonexistent key from the + extension registry (#939). +* Remove import of `packaging` (or `pkg_resources` fallback) entirely. +* Remove `setuptools` as a run-time dependency (`install_required`). + +Feb 12, 2020: Released version 3.2.1 (a bug-fix release). * The `name` property in `toc_tokens` from the TOC extension now escapes HTML special characters (`<`, `>`, and `&`). @@ -251,4 +263,4 @@ escape, emphasis in the beginning of the paragraph. Nov. 2004: Added links, blockquotes, HTML blocks to Manfred Stienstra's code -Apr. 2004: Manfred's version at +Apr. 2004: Manfred's version at `http://www.dwerg.net/projects/markdown/` diff --git a/docs/change_log/release-2.2.md b/docs/change_log/release-2.2.md index 379a43e..75f47fa 100644 --- a/docs/change_log/release-2.2.md +++ b/docs/change_log/release-2.2.md @@ -40,7 +40,7 @@ What's New in Python-Markdown 2.2 --------------------------------- The docs were refactored and can now be found at -. The docs are now maintained in the +`http://packages.python.org/Markdown/`. The docs are now maintained in the Repository and are generated by the `setup.py build_docs` command. The [Sane_Lists](../extensions/sane_lists.md) diff --git a/docs/change_log/release-2.3.md b/docs/change_log/release-2.3.md index fc1809e..f60e426 100644 --- a/docs/change_log/release-2.3.md +++ b/docs/change_log/release-2.3.md @@ -58,7 +58,7 @@ Backwards-incompatible Changes PyTidyLib rather than using an extension (for example: `tidylib.tidy_fragment(markdown.markdown(source), options={...})`). 
-[PyTidyLib]: http://countergram.com/open-source/pytidylib +[PyTidyLib]: http://countergram.github.io/pytidylib/ What's New in Python-Markdown 2.3 --------------------------------- diff --git a/docs/change_log/release-3.2.md b/docs/change_log/release-3.2.md index 7b7caf2..f9452cc 100644 --- a/docs/change_log/release-3.2.md +++ b/docs/change_log/release-3.2.md @@ -93,4 +93,4 @@ The following bug fixes are included in the 3.2 release: * HTML tag placeholders are no longer included in `.toc_tokens` (#899). * Unescape backslash-escaped characters in TOC ids (#864). * Refactor bold and italic logic in order to solve complex nesting issues (#792). -* Always wrap CodeHilite code in tags (#862). +* Always wrap CodeHilite code in `code` tags (#862). diff --git a/docs/cli.md b/docs/cli.md index 1c4e40a..50e9ec2 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -55,7 +55,7 @@ path. * **Windows**: Assuming a default install of Python on Windows, your "Scripts" directory - is most likely something like `C:\\Python26\Scripts`. Verify the location + is most likely something like `C:\\Python37\Scripts`. Verify the location of your "Scripts" directory and add it to you system path. Calling `markdown_py` from the command line will call the wrapper batch diff --git a/docs/contributing.md b/docs/contributing.md index ca95042..974d380 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -9,7 +9,7 @@ propose changes to this document in a pull request. This project and everyone participating in it is governed by the [Python-Markdown Code of Conduct]. By participating, you are expected to uphold -this code. Please report unacceptable behavior to . +this code. Please report unacceptable behavior to [markdown@freewisdom.org][email]. ## Project Organization @@ -473,6 +473,7 @@ label from the same group. 
[Python-Markdown Organization]: https://github.com/Python-Markdown [Python-Markdown Code of Conduct]: https://github.com/Python-Markdown/markdown/blob/master/CODE_OF_CONDUCT.md +[email]: mailto:markdown@freewisdom.org [Python-Markdown/markdown]: https://github.com/Python-Markdown/markdown [issue tracker]: https://github.com/Python-Markdown/markdown/issues [syntax rules]: https://daringfireball.net/projects/markdown/syntax @@ -495,7 +496,7 @@ label from the same group. [configure a remote]: https://help.github.com/articles/configuring-a-remote-for-a-fork [sync changes]: https://help.github.com/articles/syncing-a-fork [virtual environment]: https://virtualenv.pypa.io/en/stable/ -[User Guide]: https://virtualenv.pypa.io/en/stable/userguide/#usage +[User Guide]: https://virtualenv.pypa.io/en/stable/user_guide.html [Development Mode]: https://setuptools.readthedocs.io/en/latest/setuptools.html#development-mode [PyTidyLib]: https://countergram.github.io/pytidylib/ [HTML Tidy]: https://www.html-tidy.org/ diff --git a/docs/extensions/admonition.md b/docs/extensions/admonition.md index 886320a..24d8254 100644 --- a/docs/extensions/admonition.md +++ b/docs/extensions/admonition.md @@ -99,7 +99,7 @@ Styling There is no CSS included as part of this extension. Check out the default [Sphinx][sphinx] theme for inspiration. -[sphinx]: http://sphinx.pocoo.org/ +[sphinx]: https://www.sphinx-doc.org/en/stable/ ## Usage diff --git a/docs/extensions/api.md b/docs/extensions/api.md index a95a50d..ce2a873 100644 --- a/docs/extensions/api.md +++ b/docs/extensions/api.md @@ -2,431 +2,514 @@ title: Extensions API # Writing Extensions for Python-Markdown -Python-Markdown includes an API for extension writers to plug their own -custom functionality and/or syntax into the parser. 
There are Preprocessors -which allow you to alter the source before it is passed to the parser, -inline patterns which allow you to add, remove or override the syntax of -any inline elements, and Postprocessors which allow munging of the -output of the parser before it is returned. If you really want to dive in, -there are also Blockprocessors which are part of the core BlockParser. - -As the parser builds an [ElementTree][ElementTree] object which is later rendered -as Unicode text, there are also some helpers provided to ease manipulation of -the tree. Each part of the API is discussed in its respective section below. -Additionally, reading the source of some [Available Extensions][] may be -helpful. For example, the [Footnotes][] extension uses most of the features -documented here. - -## Preprocessors {: #preprocessors } - -Preprocessors munge the source text before it is passed into the Markdown -core. This is an excellent place to clean up bad syntax, extract things the -parser may otherwise choke on and perhaps even store it for later retrieval. - -Preprocessors should inherit from `markdown.preprocessors.Preprocessor` and -implement a `run` method with one argument `lines`. The `run` method of -each Preprocessor will be passed the entire source text as a list of Unicode -strings. Each string will contain one line of text. The `run` method should -return either that list, or an altered list of Unicode strings. +Python-Markdown includes an API for extension writers to plug their own custom functionality and syntax into the +parser. An extension will patch into one or more stages of the parser: -A pseudo example: +* [*Preprocessors*](#preprocessors) alter the source before it is passed to the parser. +* [*Block Processors*](#blockprocessors) work with blocks of text separated by blank lines. 
+* [*Tree Processors*](#treeprocessors) modify the constructed ElementTree. +* [*Inline Processors*](#inlineprocessors) are common tree processors for inline elements, such as `*strong*`. +* [*Postprocessors*](#postprocessors) munge the output of the parser just before it is returned. + +The parser loads text, applies the preprocessors, creates and builds an [ElementTree][ElementTree] object from the +block processors and inline processors, renders the ElementTree object as Unicode text, and then applies the +postprocessors. + +There are classes and helpers provided to ease writing your extension. Each part of the API is discussed in its +respective section below. Additionally, you can walk through the [Tutorial on Writing Extensions][tutorial]; look at +some of the [Available Extensions][] and their [source code][extension source]. As always, you may report bugs, ask +for help, and discuss various other issues on the [bug tracker]. + +## Phases of processing {: #stages } + +### Preprocessors {: #preprocessors } + +Preprocessors munge the source text before it is passed to the Markdown parser. This is an excellent place to clean up +bad characters or to extract portions for later processing that the parser may otherwise choke on. + +Preprocessors inherit from `markdown.preprocessors.Preprocessor` and implement a `run` method, which takes a single +parameter `lines`. This parameter is the entire source text stored as a list of Unicode strings, one per line. `run` +should return its processed list of Unicode strings, one per line. + +#### Example + +This simple example removes any lines with 'NO RENDER' before processing: ```python from markdown.preprocessors import Preprocessor +import re -class MyPreprocessor(Preprocessor): +class NoRender(Preprocessor): + """ Skip any line with words 'NO RENDER' in it. 
""" def run(self, lines): new_lines = [] for line in lines: - m = MYREGEX.match(line) - if m: - # do stuff - else: - new_lines.append(line) + m = re.search("NO RENDER", line) + if not m: + # any line without NO RENDER is passed through + new_lines.append(line) return new_lines ``` -## Inline Patterns {: #inlinepatterns } +#### Usages -### Legacy +Some preprocessors in the Markdown source tree include: -Inline Patterns implement the inline HTML element syntax for Markdown such as -`*emphasis*` or `[links](http://example.com)`. Pattern objects should be -instances of classes that inherit from `markdown.inlinepatterns.Pattern` or -one of its children. Each pattern object uses a single regular expression and -must have the following methods: +| Class | Kind | Description | +| ------------------------------|-----------|------------------------------------------------- | +| [`NormalizeWhiteSpace`][c1] | built-in | Normalizes whitespace by expanding tabs, fixing `\r` line endings, etc. | +| [`HtmlBlockPreprocessor`][c2] | built-in | Removes html blocks from the text and stores them for later processing | +| [`ReferencePreprocessor`][c3] | built-in | Removes reference definitions from text and stores for later processing | +| [`MetaPreprocessor`][c4] | extension | Strips and records meta data at top of documents | +| [`FootnotesPreprocessor`][c5] | extension | Removes footnote blocks from the text and stores them for later processing | -* **`getCompiledRegExp()`**: +[c1]: https://github.com/Python-Markdown/markdown/blob/master/markdown/preprocessors.py +[c2]: https://github.com/Python-Markdown/markdown/blob/master/markdown/preprocessors.py +[c3]: https://github.com/Python-Markdown/markdown/blob/master/markdown/preprocessors.py +[c4]: https://github.com/Python-Markdown/markdown/blob/master/markdown/extensions/meta.py +[c5]: https://github.com/Python-Markdown/markdown/blob/master/markdown/extensions/footnotes.py - Returns a compiled regular expression. 
+### Block Processors {: #blockprocessors } -* **`handleMatch(m)`**: +A block processor parses blocks of text and adds new elements to the `ElementTree`. Blocks of text, separated from +other text by blank lines, may have a different syntax and produce a differently structured tree than other Markdown. +Block processors excel at code formatting, equation layouts, and tables. - Accepts a match object and returns an ElementTree element of a plain - Unicode string. +Block processors inherit from `markdown.blockprocessors.BlockProcessor`, are passed `md.parser` on initialization, and +implement both the `test` and `run` methods: -Also, Inline Patterns can define the property `ANCESTOR_EXCLUDES` with either -a list or tuple of undesirable ancestors. The pattern should not match if it -would cause the content to be a descendant of one of the defined tag names. +* `test(self, parent, block)` takes two parameters: `parent` is the parent `ElementTree` element and `block` is a + single, multi-line, Unicode string of the current block. `test`, often a regular expression match, returns a true + value if the block processor's `run` method should be called to process starting at that block. +* `run(self, parent, blocks)` has the same `parent` parameter as `test`; and `blocks` is the list of all remaining + blocks in the document, starting with the `block` passed to `test`. `run` may return `False` (not `None`) to signal + failure, meaning that it did not process the blocks after all. On success, `run` is expected to `pop` one or more + blocks from the front of `blocks` and attach new nodes to `parent`. -Note that any regular expression returned by `getCompiledRegExp` must capture -the whole block. Therefore, they should all start with `r'^(.*?)'` and end -with `r'(.*?)!'`. 
When using the default `getCompiledRegExp()` method -provided in the `Pattern` you can pass in a regular expression without that -and `getCompiledRegExp` will wrap your expression for you and set the -`re.DOTALL` and `re.UNICODE` flags. This means that the first group of your -match will be `m.group(2)` as `m.group(1)` will match everything before the -pattern. +Crafting block processors is more involved and flexible than the other processors, involving controlling recursive +parsing of the block's contents and managing state across invocations. For example, a blank line is allowed in +indented code, so the second invocation of the inline code processor appends to the element tree generated by the +previous call. Other block processors may insert new text into the `blocks` list, signal to future calls of itself, +and more. -For an example, consider this simplified emphasis pattern: +To make writing these complex beasts more tractable, three convenience functions have been provided by the +`BlockProcessor` parent class: -```python -from markdown.inlinepatterns import Pattern -import xml.etree.ElementTree as etree +* `lastChild(parent)` returns the last child of the given element or `None` if it has no children. +* `detab(text)` removes one level of indent (four spaces by default) from the front of each line of the given + multi-line, text string, until a non-blank line is indented less. +* `looseDetab(text, level)` removes multiple levels + of indent from the front of each line of `text` but does not affect lines indented less. -class EmphasisPattern(Pattern): - def handleMatch(self, m): - el = etree.Element('em') - el.text = m.group(2) - return el -``` +Also, `BlockProcessor` provides the fields `self.tab_length`, the tab length (default 4), and `self.parser`, the +current `BlockParser` instance. + +#### BlockParser + +`BlockParser`, not to be confused with `BlockProcessor`, is the class used by Markdown to cycle through all the +registered block processors. 
You should never need to create your own instance; use `self.parser` instead. + +The `BlockParser` instance provides a stack of strings for its current state, which your processor can push with +`self.parser.set(state)`, pop with `self.parser.reset()`, or check the top state with +`self.parser.isstate(state)`. Be sure your code pops the states it pushes. + +The `BlockParser` instance can also be called recursively, that is, to process blocks from within your block +processor. There are three methods: + +* `parseDocument(lines)` parses a list of lines, each a single-line Unicode string, returning a complete + `ElementTree`. +* `parseChunk(parent, text)` parses a single, multi-line, possibly multi-block, Unicode string `text` and attaches the + resulting tree to `parent`. +* `parseBlocks(parent, blocks)` takes a list of `blocks`, each a multi-line Unicode string without blank lines, and + attaches the resulting tree to `parent`. -As discussed in [Integrating Your Code Into Markdown][], an instance of this -class will need to be provided to Markdown. That instance would be created -like so: +For perspective, Markdown calls `parseDocument` which calls `parseChunk` which calls `parseBlocks` which calls your +block processor, which, in turn, might call one of these routines. + +#### Example + +This example calls out important paragraphs by giving them a border. It looks for a fence line of exclamation points +before and after and renders the fenced blocks into a new, styled `div`. If it does not find the ending fence line, +it does nothing. + +Our code, like most block processors, is longer than other examples: ```python -# an oversimplified regex -MYPATTERN = r'\*([^*]+)\*' -# pass in pattern and create instance -emphasis = EmphasisPattern(MYPATTERN) +def test_block_processor(): + class BoxBlockProcessor(BlockProcessor): + RE_FENCE_START = r'^ *!{3,} *\n' # start line, e.g., ` !!!! 
` + RE_FENCE_END = r'\n *!{3,}\s*$' # last non-blank line, e.g, '!!!\n \n\n' + + def test(self, parent, block): + return re.match(self.RE_FENCE_START, block) + + def run(self, parent, blocks): + original_block = blocks[0] + blocks[0] = re.sub(self.RE_FENCE_START, '', blocks[0]) + + # Find block with ending fence + for block_num, block in enumerate(blocks): + if re.search(self.RE_FENCE_END, block): + # remove fence + blocks[block_num] = re.sub(self.RE_FENCE_END, '', block) + # render fenced area inside a new div + e = etree.SubElement(parent, 'div') + e.set('style', 'display: inline-block; border: 1px solid red;') + self.parser.parseBlocks(e, blocks[0:block_num + 1]) + # remove used blocks + for i in range(0, block_num + 1): + blocks.pop(0) + return True # or could have had no return statement + # No closing marker! Restore and do nothing + blocks[0] = original_block + return False # equivalent to our test() routine returning False + + class BoxExtension(Extension): + def extendMarkdown(self, md): + md.parser.blockprocessors.register(BoxBlockProcessor(md.parser), 'box', 175) ``` -Actually it would not be necessary to create that pattern (and not just because -a more sophisticated emphasis pattern already exists in Markdown). The fact is, -that example pattern is not very DRY. A pattern for `**strong**` text would -be almost identical, with the exception that it would create a 'strong' element. -Therefore, Markdown provides a number of generic pattern classes that can -provide some common functionality. For example, both emphasis and strong are -implemented with separate instances of the `SimpleTagPattern` listed below. -Feel free to use or extend any of the Pattern classes found at -`markdown.inlinepatterns`. - -### Future - -While users can still create plugins with the existing -`markdown.inlinepatterns.Pattern`, a new, more flexible inline processor has -been added which users are encouraged to migrate to. 
The new inline processor -is found at `markdown.inlinepatterns.InlineProcessor`. - -The new processor is very similar to legacy with two major distinctions. - -1. Patterns no longer need to match the entire block, so patterns no longer - start with `r'^(.*?)'` and end with `r'(.*?)!'`. This was a huge - performance sink and this requirement has been removed. The returned match - object will only contain what is explicitly matched in the pattern, and - extension pattern groups now start with `m.group(1)`. - -2. The `handleMatch` method now takes an additional input called `data`, - which is the entire block under analysis, not just what is matched with - the specified pattern. The method also returns the element *and* the index - boundaries relative to `data` that the return element is replacing - (usually `m.start(0)` and `m.end(0)`). If the boundaries are returned as - `None`, it is assumed that the match did not take place, and nothing will - be altered in `data`. - -If all you need is the same functionality as the legacy processor, you can do -as shown below. Most of the time, simple regular expression processing is all -you'll need. +Start with this example input: -```python -from markdown.inlinepatterns import InlineProcessor -import xml.etree.ElementTree as etree +``` text +A regular paragraph of text. -# an oversimplified regex -MYPATTERN = r'\*([^*]+)\*' +!!!!! +First paragraph of wrapped text. -class EmphasisPattern(InlineProcessor): - def handleMatch(self, m, data): - el = etree.Element('em') - el.text = m.group(1) - return el, m.start(0), m.end(0) +Second Paragraph of **wrapped** text. +!!!!! -# pass in pattern and create instance -emphasis = EmphasisPattern(MYPATTERN) +Another regular paragraph of text. ``` -But, the new processor allows you handle much more complex patterns that are -too much for Python's Re to handle. 
For instance, to handle nested brackets in -link patterns, the built-in link inline processor uses the following pattern to -find where a link *might* start: +The fenced text adds one node with two children to the tree: -```python -LINK_RE = NOIMG + r'\[' -link = LinkInlineProcessor(LINK_RE, md_instance) -``` +* `div`, with a `style` attribute. It renders as + `
...
` + * `p` with text `First paragraph of wrapped text.` + * `p` with text `Second Paragraph of **wrapped** text`. The conversion to a `<strong>` tag will happen when + running the inline processors, which will happen after all of the block processors have completed. -It then uses programmed logic to actually walk the string (`data`), starting at -where the match started (`m.start(0)`). If for whatever reason, the text -does not appear to be a link, it returns `None` for the start and end boundary -in order to communicate to the parser that no match was found. +The example output might display as follows: -```python - # Just a snippet of the link's handleMatch - # method to illustrate new logic - def handleMatch(self, m, data): - text, index, handled = self.getText(data, m.end(0)) +!!! note "" +

A regular paragraph of text.

+
+

First paragraph of wrapped text.

+

Second Paragraph of **wrapped** text.

+
+

Another regular paragraph of text.

- if not handled: - return None, None, None +#### Usages - href, title, index, handled = self.getLink(data, index) - if not handled: - return None, None, None +Some block processors in the Markdown source tree include: - el = etree.Element("a") - el.text = text +| Class | Kind | Description | +| ----------------------------|-----------|---------------------------------------------| +| [`HashHeaderProcessor`][b1] | built-in | Title hashes (`#`), which may split blocks | +| [`HRProcessor`][b2] | built-in | Horizontal lines, e.g., `---` | +| [`OListProcessor`][b3] | built-in | Ordered lists; complex and using `state` | +| [`Admonition`][b4] | extension | Render each [Admonition][] in a new `div` | - el.set("href", href) +[b1]: https://github.com/Python-Markdown/markdown/blob/master/markdown/blockprocessors.py +[b2]: https://github.com/Python-Markdown/markdown/blob/master/markdown/blockprocessors.py +[b3]: https://github.com/Python-Markdown/markdown/blob/master/markdown/blockprocessors.py +[Admonition]: https://python-markdown.github.io/extensions/admonition/ +[b4]: https://github.com/Python-Markdown/markdown/blob/master/markdown/extensions/admonition.py - if title is not None: - el.set("title", title) +### Tree processors {: #treeprocessors } - return el, m.start(0), index -``` +Tree processors manipulate the tree created by block processors. They can even create an entirely new ElementTree +object. This is an excellent place for creating summaries, adding collected references, or last minute adjustments. + +A tree processor must inherit from `markdown.treeprocessors.Treeprocessor` (note the capitalization). A tree processor +must implement a `run` method which takes a single argument `root`. In most cases `root` would be an +`xml.etree.ElementTree.Element` instance; however, in rare cases it could be some other type of ElementTree object. 
+The `run` method may return `None`, in which case the (possibly modified) original `root` object is used, or it may +return an entirely new `Element` object, which will replace the existing `root` object and all of its children. It is +generally preferred to modify `root` in place and return `None`, which avoids creating multiple copies of the entire +document tree in memory. -### Generic Pattern Classes +For specifics on manipulating the ElementTree, see [Working with the ElementTree][workingwithetree] below. -Some example processors that are available. +#### Example + +A pseudo example: -* **`SimpleTextInlineProcessor(pattern)`**: +```python +from markdown.treeprocessors import Treeprocessor - Returns simple text of `group(2)` of a `pattern` and the start and end - position of the match. +class MyTreeprocessor(Treeprocessor): + def run(self, root): + root.text = 'modified content' + # No return statement is same as `return None` +``` -* **`SimpleTagInlineProcessor(pattern, tag)`**: +#### Usages - Returns an element of type "`tag`" with a text attribute of `group(3)` - of a `pattern`. `tag` should be a string of a HTML element (i.e.: 'em'). - It also returns the start and end position of the match. +The core `InlineProcessor` class is a tree processor. It walks the tree, matches patterns, and splits and creates +nodes on matches. -* **`SubstituteTagInlineProcessor(pattern, tag)`**: +Additional tree processors in the Markdown source tree include: - Returns an element of type "`tag`" with no children or text (i.e.: `br`) - and the start and end position of the match. 
+| Class | Kind | Description | +| ----------------------------------|-----------|---------------------------------------------------------------| +| [`PrettifyTreeprocessor`][e1] | built-in | Add line breaks to the html document | +| [`TocTreeprocessor`][e2] | extension | Builds a [table of contents][] from the finished tree | +| [`FootnoteTreeprocessor`][e3] | extension | Create [footnote][] div at end of document | +| [`FootnotePostTreeprocessor`][e4] | extension | Amend div created by `FootnoteTreeprocessor` with duplicates | -A very small number of the basic legacy processors are still available to -prevent breakage of 3rd party extensions during the transition period to the -new processors. Three of the available processors are listed below. +[e1]: https://github.com/Python-Markdown/markdown/blob/master/markdown/treeprocessors.py +[e2]: https://github.com/Python-Markdown/markdown/blob/master/markdown/extensions/toc.py +[e3]: https://github.com/Python-Markdown/markdown/blob/master/markdown/extensions/footnotes.py +[e4]: https://github.com/Python-Markdown/markdown/blob/master/markdown/extensions/footnotes.py +[table of contents]: https://python-markdown.github.io/extensions/toc/ +[footnote]: https://python-markdown.github.io/extensions/footnotes/ -* **`SimpleTextPattern(pattern)`**: +### Inline Processors {: #inlineprocessors } - Returns simple text of `group(2)` of a `pattern`. +Inline processors, previously called inline patterns, are used to add formatting, such as `**emphasis**`, by replacing +a matched pattern with a new element tree node. It is an excellent place for adding new syntax for inline tags. Inline +processor code is often quite short. -* **`SimpleTagPattern(pattern, tag)`**: +Inline processors inherit from `InlineProcessor`, are initialized, and implement `handleMatch`: - Returns an element of type "`tag`" with a text attribute of `group(3)` - of a `pattern`. `tag` should be a string of a HTML element (i.e.: 'em'). 
+* `__init__(self, pattern, md=None)` is the inherited constructor. You do not need to implement your own. + * `pattern` is the regular expression string that must match the code block in order for the `handleMatch` method + to be called. + * `md`, an optional parameter, is a pointer to the instance of `markdown.Markdown` and is available as `self.md` + on the `InlineProcessor` instance. -* **`SubstituteTagPattern(pattern, tag)`**: +* `handleMatch(self, m, data)` must be implemented in all `InlineProcessor` subclasses. + * `m` is the regular expression [match object][] found by the `pattern` passed to `__init__`. + * `data` is a single, multi-line, Unicode string containing the entire block of text around the pattern. A block + is text set apart by blank lines. + * Returns either `(None, None, None)`, indicating the provided match was rejected or `(el, start, end)`, if the + match was successfully processed. On success, `el` is the element being added to the tree, `start` and `end` are + indexes in `data` that were "consumed" by the pattern. The "consumed" span will be replaced by a placeholder. + The same inline processor may be called several times on the same block. - Returns an element of type "`tag`" with no children or text (i.e.: `br`). +Inline Processors can define the property `ANCESTOR_EXCLUDES` which is either a list or tuple of undesirable ancestors. +The processor will be skipped if it would cause the content to be a descendant of one of the listed tag names. -There may be other Pattern classes in the Markdown source that you could extend -or use as well. Read through the source and see if there is anything you can -use. You might even get a few ideas for different approaches to your specific -situation. +##### Convenience Classes -## Treeprocessors {: #treeprocessors } +Convenience subclasses of `InlineProcessor` are provided for common operations: -Treeprocessors manipulate an ElementTree object after it has passed through the -core BlockParser.
This is where additional manipulation of the tree takes -place. Additionally, the InlineProcessor is a Treeprocessor which steps through -the tree and runs the Inline Patterns on the text of each Element in the tree. +* [`SimpleTextInlineProcessor`][i1] returns the text of `group(1)` of the match. +* [`SubstituteTagInlineProcessor`][i4] is initialized as `SubstituteTagInlineProcessor(pattern, tag)`. It returns a + new element `tag` whenever `pattern` is matched. +* [`SimpleTagInlineProcessor`][i3] is initialized as `SimpleTagInlineProcessor(pattern, tag)`. It returns an element + `tag` with a text field of `group(2)` of the match. -A Treeprocessor should inherit from `markdown.treeprocessors.Treeprocessor`, -over-ride the `run` method which takes one argument `root` (an ElementTree -object) and either modifies that root element and returns `None` or returns a -new ElementTree object. +##### Example -A pseudo example: +This example changes `--strike--` to `strike`. ```python -from markdown.treeprocessors import Treeprocessor +from markdown.inlinepatterns import InlineProcessor +from markdown.extensions import Extension +import xml.etree.ElementTree as etree -class MyTreeprocessor(Treeprocessor): - def run(self, root): - root.text = 'modified content' + +class DelInlineProcessor(InlineProcessor): + def handleMatch(self, m, data): + el = etree.Element('del') + el.text = m.group(1) + return el, m.start(0), m.end(0) + +class DelExtension(Extension): + def extendMarkdown(self, md): + DEL_PATTERN = r'--(.*?)--' # like --del-- + md.inlinePatterns.register(DelInlineProcessor(DEL_PATTERN, md), 'del', 175) ``` -Note that Python class methods return `None` by default when no `return` -statement is defined. Additionally all Python variables refer to objects by -reference. Therefore, the above `run` method modifies the `root` element -in place and returns `None`. The changes made to the `root` element and its -children are retained. 
+Use this input example: + +``` text +First line of the block. +This is --strike one--. +This is --strike two--. +End of the block. +``` -Some may be inclined to return the modified `root` element. While that would -work, it would cause a copy of the entire ElementTree to be generated each -time the Treeprocessor is run. Therefore, it is generally expected that -the `run` method would only return `None` or a new ElementTree object. +The example output might display as follows: -For specifics on manipulating the ElementTree, see -[Working with the ElementTree][workingwithetree] below. +!!! note "" +

First line of the block. + This is strike one. + This is strike two. + End of the block.

-## Postprocessors {: #postprocessors } +* On the first call to `handleMatch` + * `m` will be the match for `--strike one--` + * `data` will be the string: + `First line of the block.\nThis is --strike one--.\nThis is --strike two--.\nEnd of the block.` -Postprocessors manipulate the document after the ElementTree has been -serialized into a string. Postprocessors should be used to work with the -text just before output. + Because the match was successful, the region between the returned `start` and `end` is replaced with a + placeholder token and the new element is added to the tree. -A Postprocessor should inherit from `markdown.postprocessors.Postprocessor` -and over-ride the `run` method which takes one argument `text` and returns -a Unicode string. +* On the second call to `handleMatch` + * `m` will be the match for `--strike two--` + * `data` will be the string + `First line of the block.\nThis is klzzwxh:0000.\nThis is --strike two--.\nEnd of the block.` -Postprocessors are run after the ElementTree has been serialized back into -Unicode text. For example, this may be an appropriate place to add a table of -contents to a document: +Note the placeholder token `klzzwxh:0000`. This allows the regular expression to be run against the entire block, +not just the text contained in an individual element. The placeholders will later be swapped back out for the +actual elements by the parser. + +Actually it would not be necessary to create the above inline processor. The fact is, that example is not very DRY +(Don't Repeat Yourself). A pattern for `**strong**` text would be almost identical, with the exception that it would +create a `strong` element. Therefore, Markdown provides a number of generic `InlineProcessor` subclasses that can +provide some common functionality. For example, strike could be implemented with an instance of the +`SimpleTagInlineProcessor` class as demonstrated below.
Feel free to use or extend any of the `InlineProcessor` +subclasses found at `markdown.inlinepatterns`. ```python -from markdown.postprocessors import Postprocessor +from markdown.inlinepatterns import SimpleTagInlineProcessor +from markdown.extensions import Extension -class TocPostprocessor(Postprocessor): - def run(self, text): - return MYMARKERRE.sub(MyToc, text) +class DelExtension(Extension): + def extendMarkdown(self, md): + md.inlinePatterns.register(SimpleTagInlineProcessor(r'()--(.*?)--', 'del'), 'del', 175) ``` -## BlockParser {: #blockparser } -Sometimes, Preprocessors, Treeprocessors, Postprocessors, and Inline Patterns -are not going to do what you need. Perhaps you want a new type of block type -that needs to be integrated into the core parsing. In such a situation, you can -add/change/remove functionality of the core `BlockParser`. The BlockParser is -composed of a number of Blockprocessors. The BlockParser steps through each -block of text (split by blank lines) and passes each block to the appropriate -Blockprocessor. That Blockprocessor parses the block and adds it to the -ElementTree. The -[Definition Lists][] extension would be a good example of an extension that -adds/modifies Blockprocessors. +##### Usages -A Blockprocessor should inherit from `markdown.blockprocessors.BlockProcessor` -and implement both the `test` and `run` methods. +Here are some convenience functions and other examples: -The `test` method is used by BlockParser to identify the type of block. -Therefore the `test` method must return a Boolean value. If the test returns -`True`, then the BlockParser will call that Blockprocessor's `run` method. -If it returns `False`, the BlockParser will move on to the next -Blockprocessor. 
+| Class | Kind | Description | +| ---------------------------------|-----------|---------------------------------------------------------------| +| [`AsteriskProcessor`][i5] | built-in | Emphasis processor for handling strong and em matches inside asterisks | +| [`AbbrInlineProcessor`][i6] | extension | Apply tag to abbreviation registered by preprocessor | +| [`WikiLinksInlineProcessor`][i7] | extension | Link `[[article names]]` to wiki given in metadata | +| [`FootnoteInlineProcessor`][i8] | extension | Replaces footnote in text with link to footnote div at bottom | -The **`test`** method takes two arguments: +[i1]: https://github.com/Python-Markdown/markdown/blob/master/markdown/inlinepatterns.py +[i2]: https://github.com/Python-Markdown/markdown/blob/master/markdown/inlinepatterns.py +[i3]: https://github.com/Python-Markdown/markdown/blob/master/markdown/inlinepatterns.py +[i4]: https://github.com/Python-Markdown/markdown/blob/master/markdown/inlinepatterns.py +[i5]: https://github.com/Python-Markdown/markdown/blob/master/markdown/inlinepatterns.py +[i6]: https://github.com/Python-Markdown/markdown/blob/master/markdown/extensions/abbr.py +[i7]: https://github.com/Python-Markdown/markdown/blob/master/markdown/extensions/wikilinks.py +[i8]: https://github.com/Python-Markdown/markdown/blob/master/markdown/extensions/footnotes.py -* **`parent`**: The parent ElementTree Element of the block. This can be useful - as the block may need to be treated differently if it is inside a list, for - example. +### Patterns -* **`block`**: A string of the current block of text. The test may be a - simple string method (such as `block.startswith(some_text)`) or a complex - regular expression. +In version 3.0, a new, more flexible inline processor was added, `markdown.inlinepatterns.InlineProcessor`. The +original inline patterns, which inherit from `markdown.inlinepatterns.Pattern` or one of its children are still +supported, though users are encouraged to migrate. 
-The **`run`** method takes two arguments: +#### Comparison with new `InlineProcessor` -* **`parent`**: A pointer to the parent ElementTree Element of the block. The run - method will most likely attach additional nodes to this parent. Note that - nothing is returned by the method. The ElementTree object is altered in place. +The new `InlineProcessor` provides two major enhancements to `Patterns`: -* **`blocks`**: A list of all remaining blocks of the document. Your run - method must remove (pop) the first block from the list (which it altered in - place - not returned) and parse that block. You may find that a block of text - legitimately contains multiple block types. Therefore, after processing the - first type, your processor can insert the remaining text into the beginning - of the `blocks` list for future parsing. +1. Inline Processors no longer need to match the entire block, so regular expressions no longer need to start with + `r'^(.*?)'` and end with `r'(.*?)$'`. This runs faster. The returned [match object][] will only contain what is + explicitly matched in the pattern, and extension pattern groups now start with `m.group(1)`. -Please be aware that a single block can span multiple text blocks. For example, -The official Markdown syntax rules state that a blank line does not end a -Code Block. If the next block of text is also indented, then it is part of -the previous block. Therefore, the BlockParser was specifically designed to -address these types of situations. If you notice the `CodeBlockProcessor`, -in the core, you will note that it checks the last child of the `parent`. -If the last child is a code block (`
...
`), then it -appends that block to the previous code block rather than creating a new -code block. +2. The `handleMatch` method now takes an additional input called `data`, which is the entire block under analysis, + not just what is matched with the specified pattern. The method now returns the element *and* the indexes relative + to `data` that the return element is replacing (usually `m.start(0)` and `m.end(0)`). If the boundaries are + returned as `None`, it is assumed that the match did not take place, and nothing will be altered in `data`. -Each Blockprocessor has the following utility methods available: + This allows handling of more complex constructs than regular expressions can handle, e.g., matching nested + brackets, and explicit control of the span "consumed" by the processor. + +#### Inline Patterns -* **`lastChild(parent)`**: - - Returns the last child of the given ElementTree Element or `None` if it - had no children. +Inline Patterns can implement inline HTML element syntax for Markdown such as `*emphasis*` or +`[links](http://example.com)`. Pattern objects should be instances of classes that inherit from +`markdown.inlinepatterns.Pattern` or one of its children. Each pattern object uses a single regular expression and +must have the following methods: -* **`detab(text)`**: +* **`getCompiledRegExp()`**: - Removes one level of indent (four spaces by default) from the front of each - line of the given text string. + Returns a compiled regular expression. -* **`looseDetab(text, level)`**: +* **`handleMatch(m)`**: - Removes "level" levels of indent (defaults to 1) from the front of each line - of the given text string. However, this methods allows secondary lines to - not be indented as does some parts of the Markdown syntax. + Accepts a match object and returns an ElementTree element of a plain Unicode string. 
-Each Blockprocessor also has a pointer to the containing BlockParser instance at -`self.parser`, which can be used to check or alter the state of the parser. -The BlockParser tracks it's state in a stack at `parser.state`. The state -stack is an instance of the `State` class. +Inline Patterns can define the property `ANCESTOR_EXCLUDES` which is either a list or tuple of undesirable ancestors. +The pattern will be skipped if it would cause the content to be a descendant of one of the listed tag names. -**`State`** is a subclass of `list` and has the additional methods: +Note that any regular expression returned by `getCompiledRegExp` must capture the whole block. Therefore, they should +all start with `r'^(.*?)'` and end with `r'(.*?)$'`. When using the default `getCompiledRegExp()` method provided in +the `Pattern` you can pass in a regular expression without that and `getCompiledRegExp` will wrap your expression for +you and set the `re.DOTALL` and `re.UNICODE` flags. This means that the first group of your match will be `m.group(2)` +as `m.group(1)` will match everything before the pattern. -* **`set(state)`**: +For an example, consider this simplified emphasis pattern: - Set a new state to string `state`. The new state is appended to the end - of the stack. +```python +from markdown.inlinepatterns import Pattern +import xml.etree.ElementTree as etree -* **`reset()`**: +class EmphasisPattern(Pattern): + def handleMatch(self, m): + el = etree.Element('em') + el.text = m.group(2) + return el +``` - Step back one step in the stack. The last state at the end is removed from - the stack. +As discussed in [Integrating Your Code Into Markdown][], an instance of this class will need to be provided to +Markdown.
That instance would be created like so: -* **`isstate(state)`**: +```python +# an oversimplified regex +MYPATTERN = r'\*([^*]+)\*' +# pass in pattern and create instance +emphasis = EmphasisPattern(MYPATTERN) +``` - Test that the top (current) level of the stack is of the given string - `state`. +### Postprocessors {: #postprocessors } -Note that to ensure that the state stack does not become corrupted, each time a -state is set for a block, that state *must* be reset when the parser finishes -parsing that block. +Postprocessors munge the document after the ElementTree has been serialized into a string. Postprocessors should be +used to work with the text just before output. Usually, they are used to add back sections that were extracted in a +preprocessor, fix up outgoing encodings, or wrap the whole document. -An instance of the **`BlockParser`** is found at `Markdown.parser`. -`BlockParser` has the following methods: +Postprocessors inherit from `markdown.postprocessors.Postprocessor` and implement a `run` method which takes a single +parameter `text`, the entire HTML document as a single Unicode string. `run` should return a single Unicode string +ready for output. Note that preprocessors use a list of lines while postprocessors use a single multi-line string. -* **`parseDocument(lines)`**: +#### Example - Given a list of lines, an ElementTree object is returned. This should be - passed an entire document and is the only method the `Markdown` class - calls directly. +Here is a simple example that changes the output to one big page showing the raw html. -* **`parseChunk(parent, text)`**: +```python +from markdown.postprocessors import Postprocessor +import re - Parses a chunk of markdown text composed of multiple blocks and attaches - those blocks to the `parent` Element. The `parent` is altered in place - and nothing is returned. Extensions would most likely use this method for - block parsing.
+class ShowActualHtmlPostprocesor(Postprocessor): + """ Wrap entire output in
 tags as a diagnostic. """
+    def run(self, text):
+        return '
\n' + re.sub('<', '&lt;', text) + '
\n' +``` -* **`parseBlocks(parent, blocks)`**: +#### Usages - Parses a list of blocks of text and attaches those blocks to the `parent` - Element. The `parent` is altered in place and nothing is returned. This - method will generally only be used internally to recursively parse nested - blocks of text. +Some postprocessors in the Markdown source tree include: -While it is not recommended, an extension could subclass or completely replace -the `BlockParser`. The new class would have to provide the same public API. -However, be aware that other extensions may expect the core parser provided -and will not work with such a drastically different parser. +| Class | Kind | Description | +| ------------------------------|-----------|----------------------------------------------------| +| [`raw_html`][p1] | built-in | Restore raw html from `htmlStash`, stored by `HTMLBlockPreprocessor`, and code highlighters | +| [`amp_substitute`][p2] | built-in | Convert ampersand substitutes to `&`; used in links | +| [`unescape`][p3] | built-in | Convert some escaped characters back from integers; used in links | +| [`FootnotePostProcessor`][p4] | extension | Replace footnote placeholders with html entities; as set by other stages | + + [p1]: https://github.com/Python-Markdown/markdown/blob/master/markdown/postprocessors.py + [p2]: https://github.com/Python-Markdown/markdown/blob/master/markdown/postprocessors.py + [p3]: https://github.com/Python-Markdown/markdown/blob/master/markdown/postprocessors.py + [p4]: https://github.com/Python-Markdown/markdown/blob/master/markdown/extensions/footnotes.py + ## Working with the ElementTree {: #working_with_et } -As mentioned, the Markdown parser converts a source document to an -[ElementTree][ElementTree] object before serializing that back to Unicode text. 
-Markdown has provided some helpers to ease that manipulation within the context +As mentioned, the Markdown parser converts a source document to an [ElementTree][ElementTree] object before +serializing that back to Unicode text. Markdown has provided some helpers to ease that manipulation within the context of the Markdown module. First, import the ElementTree module: @@ -434,19 +517,17 @@ First, import the ElementTree module: ```python import xml.etree.ElementTree as etree ``` -Sometimes you may want text inserted into an element to be parsed by -[Inline Patterns][]. In such a situation, simply insert the text as you normally -would and the text will be automatically run through the Inline Patterns. -However, if you do *not* want some text to be parsed by Inline Patterns, -then insert the text as an `AtomicString`. +Sometimes you may want text inserted into an element to be parsed by [Inline Patterns][]. In such a situation, simply +insert the text as you normally would and the text will be automatically run through the Inline Patterns. However, if +you do *not* want some text to be parsed by Inline Patterns, then insert the text as an `AtomicString`. ```python from markdown.util import AtomicString some_element.text = AtomicString(some_text) ``` -Here's a basic example which creates an HTML table (note that the contents of -the second cell (`td2`) will be run through Inline Patterns latter): +Here's a basic example which creates an HTML table (note that the contents of the second cell (`td2`) will be run +through Inline Patterns latter): ```python table = etree.Element("table") @@ -459,50 +540,44 @@ td2.text = "*text* with **inline** formatting." # Add markup text table.tail = "Text after table" # Add text after table ``` -You can also manipulate an existing tree. Consider the following example which -adds a `class` attribute to `` elements: +You can also manipulate an existing tree. 
Consider the following example which adds a `class` attribute to `` +elements: ```python def set_link_class(self, element): for child in element: if child.tag == "a": child.set("class", "myclass") #set the class attribute - set_link_class(child) # run recursively on children + set_link_class(child) # run recursively on children ``` For more information about working with ElementTree see the ElementTree -[Documentation](https://effbot.org/zone/element-index.htm) -([Python Docs](https://docs.python.org/3/library/xml.etree.elementtree.html)). +[Documentation](https://effbot.org/zone/element-index.htm) ([Python +Docs](https://docs.python.org/3/library/xml.etree.elementtree.html)). ## Integrating Your Code Into Markdown {: #integrating_into_markdown } -Once you have the various pieces of your extension built, you need to tell -Markdown about them and ensure that they are run in the proper sequence. -Markdown accepts an `Extension` instance for each extension. Therefore, you -will need to define a class that extends `markdown.extensions.Extension` and -over-rides the `extendMarkdown` method. Within this class you will manage -configuration options for your extension and attach the various processors and -patterns to the Markdown instance. - -It is important to note that the order of the various processors and patterns -matters. For example, if we replace `http://...` links with `` elements, and -*then* try to deal with inline HTML, we will end up with a mess. Therefore, the -various types of processors and patterns are stored within an instance of the -Markdown class in a [Registry][]. Your `Extension` class will need to manipulate -those registries appropriately. You may `register` instances of your processors -and patterns with an appropriate priority, `deregister` built-in instances, or -replace a built-in instance with your own. 
+Once you have the various pieces of your extension built, you need to tell Markdown about them and ensure that they +are run in the proper sequence. Markdown accepts an `Extension` instance for each extension. Therefore, you will need +to define a class that extends `markdown.extensions.Extension` and over-rides the `extendMarkdown` method. Within this +class you will manage configuration options for your extension and attach the various processors and patterns to the +Markdown instance. + +It is important to note that the order of the various processors and patterns matters. For example, if we replace +`http://...` links with `` elements, and *then* try to deal with inline HTML, we will end up with a mess. +Therefore, the various types of processors and patterns are stored within an instance of the `markdown.Markdown` class +in a [Registry][]. Your `Extension` class will need to manipulate those registries appropriately. You may `register` +instances of your processors and patterns with an appropriate priority, `deregister` built-in instances, or replace a +built-in instance with your own. ### `extendMarkdown` {: #extendmarkdown } -The `extendMarkdown` method of a `markdown.extensions.Extension` class -accepts one argument: +The `extendMarkdown` method of a `markdown.extensions.Extension` class accepts one argument: * **`md`**: - A pointer to the instance of the Markdown class. You should use this to - access the [Registries][Registry] of processors and patterns. They are - found under the following attributes: + A pointer to the instance of the `markdown.Markdown` class. You should use this to access the + [Registries][Registry] of processors and patterns. 
They are found under the following attributes: * `md.preprocessors` * `md.inlinePatterns` @@ -510,7 +585,7 @@ accepts one argument: * `md.treeprocessors` * `md.postprocessors` - Some other things you may want to access in the markdown instance are: + Some other things you may want to access on the `markdown.Markdown` instance are: * `md.htmlStash` * `md.output_formats` @@ -523,12 +598,10 @@ accepts one argument: * `md.isBlockLevel()` !!! Warning - With access to the above items, theoretically you have the option to - change anything through various [monkey_patching][] techniques. However, - you should be aware that the various undocumented parts of markdown may - change without notice and your monkey_patches may break with a new release. - Therefore, what you really should be doing is inserting processors and - patterns into the markdown pipeline. Consider yourself warned! + With access to the above items, theoretically you have the option to change anything through various + [monkey_patching][] techniques. However, you should be aware that the various undocumented parts of Markdown may + change without notice and your monkey_patches may break with a new release. Therefore, what you really should be + doing is inserting processors and patterns into the Markdown pipeline. Consider yourself warned! [monkey_patching]: https://en.wikipedia.org/wiki/Monkey_patch @@ -543,77 +616,10 @@ class MyExtension(Extension): md.inlinePatterns.register(MyPattern(md), 'mypattern', 175) ``` -### Registry - -The `markdown.util.Registry` class is a priority sorted registry which Markdown -uses internally to determine the processing order of its various processors and -patterns. - -A `Registry` instance provides two public methods to alter the data of the -registry: `register` and `deregister`. Use `register` to add items and -`deregister` to remove items. See each method for specifics. - -When registering an item, a "name" and a "priority" must be provided. 
All -items are automatically sorted by the value of the "priority" parameter such -that the item with the highest value will be processed first. The "name" is -used to remove (`deregister`) and get items. - -A `Registry` instance is like a list (which maintains order) when reading -data. You may iterate over the items, get an item and get a count (length) -of all items. You may also check that the registry contains an item. - -When getting an item you may use either the index of the item or the -string-based "name". For example: - - registry = Registry() - registry.register(SomeItem(), 'itemname', 20) - # Get the item by index - item = registry[0] - # Get the item by name - item = registry['itemname'] - -When checking that the registry contains an item, you may use either the -string-based "name", or a reference to the actual item. For example: - - someitem = SomeItem() - registry.register(someitem, 'itemname', 20) - # Contains the name - assert 'itemname' in registry - # Contains the item instance - assert someitem in registry - -`markdown.util.Registry` has the following methods: - -#### `Registry.register(self, item, name, priority)` {: #registry.register } - -: Add an item to the registry with the given name and priority. - - Parameters: - - * `item`: The item being registered. - * `name`: A string used to reference the item. - * `priority`: An integer or float used to sort against all items. - - If an item is registered with a "name" which already exists, the existing - item is replaced with the new item. Tread carefully as the old item is lost - with no way to recover it. The new item will be sorted according to its - priority and will **not** retain the position of the old item. - -#### `Registry.deregister(self, name, strict=True)` {: #registry.deregister } - -: Remove an item from the registry. - - Set `strict=False` to fail silently. 
- -#### `Registry.get_index_for_name(self, name)` {: #registry.get_index_for_name } - -: Return the index of the given `name`. - ### registerExtension {: #registerextension } -Some extensions may need to have their state reset between multiple runs of the -Markdown class. For example, consider the following use of the [Footnotes][] -extension: +Some extensions may need to have their state reset between multiple runs of the `markdown.Markdown` class. For +example, consider the following use of the [Footnotes][] extension: ```python md = markdown.Markdown(extensions=['footnotes']) @@ -622,15 +628,12 @@ md.reset() html2 = md.convert(text_without_footnote) ``` -Without calling `reset`, the footnote definitions from the first document will -be inserted into the second document as they are still stored within the class -instance. Therefore the `Extension` class needs to define a `reset` method -that will reset the state of the extension (i.e.: `self.footnotes = {}`). -However, as many extensions do not have a need for `reset`, `reset` is only -called on extensions that are registered. +Without calling `reset`, the footnote definitions from the first document will be inserted into the second document as +they are still stored within the class instance. Therefore the `Extension` class needs to define a `reset` method that +will reset the state of the extension (i.e.: `self.footnotes = {}`). However, as many extensions do not have a need +for `reset`, `reset` is only called on extensions that are registered. -To register an extension, call `md.registerExtension` from within your -`extendMarkdown` method: +To register an extension, call `md.registerExtension` from within your `extendMarkdown` method: ```python def extendMarkdown(self, md): @@ -638,43 +641,41 @@ def extendMarkdown(self, md): # insert processors and patterns here ``` -Then, each time `reset` is called on the Markdown instance, the `reset` -method of each registered extension will be called as well. 
You should also -note that `reset` will be called on each registered extension after it is -initialized the first time. Keep that in mind when over-riding the extension's -`reset` method. +Then, each time `reset` is called on the `markdown.Markdown` instance, the `reset` method of each registered extension +will be called as well. You should also note that `reset` will be called on each registered extension after it is +initialized the first time. Keep that in mind when over-riding the extension's `reset` method. ### Configuration Settings {: #configsettings } -If an extension uses any parameters that the user may want to change, -those parameters should be stored in `self.config` of your -`markdown.extensions.Extension` class in the following format: +If an extension uses any parameters that the user may want to change, those parameters should be stored in +`self.config` of your `markdown.extensions.Extension` class in the following format: ```python class MyExtension(markdown.extensions.Extension): def __init__(self, **kwargs): - self.config = {'option1' : ['value1', 'description1'], - 'option2' : ['value2', 'description2'] } + self.config = { + 'option1' : ['value1', 'description1'], + 'option2' : ['value2', 'description2'] + } super(MyExtension, self).__init__(**kwargs) ``` -When implemented this way the configuration parameters can be over-ridden at -run time (thus the call to `super`). For example: +When implemented this way the configuration parameters can be over-ridden at run time (thus the call to `super`). For +example: ```python markdown.Markdown(extensions=[MyExtension(option1='other value')]) ``` -Note that if a keyword is passed in that is not already defined in -`self.config`, then a `KeyError` is raised. +Note that if a keyword is passed in that is not already defined in `self.config`, then a `KeyError` is raised. 
-The `markdown.extensions.Extension` class and its subclasses have the -following methods available to assist in working with configuration settings: +The `markdown.extensions.Extension` class and its subclasses have the following methods available to assist in working +with configuration settings: * **`getConfig(key [, default])`**: - Returns the stored value for the given `key` or `default` if the `key` - does not exist. If not set, `default` returns an empty string. + Returns the stored value for the given `key` or `default` if the `key` does not exist. If not set, `default` + returns an empty string. * **`getConfigs()`**: @@ -686,12 +687,10 @@ following methods available to assist in working with configuration settings: * **`setConfig(key, value)`**: - Sets a configuration setting for `key` with the given `value`. If `key` is - unknown, a `KeyError` is raised. If the previous value of `key` was - a Boolean value, then `value` is converted to a Boolean value. If - the previous value of `key` is `None`, then `value` is converted to - a Boolean value except when it is `None`. No conversion takes place - when the previous value of `key` is a string. + Sets a configuration setting for `key` with the given `value`. If `key` is unknown, a `KeyError` is raised. If the + previous value of `key` was a Boolean value, then `value` is converted to a Boolean value. If the previous value + of `key` is `None`, then `value` is converted to a Boolean value except when it is `None`. No conversion takes + place when the previous value of `key` is a string. * **`setConfigs(items)`**: @@ -699,9 +698,8 @@ following methods available to assist in working with configuration settings: ### Naming an Extension { #naming_an_extension } -As noted in the [library reference] an instance of an extension can be passed -directly to Markdown. In fact, this is the preferred way to use third-party -extensions. 
+As noted in the [library reference] an instance of an extension can be passed directly to `markdown.Markdown`. In +fact, this is the preferred way to use third-party extensions. For example: @@ -711,18 +709,15 @@ from path.to.module import MyExtension md = markdown.Markdown(extensions=[MyExtension(option='value')]) ``` -However, Markdown also accepts "named" third party extensions for those -occasions when it is impractical to import an extension directly (from the -command line or from within templates). A "name" can either be a registered -[entry point](#entry_point) or a string using Python's [dot -notation](#dot_notation). +However, Markdown also accepts "named" third party extensions for those occasions when it is impractical to import an +extension directly (from the command line or from within templates). A "name" can either be a registered [entry +point](#entry_point) or a string using Python's [dot notation](#dot_notation). #### Entry Point { #entry_point } -[Entry points] are defined in a Python package's `setup.py` script. The script -must use [setuptools] to support entry points. Python-Markdown extensions must -be assigned to the `markdown.extensions` group. An entry point definition might -look like this: +[Entry points] are defined in a Python package's `setup.py` script. The script must use [setuptools] to support entry +points. Python-Markdown extensions must be assigned to the `markdown.extensions` group. 
An entry point definition +might look like this: ```python from setuptools import setup @@ -735,25 +730,23 @@ setup( ) ``` -After a user installs your extension using the above script, they could then -call the extension using the `myextension` string name like this: +After a user installs your extension using the above script, they could then call the extension using the +`myextension` string name like this: ```python markdown.markdown(text, extensions=['myextension']) ``` -Note that if two or more entry points within the same group are assigned the -same name, Python-Markdown will only ever use the first one found and ignore all -others. Therefore, be sure to give your extension a unique name. +Note that if two or more entry points within the same group are assigned the same name, Python-Markdown will only ever +use the first one found and ignore all others. Therefore, be sure to give your extension a unique name. -For more information on writing `setup.py` scripts, see the Python documentation -on [Packaging and Distributing Projects]. +For more information on writing `setup.py` scripts, see the Python documentation on [Packaging and Distributing +Projects]. #### Dot Notation { #dot_notation } -If an extension does not have a registered entry point, Python's dot notation -may be used instead. The extension must be installed as a Python module on your -PYTHONPATH. Generally, a class should be specified in the name. The class must +If an extension does not have a registered entry point, Python's dot notation may be used instead. The extension must +be installed as a Python module on your PYTHONPATH. Generally, a class should be specified in the name. The class must be at the end of the name and be separated by a colon from the module. 
Therefore, if you were to import the class like this: @@ -768,16 +761,13 @@ Then the extension can be loaded as follows: markdown.markdown(text, extensions=['path.to.module:MyExtension']) ``` -You do not need to do anything special to support this feature. As long as your -extension class is able to be imported, a user can include it with the above -syntax. +You do not need to do anything special to support this feature. As long as your extension class is able to be +imported, a user can include it with the above syntax. -The above two methods are especially useful if you need to implement a large -number of extensions with more than one residing in a module. However, if you do -not want to require that your users include the class name in their string, you -must define only one extension per module and that module must contain a -module-level function called `makeExtension` that accepts `**kwargs` and returns -an extension instance. +The above two methods are especially useful if you need to implement a large number of extensions with more than one +residing in a module. However, if you do not want to require that your users include the class name in their string, +you must define only one extension per module and that module must contain a module-level function called +`makeExtension` that accepts `**kwargs` and returns an extension instance. For example: @@ -789,15 +779,78 @@ def makeExtension(**kwargs): return MyExtension(**kwargs) ``` -When Markdown is passed the "name" of your extension as a dot notation string -that does not include a class (for example `path.to.module`), it will import the -module and call the `makeExtension` function to initiate your extension. +When `markdown.Markdown` is passed the "name" of your extension as a dot notation string that does not include a class +(for example `path.to.module`), it will import the module and call the `makeExtension` function to initiate your +extension. 
+ +## Registries + +The `markdown.util.Registry` class is a priority sorted registry which Markdown uses internally to determine the +processing order of its various processors and patterns. + +A `Registry` instance provides two public methods to alter the data of the registry: `register` and `deregister`. Use +`register` to add items and `deregister` to remove items. See each method for specifics. + +When registering an item, a "name" and a "priority" must be provided. All items are automatically sorted by the value +of the "priority" parameter such that the item with the highest value will be processed first. The "name" is used to +remove (`deregister`) and get items. + +A `Registry` instance is like a list (which maintains order) when reading data. You may iterate over the items, get an +item and get a count (length) of all items. You may also check that the registry contains an item. + +When getting an item you may use either the index of the item or the string-based "name". For example: + +```python +registry = Registry() +registry.register(SomeItem(), 'itemname', 20) +# Get the item by index +item = registry[0] +# Get the item by name +item = registry['itemname'] +``` + +When checking that the registry contains an item, you may use either the string-based "name", or a reference to the +actual item. For example: + +```python +someitem = SomeItem() +registry.register(someitem, 'itemname', 20) +# Contains the name +assert 'itemname' in registry +# Contains the item instance +assert someitem in registry +``` + +`markdown.util.Registry` has the following methods: + +### `Registry.register(self, item, name, priority)` {: #registry.register data-toc-label='Registry.register'} + +: Add an item to the registry with the given name and priority. + + Parameters: + + * `item`: The item being registered. + * `name`: A string used to reference the item. + * `priority`: An integer or float used to sort against all items. 
+ + If an item is registered with a "name" which already exists, the existing item is replaced with the new item. + Tread carefully as the old item is lost with no way to recover it. The new item will be sorted according to its + priority and will **not** retain the position of the old item. + +### `Registry.deregister(self, name, strict=True)` {: #registry.deregister data-toc-label='Registry.deregister'} + +: Remove an item from the registry. + + Set `strict=False` to fail silently. + +### `Registry.get_index_for_name(self, name)` {: #registry.get_index_for_name data-toc-label='Registry.get_index_for_name'} + +: Return the index of the given `name`. -[Preprocessors]: #preprocessors -[Inline Patterns]: #inlinepatterns -[Treeprocessors]: #treeprocessors -[Postprocessors]: #postprocessors -[BlockParser]: #blockparser +[match object]: https://docs.python.org/3/library/re.html#match-objects +[bug tracker]: https://github.com/Python-Markdown/markdown/issues +[extension source]: https://github.com/Python-Markdown/markdown/tree/master/markdown/extensions +[tutorial]: https://github.com/Python-Markdown/markdown/wiki/Tutorial:-Writing-Extensions-for-Python-Markdown [workingwithetree]: #working_with_et [Integrating your code into Markdown]: #integrating_into_markdown [extendMarkdown]: #extendmarkdown @@ -807,8 +860,8 @@ module and call the `makeExtension` function to initiate your extension. 
[makeExtension]: #makeextension [ElementTree]: https://effbot.org/zone/element-index.htm [Available Extensions]: index.md -[Footnotes]: https://github.com/Python-Markdown/mdx_footnotes -[Definition Lists]: https://github.com/Python-Markdown/mdx_definition_lists +[Footnotes]: https://github.com/Python-Markdown/markdown/blob/master/markdown/extensions/footnotes.py +[Definition Lists]: https://github.com/Python-Markdown/markdown/blob/master/markdown/extensions/definition_lists [library reference]: ../reference.md [setuptools]: https://packaging.python.org/key_projects/#setuptools [Entry points]: https://setuptools.readthedocs.io/en/latest/setuptools.html#dynamic-discovery-of-services-and-plugins diff --git a/docs/extensions/attr_list.md b/docs/extensions/attr_list.md index d89eac2..4dcbc59 100644 --- a/docs/extensions/attr_list.md +++ b/docs/extensions/attr_list.md @@ -11,9 +11,9 @@ This extension is included in the standard Markdown library. ## Syntax -The basic syntax was inspired by [Maruku][]'s Attribute Lists feature. +The basic syntax was inspired by Maruku's Attribute Lists feature (see [web archive][Maruku]). -[Maruku]: http://maruku.rubyforge.org/proposal.html#attribute_lists +[Maruku]: https://web.archive.org/web/20170324172643/http://maruku.rubyforge.org/proposal.html ### The List diff --git a/docs/extensions/fenced_code_blocks.md b/docs/extensions/fenced_code_blocks.md index 9095057..0a584f7 100644 --- a/docs/extensions/fenced_code_blocks.md +++ b/docs/extensions/fenced_code_blocks.md @@ -34,6 +34,10 @@ part of the list. Fenced Code Blocks are only supported at the document root level. Therefore, they cannot be nested inside lists or blockquotes. + If you need to nest fenced code blocks, you may want to try the + third party extension [SuperFences] instead. 
+ +[SuperFences]: https://facelessuser.github.io/pymdown-extensions/extensions/superfences/ ### Language diff --git a/docs/reference.md b/docs/reference.md index 44fd174..8153ebe 100644 --- a/docs/reference.md +++ b/docs/reference.md @@ -25,7 +25,7 @@ instance of the `markdown.Markdown` class and pass multiple documents through it. If you do use a single instance though, make sure to call the `reset` method appropriately ([see below](#convert)). -### markdown.markdown(text [, **kwargs]) {: #markdown } +### markdown.markdown(text [, **kwargs]) {: #markdown data-toc-label='markdown.markdown' } The following options are available on the `markdown.markdown` function: @@ -34,24 +34,20 @@ __text__{: #text } : The source Unicode string. (required) !!! note "Important" - Python-Markdown expects **Unicode** as input (although - some simple ASCII strings *may* work) and returns output as Unicode. - Do not pass encoded strings to it! If your input is encoded, (e.g. as - UTF-8), it is your responsibility to decode it. For example: + Python-Markdown expects a **Unicode** string as input (some simple ASCII binary strings *may* work only by + coincidence) and returns output as a Unicode string. Do not pass binary strings to it! If your input is + encoded, (e.g. as UTF-8), it is your responsibility to decode it. 
For example: :::python - input_file = codecs.open("some_file.txt", mode="r", encoding="utf-8") - text = input_file.read() + with open("some_file.txt", "r", encoding="utf-8") as input_file: + text = input_file.read() html = markdown.markdown(text) If you want to write the output to disk, you *must* encode it yourself: :::python - output_file = codecs.open("some_file.html", "w", - encoding="utf-8", - errors="xmlcharrefreplace" - ) - output_file.write(html) + with open("some_file.html", "w", encoding="utf-8", errors="xmlcharrefreplace") as output_file: + output_file.write(html) __extensions__{: #extensions } @@ -181,7 +177,7 @@ __tab_length__{: #tab_length }: : Length of tabs in the source. Default: 4 -### `markdown.markdownFromFile (**kwargs)` {: #markdownFromFile } +### `markdown.markdownFromFile (**kwargs)` {: #markdownFromFile data-toc-label='markdown.markdownFromFile' } With a few exceptions, `markdown.markdownFromFile` accepts the same options as `markdown.markdown`. It does **not** accept a `text` (or Unicode) string. @@ -220,7 +216,7 @@ __encoding__{: #encoding } meet your specific needs, it is suggested that you write your own code to handle your encoding/decoding needs. -### markdown.Markdown([**kwargs]) {: #Markdown } +### markdown.Markdown([**kwargs]) {: #Markdown data-toc-label='markdown.Markdown' } The same options are available when initializing the `markdown.Markdown` class as on the [`markdown.markdown`](#markdown) function, except that the class does @@ -233,7 +229,7 @@ string must be passed to one of two instance methods. the thread they were created in. A single instance should not be accessed from multiple threads. -#### Markdown.convert(source) {: #convert } +#### Markdown.convert(source) {: #convert data-toc-label='Markdown.convert' } The `source` text must meet the same requirements as the [`text`](#text) argument of the [`markdown.markdown`](#markdown) function. 
@@ -248,8 +244,7 @@ html2 = md.convert(text2) ``` Depending on which options and/or extensions are being used, the parser may -need its state reset between each call to `convert`, otherwise performance -can degrade drastically: +need its state reset between each call to `convert`. ```python html1 = md.convert(text1) @@ -263,7 +258,7 @@ To make this easier, you can also chain calls to `reset` together: html3 = md.reset().convert(text3) ``` -#### Markdown.convertFile(**kwargs) {: #convertFile } +#### Markdown.convertFile(**kwargs) {: #convertFile data-toc-label='Markdown.convertFile' } The arguments of this method are identical to the arguments of the same name on the `markdown.markdownFromFile` function ([`input`](#input), diff --git a/docs/test_tools.md b/docs/test_tools.md index c252086..3a83d8e 100644 --- a/docs/test_tools.md +++ b/docs/test_tools.md @@ -169,6 +169,6 @@ rules apply. [unittest]: https://docs.python.org/3/library/unittest.html [Perl]: https://daringfireball.net/projects/markdown/ [PHP]: http://michelf.com/projects/php-markdown/ -[PyTidyLib]: http://countergram.com/open-source/pytidylib/ +[PyTidyLib]: http://countergram.github.io/pytidylib/ [Contributing Guide]: contributing.md [development environment]: contributing.md#development-environment diff --git a/markdown/__meta__.py b/markdown/__meta__.py index ead7c31..436ed0d 100644 --- a/markdown/__meta__.py +++ b/markdown/__meta__.py @@ -19,11 +19,6 @@ Copyright 2004 Manfred Stienstra (the original version) License: BSD (see LICENSE.md for details). 
""" -try: - import packaging.version -except ImportError: - from pkg_resources.extern import packaging - # __version_info__ format: # (major, minor, patch, dev/alpha/beta/rc/final, #) # (1, 1, 2, 'dev', 0) => "1.1.2.dev0" @@ -31,25 +26,24 @@ except ImportError: # (1, 2, 0, 'beta', 2) => "1.2b2" # (1, 2, 0, 'rc', 4) => "1.2rc4" # (1, 2, 0, 'final', 0) => "1.2" -__version_info__ = (3, 2, 1, 'final', 0) +__version_info__ = (3, 2, 2, 'final', 0) -def _get_version(): # pragma: no cover +def _get_version(version_info): " Returns a PEP 440-compliant version number from version_info. " - assert len(__version_info__) == 5 - assert __version_info__[3] in ('dev', 'alpha', 'beta', 'rc', 'final') + assert len(version_info) == 5 + assert version_info[3] in ('dev', 'alpha', 'beta', 'rc', 'final') - parts = 2 if __version_info__[2] == 0 else 3 - v = '.'.join(map(str, __version_info__[:parts])) + parts = 2 if version_info[2] == 0 else 3 + v = '.'.join(map(str, version_info[:parts])) - if __version_info__[3] == 'dev': - v += '.dev' + str(__version_info__[4]) - elif __version_info__[3] != 'final': + if version_info[3] == 'dev': + v += '.dev' + str(version_info[4]) + elif version_info[3] != 'final': mapping = {'alpha': 'a', 'beta': 'b', 'rc': 'rc'} - v += mapping[__version_info__[3]] + str(__version_info__[4]) + v += mapping[version_info[3]] + str(version_info[4]) - # Ensure version is valid and normalized - return str(packaging.version.Version(v)) + return v -__version__ = _get_version() +__version__ = _get_version(__version_info__) diff --git a/markdown/core.py b/markdown/core.py index 6c7822c..e2c0d88 100644 --- a/markdown/core.py +++ b/markdown/core.py @@ -23,7 +23,6 @@ import codecs import sys import logging import importlib -import pkg_resources from . import util from .preprocessors import build_preprocessors from .blockprocessors import build_block_parser @@ -141,9 +140,8 @@ class Markdown: Build extension from a string name, then return an instance. 
First attempt to load an entry point. The string name must be registered as an entry point in the - `markdown.extensions` group which points to a subclass of the `markdown.extensions.Extension` class. If - multiple distributions have registered the same name, the first one found by `pkg_resources.iter_entry_points` - is returned. + `markdown.extensions` group which points to a subclass of the `markdown.extensions.Extension` class. + If multiple distributions have registered the same name, the first one found is returned. If no entry point is found, assume dot notation (`path.to.module:ClassName`). Load the specified class and return an instance. If no class is specified, import the module and call a `makeExtension` function and return @@ -151,7 +149,7 @@ class Markdown: """ configs = dict(configs) - entry_points = [ep for ep in pkg_resources.iter_entry_points('markdown.extensions', ext_name)] + entry_points = [ep for ep in util.INSTALLED_EXTENSIONS if ep.name == ext_name] if entry_points: ext = entry_points[0].load() return ext(**configs) diff --git a/markdown/extensions/__init__.py b/markdown/extensions/__init__.py index 010e310..4bc8e5f 100644 --- a/markdown/extensions/__init__.py +++ b/markdown/extensions/__init__.py @@ -75,15 +75,18 @@ class Extension: md = args[0] try: self.extendMarkdown(md) - except TypeError: - # Must be a 2.x extension. Pass in a dumby md_globals. - self.extendMarkdown(md, {}) - warnings.warn( - "The 'md_globals' parameter of '{}.{}.extendMarkdown' is " - "deprecated.".format(self.__class__.__module__, self.__class__.__name__), - category=DeprecationWarning, - stacklevel=2 - ) + except TypeError as e: + if "missing 1 required positional argument" in str(e): + # Must be a 2.x extension. Pass in a dumby md_globals. 
+ self.extendMarkdown(md, {}) + warnings.warn( + "The 'md_globals' parameter of '{}.{}.extendMarkdown' is " + "deprecated.".format(self.__class__.__module__, self.__class__.__name__), + category=DeprecationWarning, + stacklevel=2 + ) + else: + raise def extendMarkdown(self, md): """ diff --git a/markdown/extensions/codehilite.py b/markdown/extensions/codehilite.py index c3f3257..ac45ede 100644 --- a/markdown/extensions/codehilite.py +++ b/markdown/extensions/codehilite.py @@ -139,16 +139,16 @@ class CodeHilite: def _parseHeader(self): """ - Determines language of a code block from shebang line and whether said - line should be removed or left in place. If the sheband line contains a - path (even a single /) then it is assumed to be a real shebang line and - left alone. However, if no path is given (e.i.: #!python or :::python) - then it is assumed to be a mock shebang for language identifitation of - a code fragment and removed from the code block prior to processing for - code highlighting. When a mock shebang (e.i: #!python) is found, line - numbering is turned on. When colons are found in place of a shebang - (e.i.: :::python), line numbering is left in the current state - off - by default. + Determines language of a code block from shebang line and whether the + said line should be removed or left in place. If the shebang line + contains a path (even a single /) then it is assumed to be a real + shebang line and left alone. However, if no path is given + (i.e.: #!python or :::python) then it is assumed to be a mock shebang + for language identification of a code fragment and removed from the + code block prior to processing for code highlighting. When a mock + shebang (i.e.: #!python) is found, line numbering is turned on. When + colons are found in place of a shebang (i.e.: :::python), line + numbering is left in the current state - off by default. 
Also parses optional list of highlight lines, like: diff --git a/markdown/extensions/toc.py b/markdown/extensions/toc.py index 8f2b13f..b6cdc73 100644 --- a/markdown/extensions/toc.py +++ b/markdown/extensions/toc.py @@ -15,9 +15,10 @@ License: [BSD](https://opensource.org/licenses/bsd-license.php) from . import Extension from ..treeprocessors import Treeprocessor -from ..util import code_escape, parseBoolValue, AMP_SUBSTITUTE, HTML_PLACEHOLDER_RE +from ..util import code_escape, parseBoolValue, AMP_SUBSTITUTE, HTML_PLACEHOLDER_RE, AtomicString from ..postprocessors import UnescapePostprocessor import re +import html import unicodedata import xml.etree.ElementTree as etree @@ -44,6 +45,18 @@ def unique(id, ids): return id +def get_name(el): + """Get title name.""" + + text = [] + for c in el.itertext(): + if isinstance(c, AtomicString): + text.append(html.unescape(c)) + else: + text.append(c) + return ''.join(text).strip() + + def stashedHTML2text(text, md, strip_entities=True): """ Extract raw HTML from stash, reduce to plain text and swap with placeholder. """ def _html_sub(m): @@ -253,7 +266,7 @@ class TocTreeprocessor(Treeprocessor): self.set_level(el) if int(el.tag[-1]) < self.toc_top or int(el.tag[-1]) > self.toc_bottom: continue - text = ''.join(el.itertext()).strip() + text = get_name(el) # Do not override pre-existing ids if "id" not in el.attrib: diff --git a/markdown/test_tools.py b/markdown/test_tools.py index a42b14b..be7bbf1 100644 --- a/markdown/test_tools.py +++ b/markdown/test_tools.py @@ -167,7 +167,7 @@ class LegacyTestCase(unittest.TestCase, metaclass=LegacyTestMeta): arguments for all test files in the directory. In addition, properties can be defined for each individual set of test files within - the directory. The property should be given the name of the file wihtout the file + the directory. The property should be given the name of the file without the file extension. Any spaces and dashes in the filename should be replaced with underscores. 
The value of the property should be a `Kwargs` instance which contains the keyword arguments that should be passed to `Markdown` for that diff --git a/markdown/util.py b/markdown/util.py index e7bc295..a8db7bd 100644 --- a/markdown/util.py +++ b/markdown/util.py @@ -27,6 +27,11 @@ import warnings import xml.etree.ElementTree from .pep562 import Pep562 +try: + from importlib import metadata +except ImportError: + # =36.6", "wheel"] +build-backend = "setuptools.build_meta" diff --git a/setup.cfg b/setup.cfg index 54809c0..5ac619d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,3 @@ -[bdist_wheel] -universal = 1 - [metadata] license_file = LICENSE.md diff --git a/setup.py b/setup.py index a3934ec..62f870d 100755 --- a/setup.py +++ b/setup.py @@ -88,7 +88,7 @@ setup( license='BSD License', packages=['markdown', 'markdown.extensions'], python_requires='>=3.5', - install_requires=['setuptools >= 36'], + install_requires=["importlib_metadata;python_version<'3.8'"], extras_require={ 'testing': [ 'coverage', diff --git a/tests/basic/markdown-documentation-basics.html b/tests/basic/markdown-documentation-basics.html index d9214a2..97c1e31 100644 --- a/tests/basic/markdown-documentation-basics.html +++ b/tests/basic/markdown-documentation-basics.html @@ -23,7 +23,7 @@ can see the source for it by adding '.t

A paragraph is simply one or more consecutive lines of text, separated by one or more blank lines. (A blank line is any line that looks like a blank line -- a line containing nothing spaces or tabs is considered -blank.) Normal paragraphs should not be intended with spaces or tabs.

+blank.) Normal paragraphs should not be indented with spaces or tabs.

Markdown offers two styles of headers: Setext and atx. Setext-style headers for <h1> and <h2> are created by "underlining" with equal signs (=) and hyphens (-), respectively. diff --git a/tests/basic/markdown-documentation-basics.txt b/tests/basic/markdown-documentation-basics.txt index 6c5a6fd..6de671a 100644 --- a/tests/basic/markdown-documentation-basics.txt +++ b/tests/basic/markdown-documentation-basics.txt @@ -37,7 +37,7 @@ can [see the source for it by adding '.text' to the URL] [src]. A paragraph is simply one or more consecutive lines of text, separated by one or more blank lines. (A blank line is any line that looks like a blank line -- a line containing nothing spaces or tabs is considered -blank.) Normal paragraphs should not be intended with spaces or tabs. +blank.) Normal paragraphs should not be indented with spaces or tabs. Markdown offers two styles of headers: *Setext* and *atx*. Setext-style headers for `

` and `

` are created by diff --git a/tests/basic/markdown-syntax.html b/tests/basic/markdown-syntax.html index 2b79d2d..cd7ba17 100644 --- a/tests/basic/markdown-syntax.html +++ b/tests/basic/markdown-syntax.html @@ -151,7 +151,7 @@ and & in your example code needs to be escaped.)

A paragraph is simply one or more consecutive lines of text, separated by one or more blank lines. (A blank line is any line that looks like a blank line -- a line containing nothing but spaces or tabs is considered -blank.) Normal paragraphs should not be intended with spaces or tabs.

+blank.) Normal paragraphs should not be indented with spaces or tabs.

The implication of the "one or more consecutive lines of text" rule is that Markdown supports "hard-wrapped" text paragraphs. This differs significantly from most other text-to-HTML formatters (including Movable @@ -328,7 +328,7 @@ items in <p> tags in the HTML output. For example, this input </ul>

List items may consist of multiple paragraphs. Each subsequent -paragraph in a list item must be intended by either 4 spaces +paragraph in a list item must be indented by either 4 spaces or one tab:

1.  This is a list item with two paragraphs. Lorem ipsum dolor
     sit amet, consectetuer adipiscing elit. Aliquam hendrerit
diff --git a/tests/basic/markdown-syntax.txt b/tests/basic/markdown-syntax.txt
index fabec2e..38f6e78 100644
--- a/tests/basic/markdown-syntax.txt
+++ b/tests/basic/markdown-syntax.txt
@@ -186,7 +186,7 @@ and `&` in your example code needs to be escaped.)
 A paragraph is simply one or more consecutive lines of text, separated
 by one or more blank lines. (A blank line is any line that looks like a
 blank line -- a line containing nothing but spaces or tabs is considered
-blank.) Normal paragraphs should not be intended with spaces or tabs.
+blank.) Normal paragraphs should not be indented with spaces or tabs.
 
 The implication of the "one or more consecutive lines of text" rule is
 that Markdown supports "hard-wrapped" text paragraphs. This differs
@@ -401,7 +401,7 @@ will turn into:
     
 
 List items may consist of multiple paragraphs. Each subsequent
-paragraph in a list item must be intended by either 4 spaces
+paragraph in a list item must be indented by either 4 spaces
 or one tab:
 
     1.  This is a list item with two paragraphs. Lorem ipsum dolor
diff --git a/tests/extensions/extra/markdown-syntax.html b/tests/extensions/extra/markdown-syntax.html
index 2b79d2d..cd7ba17 100644
--- a/tests/extensions/extra/markdown-syntax.html
+++ b/tests/extensions/extra/markdown-syntax.html
@@ -151,7 +151,7 @@ and & in your example code needs to be escaped.)

A paragraph is simply one or more consecutive lines of text, separated by one or more blank lines. (A blank line is any line that looks like a blank line -- a line containing nothing but spaces or tabs is considered -blank.) Normal paragraphs should not be intended with spaces or tabs.

+blank.) Normal paragraphs should not be indented with spaces or tabs.

The implication of the "one or more consecutive lines of text" rule is that Markdown supports "hard-wrapped" text paragraphs. This differs significantly from most other text-to-HTML formatters (including Movable @@ -328,7 +328,7 @@ items in <p> tags in the HTML output. For example, this input </ul>

List items may consist of multiple paragraphs. Each subsequent -paragraph in a list item must be intended by either 4 spaces +paragraph in a list item must be indented by either 4 spaces or one tab:

1.  This is a list item with two paragraphs. Lorem ipsum dolor
     sit amet, consectetuer adipiscing elit. Aliquam hendrerit
diff --git a/tests/extensions/extra/markdown-syntax.txt b/tests/extensions/extra/markdown-syntax.txt
index fabec2e..38f6e78 100644
--- a/tests/extensions/extra/markdown-syntax.txt
+++ b/tests/extensions/extra/markdown-syntax.txt
@@ -186,7 +186,7 @@ and `&` in your example code needs to be escaped.)
 A paragraph is simply one or more consecutive lines of text, separated
 by one or more blank lines. (A blank line is any line that looks like a
 blank line -- a line containing nothing but spaces or tabs is considered
-blank.) Normal paragraphs should not be intended with spaces or tabs.
+blank.) Normal paragraphs should not be indented with spaces or tabs.
 
 The implication of the "one or more consecutive lines of text" rule is
 that Markdown supports "hard-wrapped" text paragraphs. This differs
@@ -401,7 +401,7 @@ will turn into:
     
 
 List items may consist of multiple paragraphs. Each subsequent
-paragraph in a list item must be intended by either 4 spaces
+paragraph in a list item must be indented by either 4 spaces
 or one tab:
 
     1.  This is a list item with two paragraphs. Lorem ipsum dolor
diff --git a/tests/extensions/toc.html b/tests/extensions/toc.html
index 4936f0d..1f06b68 100644
--- a/tests/extensions/toc.html
+++ b/tests/extensions/toc.html
@@ -135,7 +135,7 @@ and & in your example code needs to be escaped.)

A paragraph is simply one or more consecutive lines of text, separated by one or more blank lines. (A blank line is any line that looks like a blank line -- a line containing nothing but spaces or tabs is considered -blank.) Normal paragraphs should not be intended with spaces or tabs.

+blank.) Normal paragraphs should not be indented with spaces or tabs.

The implication of the "one or more consecutive lines of text" rule is that Markdown supports "hard-wrapped" text paragraphs. This differs significantly from most other text-to-HTML formatters (including Movable @@ -309,7 +309,7 @@ items in <p> tags in the HTML output. For example, this input </ul>

List items may consist of multiple paragraphs. Each subsequent -paragraph in a list item must be intended by either 4 spaces +paragraph in a list item must be indented by either 4 spaces or one tab:

1.  This is a list item with two paragraphs. Lorem ipsum dolor
     sit amet, consectetuer adipiscing elit. Aliquam hendrerit
diff --git a/tests/extensions/toc.txt b/tests/extensions/toc.txt
index f71afd2..1a1de34 100644
--- a/tests/extensions/toc.txt
+++ b/tests/extensions/toc.txt
@@ -149,7 +149,7 @@ and `&` in your example code needs to be escaped.)
 A paragraph is simply one or more consecutive lines of text, separated
 by one or more blank lines. (A blank line is any line that looks like a
 blank line -- a line containing nothing but spaces or tabs is considered
-blank.) Normal paragraphs should not be intended with spaces or tabs.
+blank.) Normal paragraphs should not be indented with spaces or tabs.
 
 The implication of the "one or more consecutive lines of text" rule is
 that Markdown supports "hard-wrapped" text paragraphs. This differs
@@ -364,7 +364,7 @@ will turn into:
     
 
 List items may consist of multiple paragraphs. Each subsequent
-paragraph in a list item must be intended by either 4 spaces
+paragraph in a list item must be indented by either 4 spaces
 or one tab:
 
     1.  This is a list item with two paragraphs. Lorem ipsum dolor
diff --git a/tests/pl/Tests_2004/Markdown Documentation - Basics.html b/tests/pl/Tests_2004/Markdown Documentation - Basics.html
index d5bdbb2..342f0c1 100644
--- a/tests/pl/Tests_2004/Markdown Documentation - Basics.html	
+++ b/tests/pl/Tests_2004/Markdown Documentation - Basics.html	
@@ -29,7 +29,7 @@ can see the source for it by adding '.t
 

A paragraph is simply one or more consecutive lines of text, separated by one or more blank lines. (A blank line is any line that looks like a blank line -- a line containing nothing spaces or tabs is considered -blank.) Normal paragraphs should not be intended with spaces or tabs.

+blank.) Normal paragraphs should not be indented with spaces or tabs.

Markdown offers two styles of headers: Setext and atx. Setext-style headers for <h1> and <h2> are created by diff --git a/tests/pl/Tests_2004/Markdown Documentation - Syntax.html b/tests/pl/Tests_2004/Markdown Documentation - Syntax.html index 5c01306..7847793 100644 --- a/tests/pl/Tests_2004/Markdown Documentation - Syntax.html +++ b/tests/pl/Tests_2004/Markdown Documentation - Syntax.html @@ -186,7 +186,7 @@ and & in your example code needs to be escaped.)

A paragraph is simply one or more consecutive lines of text, separated by one or more blank lines. (A blank line is any line that looks like a blank line -- a line containing nothing but spaces or tabs is considered -blank.) Normal paragraphs should not be intended with spaces or tabs.

+blank.) Normal paragraphs should not be indented with spaces or tabs.

The implication of the "one or more consecutive lines of text" rule is that Markdown supports "hard-wrapped" text paragraphs. This differs @@ -414,7 +414,7 @@ items in <p> tags in the HTML output. For example, this input

List items may consist of multiple paragraphs. Each subsequent -paragraph in a list item must be intended by either 4 spaces +paragraph in a list item must be indented by either 4 spaces or one tab:

1.  This is a list item with two paragraphs. Lorem ipsum dolor
diff --git a/tests/pl/Tests_2007/Markdown Documentation - Basics.html b/tests/pl/Tests_2007/Markdown Documentation - Basics.html
index d5bdbb2..342f0c1 100644
--- a/tests/pl/Tests_2007/Markdown Documentation - Basics.html	
+++ b/tests/pl/Tests_2007/Markdown Documentation - Basics.html	
@@ -29,7 +29,7 @@ can see the source for it by adding '.t
 

A paragraph is simply one or more consecutive lines of text, separated by one or more blank lines. (A blank line is any line that looks like a blank line -- a line containing nothing spaces or tabs is considered -blank.) Normal paragraphs should not be intended with spaces or tabs.

+blank.) Normal paragraphs should not be indented with spaces or tabs.

Markdown offers two styles of headers: Setext and atx. Setext-style headers for <h1> and <h2> are created by diff --git a/tests/pl/Tests_2007/Markdown Documentation - Syntax.html b/tests/pl/Tests_2007/Markdown Documentation - Syntax.html index 5c01306..7847793 100644 --- a/tests/pl/Tests_2007/Markdown Documentation - Syntax.html +++ b/tests/pl/Tests_2007/Markdown Documentation - Syntax.html @@ -186,7 +186,7 @@ and & in your example code needs to be escaped.)

A paragraph is simply one or more consecutive lines of text, separated by one or more blank lines. (A blank line is any line that looks like a blank line -- a line containing nothing but spaces or tabs is considered -blank.) Normal paragraphs should not be intended with spaces or tabs.

+blank.) Normal paragraphs should not be indented with spaces or tabs.

The implication of the "one or more consecutive lines of text" rule is that Markdown supports "hard-wrapped" text paragraphs. This differs @@ -414,7 +414,7 @@ items in <p> tags in the HTML output. For example, this input

List items may consist of multiple paragraphs. Each subsequent -paragraph in a list item must be intended by either 4 spaces +paragraph in a list item must be indented by either 4 spaces or one tab:

1.  This is a list item with two paragraphs. Lorem ipsum dolor
diff --git a/tests/test_apis.py b/tests/test_apis.py
index 39236f2..6564c66 100644
--- a/tests/test_apis.py
+++ b/tests/test_apis.py
@@ -337,7 +337,7 @@ class RegistryTests(unittest.TestCase):
     def testRegistryDelItem(self):
         r = markdown.util.Registry()
         r.register(Item('a'), 'a', 20)
-        with self.assertRaises(TypeError):
+        with self.assertRaises(KeyError):
             del r[0]
         # TODO: restore this when deprecated __del__ is removed.
         # with self.assertRaises(TypeError):
@@ -352,7 +352,7 @@ class RegistryTests(unittest.TestCase):
             self.assertEqual(list(r), ['a', 'c'])
             del r['a']
             self.assertEqual(list(r), ['c'])
-            with self.assertRaises(TypeError):
+            with self.assertRaises(KeyError):
                 del r['badname']
             del r['c']
             self.assertEqual(list(r), [])
diff --git a/tests/test_meta.py b/tests/test_meta.py
new file mode 100644
index 0000000..10a2d33
--- /dev/null
+++ b/tests/test_meta.py
@@ -0,0 +1,24 @@
+import unittest
+from markdown.__meta__ import _get_version, __version__
+
+
+class TestVersion(unittest.TestCase):
+
+    def test_get_version(self):
+        """Test that _get_version formats __version_info__ as required by PEP 440."""
+
+        self.assertEqual(_get_version((1, 1, 2, 'dev', 0)), "1.1.2.dev0")
+        self.assertEqual(_get_version((1, 1, 2, 'alpha', 1)), "1.1.2a1")
+        self.assertEqual(_get_version((1, 2, 0, 'beta', 2)), "1.2b2")
+        self.assertEqual(_get_version((1, 2, 0, 'rc', 4)), "1.2rc4")
+        self.assertEqual(_get_version((1, 2, 0, 'final', 0)), "1.2")
+
+    def test__version__IsValid(self):
+        """Test that __version__ is valid and normalized."""
+
+        try:
+            import packaging.version
+        except ImportError:
+            from pkg_resources.extern import packaging
+
+        self.assertEqual(__version__, str(packaging.version.Version(__version__)))
diff --git a/tests/test_syntax/extensions/test_toc.py b/tests/test_syntax/extensions/test_toc.py
index 5b9ad92..3fc9780 100644
--- a/tests/test_syntax/extensions/test_toc.py
+++ b/tests/test_syntax/extensions/test_toc.py
@@ -27,6 +27,28 @@ class TestTOC(TestCase):
 
     # TODO: Move the rest of the TOC tests here.
 
+    def test_escaped_code(self):
+        self.assertMarkdownRenders(
+            self.dedent(
+                '''
+                [TOC]
+
+                # `<test>`
+                '''
+            ),
+            self.dedent(
+                '''
+                <div class="toc">
+                <ul>
+                <li><a href="#test">&lt;test&gt;</a></li>
+                </ul>
+                </div>
+                <h1 id="test"><code>&lt;test&gt;</code></h1>
+                '''
+            ),
+            extensions=['toc']
+        )
+
     def test_escaped_char_in_id(self):
         self.assertMarkdownRenders(
             r'# escaped\_character',
diff --git a/tox.ini b/tox.ini
index 14dcc21..ea153e4 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
 [tox]
-envlist = py35, py36, py37, py38, pypy3, flake8, checkspelling, pep517check
+envlist = py35, py36, py37, py38, pypy3, flake8, checkspelling, pep517check, checklinks
 isolated_build = True
 min_verison = 1.9
 
@@ -20,6 +20,11 @@ deps =
     mkdocs_nature
 commands = {toxinidir}/checkspelling.sh
 
+[testenv:checklinks]
+whitelist_externals = markdown-link-check
+deps =
+commands = {toxinidir}/checklinks.sh
+
 [testenv:pep517check]
 deps = pep517
 commands = python -m pep517.check {toxinidir}
-- 
2.34.1