examples/includes/PHP-Markdown-Extra-1.2.3/markdown.php

   1 <?php
   2 #
   3 # Markdown Extra  -  A text-to-HTML conversion tool for web writers
   4 #
   5 # PHP Markdown & Extra
   6 # Copyright (c) 2004-2008 Michel Fortin
   7 # <http://www.michelf.com/projects/php-markdown/>
   8 #
   9 # Original Markdown
  10 # Copyright (c) 2004-2006 John Gruber
  11 # <http://daringfireball.net/projects/markdown/>
  12 #
  13
  14
  15 define( 'MARKDOWN_VERSION',  "1.0.1m" ); # Sat 21 Jun 2008
  16 define( 'MARKDOWNEXTRA_VERSION',  "1.2.3" ); # Wed 31 Dec 2008
  17
  18
  19 #
  20 # Global default settings:
  21 #
  22
  23 # Change to ">" for HTML output
  24 @define( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX',  " />");
  25
  26 # Define the width of a tab for code blocks.
  27 @define( 'MARKDOWN_TAB_WIDTH',     4 );
  28
  29 # Optional title attribute for footnote links and backlinks.
  30 @define( 'MARKDOWN_FN_LINK_TITLE',         "" );
  31 @define( 'MARKDOWN_FN_BACKLINK_TITLE',     "" );
  32
  33 # Optional class attribute for footnote links and backlinks.
  34 @define( 'MARKDOWN_FN_LINK_CLASS',         "" );
  35 @define( 'MARKDOWN_FN_BACKLINK_CLASS',     "" );
  36
  37
  38 #
  39 # WordPress settings:
  40 #
  41
  42 # Change to false to remove Markdown from posts and/or comments.
  43 @define( 'MARKDOWN_WP_POSTS',      true );
  44 @define( 'MARKDOWN_WP_COMMENTS',   true );
  45
  46
  47
  48 ### Standard Function Interface ###
  49
  50 @define( 'MARKDOWN_PARSER_CLASS',  'MarkdownExtra_Parser' );
  51
  52 function Markdown($text) {
  53 #
  54 # Initialize the parser and return the result of its transform method.
  55 #
  56     # Setup static parser variable.
  57     static $parser;
  58     if (!isset($parser)) {
  59         $parser_class = MARKDOWN_PARSER_CLASS;
  60         $parser = new $parser_class;
  61     }
  62
  63     # Transform text using parser.
  64     return $parser->transform($text);
  65 }
  66
  67
  68 ### WordPress Plugin Interface ###
  69
  70 /*
  71 Plugin Name: Markdown Extra
  72 Plugin URI: http://www.michelf.com/projects/php-markdown/
  73 Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>
Version: 1.2.3
  75 Author: Michel Fortin
  76 Author URI: http://www.michelf.com/
  77 */
  78
  79 if (isset($wp_version)) {
  80     # More details about how it works here:
  81     # <http://www.michelf.com/weblog/2005/wordpress-text-flow-vs-markdown/>
  82
  83     # Post content and excerpts
  84     # - Remove WordPress paragraph generator.
  85     # - Run Markdown on excerpt, then remove all tags.
  86     # - Add paragraph tag around the excerpt, but remove it for the excerpt rss.
    if (MARKDOWN_WP_POSTS) {
        # Take WordPress' paragraph generator (wpautop) off post content,
        # feed content and excerpts.
        remove_filter('the_content',     'wpautop');
        remove_filter('the_content_rss', 'wpautop');
        remove_filter('the_excerpt',     'wpautop');
        # Run Markdown at priority 6 on content, feed content and excerpts.
        add_filter('the_content',     'mdwp_MarkdownPost', 6);
        add_filter('the_content_rss', 'mdwp_MarkdownPost', 6);
        add_filter('get_the_excerpt', 'mdwp_MarkdownPost', 6);
        # Excerpts are trimmed right after Markdown (priority 7); paragraph
        # tags are added for display and stripped again for the excerpt feed.
        add_filter('get_the_excerpt', 'trim', 7);
        add_filter('the_excerpt',     'mdwp_add_p');
        add_filter('the_excerpt_rss', 'mdwp_strip_p');

        # balanceTags is removed from the two save-time hooks and registered
        # on the display hooks instead, after Markdown's priority 6.
        remove_filter('content_save_pre',  'balanceTags', 50);
        remove_filter('excerpt_save_pre',  'balanceTags', 50);
        add_filter('the_content',     'balanceTags', 50);
        add_filter('get_the_excerpt', 'balanceTags', 9);
    }
 103
 104     # Add a footnote id prefix to posts when inside a loop.
 105     function mdwp_MarkdownPost($text) {
 106         static $parser;
 107         if (!$parser) {
 108             $parser_class = MARKDOWN_PARSER_CLASS;
 109             $parser = new $parser_class;
 110         }
 111         if (is_single() || is_page() || is_feed()) {
 112             $parser->fn_id_prefix = "";
 113         } else {
 114             $parser->fn_id_prefix = get_the_ID() . ".";
 115         }
 116         return $parser->transform($text);
 117     }
 118
 119     # Comments
 120     # - Remove WordPress paragraph generator.
 121     # - Remove WordPress auto-link generator.
 122     # - Scramble important tags before passing them to the kses filter.
 123     # - Run Markdown on excerpt then remove paragraph tags.
    if (MARKDOWN_WP_COMMENTS) {
        # Drop WordPress' paragraph generator and auto-linker from comments.
        remove_filter('comment_text', 'wpautop', 30);
        remove_filter('comment_text', 'make_clickable');
        # On pre_comment_content: Markdown runs at priority 6, the listed
        # tags are swapped for placeholders at 8 and swapped back at 12.
        add_filter('pre_comment_content', 'Markdown', 6);
        add_filter('pre_comment_content', 'mdwp_hide_tags', 8);
        add_filter('pre_comment_content', 'mdwp_show_tags', 12);
        # Markdown also runs on comment text and excerpts; excerpts then
        # lose their paragraph tags.
        add_filter('get_comment_text',    'Markdown', 6);
        add_filter('get_comment_excerpt', 'Markdown', 6);
        add_filter('get_comment_excerpt', 'mdwp_strip_p', 7);

        # Parallel tables read by mdwp_hide_tags and mdwp_show_tags: entry N
        # of $mdwp_placeholders stands in for entry N of $mdwp_hidden_tags.
        global $mdwp_hidden_tags, $mdwp_placeholders;
        $mdwp_hidden_tags = explode(' ',
            '<p> </p> <pre> </pre> <ol> </ol> <ul> </ul> <li> </li>');
        # The ten placeholder strings are written ROT13-encoded in this
        # literal and decoded by str_rot13 before being split.
        $mdwp_placeholders = explode(' ', str_rot13(
            'pEj07ZbbBZ U1kqgh4w4p pre2zmeN6K QTi31t9pre ol0MP1jzJR '.
            'ML5IjmbRol ulANi1NsGY J7zRLJqPul liA8ctl16T K9nhooUHli'));
    }
 141
 142     function mdwp_add_p($text) {
 143         if (!preg_match('{^$|^<(p|ul|ol|dl|pre|blockquote)>}i', $text)) {
 144             $text = '<p>'.$text.'</p>';
 145             $text = preg_replace('{\n{2,}}', "</p>\n\n<p>", $text);
 146         }
 147         return $text;
 148     }
 149
 150     function mdwp_strip_p($t) { return preg_replace('{</?p>}i', '', $t); }
 151
 152     function mdwp_hide_tags($text) {
 153         global $mdwp_hidden_tags, $mdwp_placeholders;
 154         return str_replace($mdwp_hidden_tags, $mdwp_placeholders, $text);
 155     }
 156     function mdwp_show_tags($text) {
 157         global $mdwp_hidden_tags, $mdwp_placeholders;
 158         return str_replace($mdwp_placeholders, $mdwp_hidden_tags, $text);
 159     }
 160 }
 161
 162
 163 ### bBlog Plugin Info ###
 164
 165 function identify_modifier_markdown() {
 166     return array(
 167         'name' => 'markdown',
 168         'type' => 'modifier',
 169         'nicename' => 'PHP Markdown Extra',
 170         'description' => 'A text-to-HTML conversion tool for web writers',
 171         'authors' => 'Michel Fortin and John Gruber',
 172         'licence' => 'GPL',
 173         'version' => MARKDOWNEXTRA_VERSION,
 174         'help' => '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>',
 175         );
 176 }
 177
 178
 179 ### Smarty Modifier Interface ###
 180
 181 function smarty_modifier_markdown($text) {
 182     return Markdown($text);
 183 }
 184
 185
 186 ### Textile Compatibility Mode ###
 187
 188 # Rename this file to "classTextile.php" and it can replace Textile everywhere.
 189
 190 if (strcasecmp(substr(__FILE__, -16), "classTextile.php") == 0) {
 191     # Try to include PHP SmartyPants. Should be in the same directory.
 192     @include_once 'smartypants.php';
 193     # Fake Textile class. It calls Markdown instead.
 194     class Textile {
 195         function TextileThis($text, $lite='', $encode='') {
 196             if ($lite == '' && $encode == '')    $text = Markdown($text);
 197             if (function_exists('SmartyPants'))  $text = SmartyPants($text);
 198             return $text;
 199         }
 200         # Fake restricted version: restrictions are not supported for now.
 201         function TextileRestricted($text, $lite='', $noimage='') {
 202             return $this->TextileThis($text, $lite);
 203         }
 204         # Workaround to ensure compatibility with TextPattern 4.0.3.
 205         function blockLite($text) { return $text; }
 206     }
 207 }
 208
 209
 210
 211 #
 212 # Markdown Parser Class
 213 #
 214
 215 class Markdown_Parser {
 216
    # Regex to match balanced [brackets].
    # Needed to insert a maximum bracket depth while converting to PHP.
    # The constructor expands the pattern to this many nesting levels and
    # stores it in $nested_brackets_re.
    var $nested_brackets_depth = 6;
    var $nested_brackets_re;

    # Same scheme for parentheses nested inside a URL; the pattern is
    # built by the constructor from this depth.
    var $nested_url_parenthesis_depth = 4;
    var $nested_url_parenthesis_re;

    # Table of hash values for escaped characters:
    # the constructor turns this character list into the character class
    # stored in $escape_chars_re.
    var $escape_chars = '\`*_{}[]()>#+-.!';
    var $escape_chars_re;

    # Change to ">" for HTML output.
    var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX;
    # Tab width, taken from MARKDOWN_TAB_WIDTH.
    var $tab_width = MARKDOWN_TAB_WIDTH;

    # Change to `true` to disallow markup or entities.
    var $no_markup = false;
    var $no_entities = false;

    # Predefined urls and titles for reference links and images.
    # setup() copies them into $urls and $titles before each transformation.
    var $predef_urls = array();
    var $predef_titles = array();
 240
 241
 242     function Markdown_Parser() {
 243     #
 244     # Constructor function. Initialize appropriate member variables.
 245     #
 246         $this->_initDetab();
 247         $this->prepareItalicsAndBold();
 248
 249         $this->nested_brackets_re =
 250             str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
 251             str_repeat('\])*', $this->nested_brackets_depth);
 252
 253         $this->nested_url_parenthesis_re =
 254             str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).
 255             str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);
 256
 257         $this->escape_chars_re = '['.preg_quote($this->escape_chars).']';
 258
 259         # Sort document, block, and span gamut in ascendent priority order.
 260         asort($this->document_gamut);
 261         asort($this->block_gamut);
 262         asort($this->span_gamut);
 263     }
 264
 265
 266     # Internal hashes used during transformation.
    # Link URLs and titles keyed by lower-cased link id, and the table
    # mapping hash tokens to the text they replace. All three are reset by
    # setup() and emptied by teardown().
    var $urls = array();
    var $titles = array();
    var $html_hashes = array();

    # Status flag to avoid invalid nesting.
    # doAnchors sets it while it runs and returns early when it is set.
    var $in_anchor = false;
 273
 274
 275     function setup() {
 276     #
 277     # Called before the transformation process starts to setup parser
 278     # states.
 279     #
 280         # Clear global hashes.
 281         $this->urls = $this->predef_urls;
 282         $this->titles = $this->predef_titles;
 283         $this->html_hashes = array();
 284
 285         $in_anchor = false;
 286     }
 287
 288     function teardown() {
 289     #
 290     # Called after the transformation process to clear any variable
 291     # which may be taking up memory unnecessarly.
 292     #
 293         $this->urls = array();
 294         $this->titles = array();
 295         $this->html_hashes = array();
 296     }
 297
 298
 299     function transform($text) {
 300     #
 301     # Main function. Performs some preprocessing on the input text
 302     # and pass it through the document gamut.
 303     #
 304         $this->setup();
 305
 306         # Remove UTF-8 BOM and marker character in input, if present.
 307         $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);
 308
 309         # Standardize line endings:
 310         #   DOS to Unix and Mac to Unix
 311         $text = preg_replace('{\r\n?}', "\n", $text);
 312
 313         # Make sure $text ends with a couple of newlines:
 314         $text .= "\n\n";
 315
 316         # Convert all tabs to spaces.
 317         $text = $this->detab($text);
 318
 319         # Turn block-level HTML blocks into hash entries
 320         $text = $this->hashHTMLBlocks($text);
 321
 322         # Strip any lines consisting only of spaces and tabs.
 323         # This makes subsequent regexen easier to write, because we can
 324         # match consecutive blank lines with /\n+/ instead of something
 325         # contorted like /[ ]*\n+/ .
 326         $text = preg_replace('/^[ ]+$/m', '', $text);
 327
 328         # Run document gamut methods.
 329         foreach ($this->document_gamut as $method => $priority) {
 330             $text = $this->$method($text);
 331         }
 332
 333         $this->teardown();
 334
 335         return $text . "\n";
 336     }
 337
    # Document-level transformations, as method name => priority.
    # The constructor sorts the table by priority and transform() calls the
    # methods in that order.
    var $document_gamut = array(
        # Strip link definitions, store in hashes.
        "stripLinkDefinitions" => 20,

        "runBasicBlockGamut"   => 30,
        );
 344
 345
 346     function stripLinkDefinitions($text) {
 347     #
 348     # Strips link definitions from text, stores the URLs and titles in
 349     # hash references.
 350     #
 351         $less_than_tab = $this->tab_width - 1;
 352
 353         # Link defs are in the form: ^[id]: url "optional title"
 354         $text = preg_replace_callback('{
 355                             ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
 356                               [ ]*
 357                               \n?               # maybe *one* newline
 358                               [ ]*
 359                             <?(\S+?)>?          # url = $2
 360                               [ ]*
 361                               \n?               # maybe one newline
 362                               [ ]*
 363                             (?:
 364                                 (?<=\s)         # lookbehind for whitespace
 365                                 ["(]
 366                                 (.*?)           # title = $3
 367                                 [")]
 368                                 [ ]*
 369                             )?  # title is optional
 370                             (?:\n+|\Z)
 371             }xm',
 372             array(&$this, '_stripLinkDefinitions_callback'),
 373             $text);
 374         return $text;
 375     }
 376     function _stripLinkDefinitions_callback($matches) {
 377         $link_id = strtolower($matches[1]);
 378         $this->urls[$link_id] = $matches[2];
 379         $this->titles[$link_id] =& $matches[3];
 380         return ''; # String that will replace the block
 381     }
 382
 383
 384     function hashHTMLBlocks($text) {
 385         if ($this->no_markup)  return $text;
 386
 387         $less_than_tab = $this->tab_width - 1;
 388
 389         # Hashify HTML blocks:
 390         # We only want to do this for block-level HTML tags, such as headers,
 391         # lists, and tables. That's because we still want to wrap <p>s around
 392         # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
 393         # phrase emphasis, and spans. The list of tags we're looking for is
 394         # hard-coded:
 395         #
 396         # *  List "a" is made of tags which can be both inline or block-level.
 397         #    These will be treated block-level when the start tag is alone on
 398         #    its line, otherwise they're not matched here and will be taken as
 399         #    inline later.
 400         # *  List "b" is made of tags which are always block-level;
 401         #
 402         $block_tags_a_re = 'ins|del';
 403         $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
 404                            'script|noscript|form|fieldset|iframe|math';
 405
 406         # Regular expression for the content of a block tag.
 407         $nested_tags_level = 4;
 408         $attr = '
 409             (?>             # optional tag attributes
 410               \s            # starts with whitespace
 411               (?>
 412                 [^>"/]+     # text outside quotes
 413               |
 414                 /+(?!>)     # slash not followed by ">"
 415               |
 416                 "[^"]*"     # text inside double quotes (tolerate ">")
 417               |
 418                 \'[^\']*\'  # text inside single quotes (tolerate ">")
 419               )*
 420             )?
 421             ';
 422         $content =
 423             str_repeat('
 424                 (?>
 425                   [^<]+         # content without tag
 426                 |
 427                   <\2           # nested opening tag
 428                     '.$attr.'   # attributes
 429                     (?>
 430                       />
 431                     |
 432                       >', $nested_tags_level).  # end of opening tag
 433                       '.*?'.                    # last level nested tag content
 434             str_repeat('
 435                       </\2\s*>  # closing nested tag
 436                     )
 437                   |
 438                     <(?!/\2\s*> # other tags with a different name
 439                   )
 440                 )*',
 441                 $nested_tags_level);
 442         $content2 = str_replace('\2', '\3', $content);
 443
 444         # First, look for nested blocks, e.g.:
 445         #   <div>
 446         #       <div>
 447         #       tags for inner block must be indented.
 448         #       </div>
 449         #   </div>
 450         #
 451         # The outermost tags must start at the left margin for this to match, and
 452         # the inner nested divs must be indented.
 453         # We need to do this before the next, more liberal match, because the next
 454         # match will start at the first `<div>` and stop at the first `</div>`.
 455         $text = preg_replace_callback('{(?>
 456             (?>
 457                 (?<=\n\n)       # Starting after a blank line
 458                 |               # or
 459                 \A\n?           # the beginning of the doc
 460             )
 461             (                       # save in $1
 462
 463               # Match from `\n<tag>` to `</tag>\n`, handling nested tags
 464               # in between.
 465
 466                         [ ]{0,'.$less_than_tab.'}
 467                         <('.$block_tags_b_re.')# start tag = $2
 468                         '.$attr.'>          # attributes followed by > and \n
 469                         '.$content.'        # content, support nesting
 470                         </\2>               # the matching end tag
 471                         [ ]*                # trailing spaces/tabs
 472                         (?=\n+|\Z)  # followed by a newline or end of document
 473
 474             | # Special version for tags of group a.
 475
 476                         [ ]{0,'.$less_than_tab.'}
 477                         <('.$block_tags_a_re.')# start tag = $3
 478                         '.$attr.'>[ ]*\n    # attributes followed by >
 479                         '.$content2.'       # content, support nesting
 480                         </\3>               # the matching end tag
 481                         [ ]*                # trailing spaces/tabs
 482                         (?=\n+|\Z)  # followed by a newline or end of document
 483
 484             | # Special case just for <hr />. It was easier to make a special
 485               # case than to make the other regex more complicated.
 486
 487                         [ ]{0,'.$less_than_tab.'}
 488                         <(hr)               # start tag = $2
 489                         '.$attr.'           # attributes
 490                         /?>                 # the matching end tag
 491                         [ ]*
 492                         (?=\n{2,}|\Z)       # followed by a blank line or end of document
 493
 494             | # Special case for standalone HTML comments:
 495
 496                     [ ]{0,'.$less_than_tab.'}
 497                     (?s:
 498                         <!-- .*? -->
 499                     )
 500                     [ ]*
 501                     (?=\n{2,}|\Z)       # followed by a blank line or end of document
 502
 503             | # PHP and ASP-style processor instructions (<? and <%)
 504
 505                     [ ]{0,'.$less_than_tab.'}
 506                     (?s:
 507                         <([?%])         # $2
 508                         .*?
 509                         \2>
 510                     )
 511                     [ ]*
 512                     (?=\n{2,}|\Z)       # followed by a blank line or end of document
 513
 514             )
 515             )}Sxmi',
 516             array(&$this, '_hashHTMLBlocks_callback'),
 517             $text);
 518
 519         return $text;
 520     }
 521     function _hashHTMLBlocks_callback($matches) {
 522         $text = $matches[1];
 523         $key  = $this->hashBlock($text);
 524         return "\n\n$key\n\n";
 525     }
 526
 527
 528     function hashPart($text, $boundary = 'X') {
 529     #
 530     # Called whenever a tag must be hashed when a function insert an atomic
 531     # element in the text stream. Passing $text to through this function gives
 532     # a unique text-token which will be reverted back when calling unhash.
 533     #
 534     # The $boundary argument specify what character should be used to surround
 535     # the token. By convension, "B" is used for block elements that needs not
 536     # to be wrapped into paragraph tags at the end, ":" is used for elements
 537     # that are word separators and "X" is used in the general case.
 538     #
 539         # Swap back any tag hash found in $text so we do not have to `unhash`
 540         # multiple times at the end.
 541         $text = $this->unhash($text);
 542
 543         # Then hash the block.
 544         static $i = 0;
 545         $key = "$boundary\x1A" . ++$i . $boundary;
 546         $this->html_hashes[$key] = $text;
 547         return $key; # String that will replace the tag.
 548     }
 549
 550
 551     function hashBlock($text) {
 552     #
 553     # Shortcut function for hashPart with block-level boundaries.
 554     #
 555         return $this->hashPart($text, 'B');
 556     }
 557
 558
    var $block_gamut = array(
    #
    # These are all the transformations that form block-level
    # tags like paragraphs, headers, and list items.
    # Entries are method name => priority; the constructor sorts them by
    # priority and runBasicBlockGamut calls the methods in that order.
    #
        "doHeaders"         => 10,
        "doHorizontalRules" => 20,

        "doLists"           => 40,
        "doCodeBlocks"      => 50,
        "doBlockQuotes"     => 60,
        );
 571
 572     function runBlockGamut($text) {
 573     #
 574     # Run block gamut tranformations.
 575     #
 576         # We need to escape raw HTML in Markdown source before doing anything
 577         # else. This need to be done for each block, and not only at the
 578         # begining in the Markdown function since hashed blocks can be part of
 579         # list items and could have been indented. Indented blocks would have
 580         # been seen as a code block in a previous pass of hashHTMLBlocks.
 581         $text = $this->hashHTMLBlocks($text);
 582
 583         return $this->runBasicBlockGamut($text);
 584     }
 585
 586     function runBasicBlockGamut($text) {
 587     #
 588     # Run block gamut tranformations, without hashing HTML blocks. This is
 589     # useful when HTML blocks are known to be already hashed, like in the first
 590     # whole-document pass.
 591     #
 592         foreach ($this->block_gamut as $method => $priority) {
 593             $text = $this->$method($text);
 594         }
 595
 596         # Finally form paragraph and restore hashed blocks.
 597         $text = $this->formParagraphs($text);
 598
 599         return $text;
 600     }
 601
 602
 603     function doHorizontalRules($text) {
 604         # Do Horizontal Rules:
 605         return preg_replace(
 606             '{
 607                 ^[ ]{0,3}   # Leading space
 608                 ([-*_])     # $1: First marker
 609                 (?>         # Repeated marker group
 610                     [ ]{0,2}    # Zero, one, or two spaces.
 611                     \1          # Marker character
 612                 ){2,}       # Group repeated at least twice
 613                 [ ]*        # Tailing spaces
 614                 $           # End of line.
 615             }mx',
 616             "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n",
 617             $text);
 618     }
 619
 620
    var $span_gamut = array(
    #
    # These are all the transformations that occur *within* block-level
    # tags like paragraphs, headers, and list items.
    # Entries are method name => priority; the constructor sorts them by
    # priority and runSpanGamut calls the methods in that order.
    #
        # Process character escapes, code spans, and inline HTML
        # in one shot.
        "parseSpan"           => -30,

        # Process anchor and image tags. Images must come first,
        # because ![foo][f] looks like an anchor.
        "doImages"            =>  10,
        "doAnchors"           =>  20,

        # Make links out of things like `<http://example.com/>`
        # Must come after doAnchors, because you can use < and >
        # delimiters in inline links like [this](<url>).
        "doAutoLinks"         =>  30,
        "encodeAmpsAndAngles" =>  40,

        "doItalicsAndBold"    =>  50,
        "doHardBreaks"        =>  60,
        );
 644
 645     function runSpanGamut($text) {
 646     #
 647     # Run span gamut tranformations.
 648     #
 649         foreach ($this->span_gamut as $method => $priority) {
 650             $text = $this->$method($text);
 651         }
 652
 653         return $text;
 654     }
 655
 656
 657     function doHardBreaks($text) {
 658         # Do hard breaks:
 659         return preg_replace_callback('/ {2,}\n/',
 660             array(&$this, '_doHardBreaks_callback'), $text);
 661     }
 662     function _doHardBreaks_callback($matches) {
 663         return $this->hashPart("<br$this->empty_element_suffix\n");
 664     }
 665
 666
 667     function doAnchors($text) {
 668     #
 669     # Turn Markdown link shortcuts into XHTML <a> tags.
 670     #
 671         if ($this->in_anchor) return $text;
 672         $this->in_anchor = true;
 673
 674         #
 675         # First, handle reference-style links: [link text] [id]
 676         #
 677         $text = preg_replace_callback('{
 678             (                   # wrap whole match in $1
 679               \[
 680                 ('.$this->nested_brackets_re.') # link text = $2
 681               \]
 682
 683               [ ]?              # one optional space
 684               (?:\n[ ]*)?       # one optional newline followed by spaces
 685
 686               \[
 687                 (.*?)       # id = $3
 688               \]
 689             )
 690             }xs',
 691             array(&$this, '_doAnchors_reference_callback'), $text);
 692
 693         #
 694         # Next, inline-style links: [link text](url "optional title")
 695         #
 696         $text = preg_replace_callback('{
 697             (               # wrap whole match in $1
 698               \[
 699                 ('.$this->nested_brackets_re.') # link text = $2
 700               \]
 701               \(            # literal paren
 702                 [ ]*
 703                 (?:
 704                     <(\S*)> # href = $3
 705                 |
 706                     ('.$this->nested_url_parenthesis_re.')  # href = $4
 707                 )
 708                 [ ]*
 709                 (           # $5
 710                   ([\'"])   # quote char = $6
 711                   (.*?)     # Title = $7
 712                   \6        # matching quote
 713                   [ ]*  # ignore any spaces/tabs between closing quote and )
 714                 )?          # title is optional
 715               \)
 716             )
 717             }xs',
 718             array(&$this, '_DoAnchors_inline_callback'), $text);
 719
 720         #
 721         # Last, handle reference-style shortcuts: [link text]
 722         # These must come last in case you've also got [link test][1]
 723         # or [link test](/foo)
 724         #
 725 //      $text = preg_replace_callback('{
 726 //          (                   # wrap whole match in $1
 727 //            \[
 728 //              ([^\[\]]+)      # link text = $2; can\'t contain [ or ]
 729 //            \]
 730 //          )
 731 //          }xs',
 732 //          array(&$this, '_doAnchors_reference_callback'), $text);
 733
 734         $this->in_anchor = false;
 735         return $text;
 736     }
 737     function _doAnchors_reference_callback($matches) {
 738         $whole_match =  $matches[1];
 739         $link_text   =  $matches[2];
 740         $link_id     =& $matches[3];
 741
 742         if ($link_id == "") {
 743             # for shortcut links like [this][] or [this].
 744             $link_id = $link_text;
 745         }
 746
 747         # lower-case and turn embedded newlines into spaces
 748         $link_id = strtolower($link_id);
 749         $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
 750
 751         if (isset($this->urls[$link_id])) {
 752             $url = $this->urls[$link_id];
 753             $url = $this->encodeAttribute($url);
 754
 755             $result = "<a href=\"$url\"";
 756             if ( isset( $this->titles[$link_id] ) ) {
 757                 $title = $this->titles[$link_id];
 758                 $title = $this->encodeAttribute($title);
 759                 $result .=  " title=\"$title\"";
 760             }
 761
 762             $link_text = $this->runSpanGamut($link_text);
 763             $result .= ">$link_text</a>";
 764             $result = $this->hashPart($result);
 765         }
 766         else {
 767             $result = $whole_match;
 768         }
 769         return $result;
 770     }
 771     function _doAnchors_inline_callback($matches) {
 772         $whole_match    =  $matches[1];
 773         $link_text      =  $this->runSpanGamut($matches[2]);
 774         $url            =  $matches[3] == '' ? $matches[4] : $matches[3];
 775         $title          =& $matches[7];
 776
 777         $url = $this->encodeAttribute($url);
 778
 779         $result = "<a href=\"$url\"";
 780         if (isset($title)) {
 781             $title = $this->encodeAttribute($title);
 782             $result .=  " title=\"$title\"";
 783         }
 784
 785         $link_text = $this->runSpanGamut($link_text);
 786         $result .= ">$link_text</a>";
 787
 788         return $this->hashPart($result);
 789     }
 790
 791
 792     function doImages($text) {
 793     #
 794     # Turn Markdown image shortcuts into <img> tags.
 795     #
 796         #
 797         # First, handle reference-style labeled images: ![alt text][id]
 798         #
 799         $text = preg_replace_callback('{
 800             (               # wrap whole match in $1
 801               !\[
 802                 ('.$this->nested_brackets_re.')     # alt text = $2
 803               \]
 804
 805               [ ]?              # one optional space
 806               (?:\n[ ]*)?       # one optional newline followed by spaces
 807
 808               \[
 809                 (.*?)       # id = $3
 810               \]
 811
 812             )
 813             }xs',
 814             array(&$this, '_doImages_reference_callback'), $text);
 815
 816         #
 817         # Next, handle inline images:  ![alt text](url "optional title")
 818         # Don't forget: encode * and _
 819         #
 820         $text = preg_replace_callback('{
 821             (               # wrap whole match in $1
 822               !\[
 823                 ('.$this->nested_brackets_re.')     # alt text = $2
 824               \]
 825               \s?           # One optional whitespace character
 826               \(            # literal paren
 827                 [ ]*
 828                 (?:
 829                     <(\S*)> # src url = $3
 830                 |
 831                     ('.$this->nested_url_parenthesis_re.')  # src url = $4
 832                 )
 833                 [ ]*
 834                 (           # $5
 835                   ([\'"])   # quote char = $6
 836                   (.*?)     # title = $7
 837                   \6        # matching quote
 838                   [ ]*
 839                 )?          # title is optional
 840               \)
 841             )
 842             }xs',
 843             array(&$this, '_doImages_inline_callback'), $text);
 844
 845         return $text;
 846     }
 847     function _doImages_reference_callback($matches) {
 848         $whole_match = $matches[1];
 849         $alt_text    = $matches[2];
 850         $link_id     = strtolower($matches[3]);
 851
 852         if ($link_id == "") {
 853             $link_id = strtolower($alt_text); # for shortcut links like ![this][].
 854         }
 855
 856         $alt_text = $this->encodeAttribute($alt_text);
 857         if (isset($this->urls[$link_id])) {
 858             $url = $this->encodeAttribute($this->urls[$link_id]);
 859             $result = "<img src=\"$url\" alt=\"$alt_text\"";
 860             if (isset($this->titles[$link_id])) {
 861                 $title = $this->titles[$link_id];
 862                 $title = $this->encodeAttribute($title);
 863                 $result .=  " title=\"$title\"";
 864             }
 865             $result .= $this->empty_element_suffix;
 866             $result = $this->hashPart($result);
 867         }
 868         else {
 869             # If there's no such link ID, leave intact:
 870             $result = $whole_match;
 871         }
 872
 873         return $result;
 874     }
 875     function _doImages_inline_callback($matches) {
 876         $whole_match    = $matches[1];
 877         $alt_text       = $matches[2];
 878         $url            = $matches[3] == '' ? $matches[4] : $matches[3];
 879         $title          =& $matches[7];
 880
 881         $alt_text = $this->encodeAttribute($alt_text);
 882         $url = $this->encodeAttribute($url);
 883         $result = "<img src=\"$url\" alt=\"$alt_text\"";
 884         if (isset($title)) {
 885             $title = $this->encodeAttribute($title);
 886             $result .=  " title=\"$title\""; # $title already quoted
 887         }
 888         $result .= $this->empty_element_suffix;
 889
 890         return $this->hashPart($result);
 891     }
 892
 893
 894     function doHeaders($text) {
 895         # Setext-style headers:
 896         #     Header 1
 897         #     ========
 898         #
 899         #     Header 2
 900         #     --------
 901         #
 902         $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx',
 903             array(&$this, '_doHeaders_callback_setext'), $text);
 904
 905         # atx-style headers:
 906         #   # Header 1
 907         #   ## Header 2
 908         #   ## Header 2 with closing hashes ##
 909         #   ...
 910         #   ###### Header 6
 911         #
 912         $text = preg_replace_callback('{
 913                 ^(\#{1,6})  # $1 = string of #\'s
 914                 [ ]*
 915                 (.+?)       # $2 = Header text
 916                 [ ]*
 917                 \#*         # optional closing #\'s (not counted)
 918                 \n+
 919             }xm',
 920             array(&$this, '_doHeaders_callback_atx'), $text);
 921
 922         return $text;
 923     }
 924     function _doHeaders_callback_setext($matches) {
 925         # Terrible hack to check we haven't found an empty list item.
 926         if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1]))
 927             return $matches[0];
 928
 929         $level = $matches[2]{0} == '=' ? 1 : 2;
 930         $block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>";
 931         return "\n" . $this->hashBlock($block) . "\n\n";
 932     }
 933     function _doHeaders_callback_atx($matches) {
 934         $level = strlen($matches[1]);
 935         $block = "<h$level>".$this->runSpanGamut($matches[2])."</h$level>";
 936         return "\n" . $this->hashBlock($block) . "\n\n";
 937     }
 938
 939
 940     function doLists($text) {
 941     #
 942     # Form HTML ordered (numbered) and unordered (bulleted) lists.
 943     #
 944         $less_than_tab = $this->tab_width - 1;
 945
 946         # Re-usable patterns to match list item bullets and number markers:
 947         $marker_ul_re  = '[*+-]';
 948         $marker_ol_re  = '\d+[.]';
 949         $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
 950
 951         $markers_relist = array($marker_ul_re, $marker_ol_re);
 952
 953         foreach ($markers_relist as $marker_re) {
 954             # Re-usable pattern to match any entirel ul or ol list:
 955             $whole_list_re = '
 956                 (                               # $1 = whole list
 957                   (                             # $2
 958                     [ ]{0,'.$less_than_tab.'}
 959                     ('.$marker_re.')            # $3 = first list item marker
 960                     [ ]+
 961                   )
 962                   (?s:.+?)
 963                   (                             # $4
 964                       \z
 965                     |
 966                       \n{2,}
 967                       (?=\S)
 968                       (?!                       # Negative lookahead for another list item marker
 969                         [ ]*
 970                         '.$marker_re.'[ ]+
 971                       )
 972                   )
 973                 )
 974             '; // mx
 975
 976             # We use a different prefix before nested lists than top-level lists.
 977             # See extended comment in _ProcessListItems().
 978
 979             if ($this->list_level) {
 980                 $text = preg_replace_callback('{
 981                         ^
 982                         '.$whole_list_re.'
 983                     }mx',
 984                     array(&$this, '_doLists_callback'), $text);
 985             }
 986             else {
 987                 $text = preg_replace_callback('{
 988                         (?:(?<=\n)\n|\A\n?) # Must eat the newline
 989                         '.$whole_list_re.'
 990                     }mx',
 991                     array(&$this, '_doLists_callback'), $text);
 992             }
 993         }
 994
 995         return $text;
 996     }
 997     function _doLists_callback($matches) {
 998         # Re-usable patterns to match list item bullets and number markers:
 999         $marker_ul_re  = '[*+-]';
1000         $marker_ol_re  = '\d+[.]';
1001         $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
1002
1003         $list = $matches[1];
1004         $list_type = preg_match("/$marker_ul_re/", $matches[3]) ? "ul" : "ol";
1005
1006         $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re );
1007
1008         $list .= "\n";
1009         $result = $this->processListItems($list, $marker_any_re);
1010
1011         $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
1012         return "\n". $result ."\n\n";
1013     }
1014
    # List nesting depth: processListItems() increments it on entry and
    # decrements it before returning; doLists() tests it to choose between
    # the top-level and the nested list pattern. Zero means "not in a list".
    var $list_level = 0;
1016
1017     function processListItems($list_str, $marker_any_re) {
1018     #
1019     #   Process the contents of a single ordered or unordered list, splitting it
1020     #   into individual list items.
1021     #
1022         # The $this->list_level global keeps track of when we're inside a list.
1023         # Each time we enter a list, we increment it; when we leave a list,
1024         # we decrement. If it's zero, we're not in a list anymore.
1025         #
1026         # We do this because when we're not inside a list, we want to treat
1027         # something like this:
1028         #
1029         #       I recommend upgrading to version
1030         #       8. Oops, now this line is treated
1031         #       as a sub-list.
1032         #
1033         # As a single paragraph, despite the fact that the second line starts
1034         # with a digit-period-space sequence.
1035         #
1036         # Whereas when we're inside a list (or sub-list), that line will be
1037         # treated as the start of a sub-list. What a kludge, huh? This is
1038         # an aspect of Markdown's syntax that's hard to parse perfectly
1039         # without resorting to mind-reading. Perhaps the solution is to
1040         # change the syntax rules such that sub-lists must start with a
1041         # starting cardinal number; e.g. "1." or "a.".
1042
1043         $this->list_level++;
1044
1045         # trim trailing blank lines:
1046         $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
1047
1048         $list_str = preg_replace_callback('{
1049             (\n)?                           # leading line = $1
1050             (^[ ]*)                         # leading whitespace = $2
1051             ('.$marker_any_re.'             # list marker and space = $3
1052                 (?:[ ]+|(?=\n)) # space only required if item is not empty
1053             )
1054             ((?s:.*?))                      # list item text   = $4
1055             (?:(\n+(?=\n))|\n)              # tailing blank line = $5
1056             (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
1057             }xm',
1058             array(&$this, '_processListItems_callback'), $list_str);
1059
1060         $this->list_level--;
1061         return $list_str;
1062     }
1063     function _processListItems_callback($matches) {
1064         $item = $matches[4];
1065         $leading_line =& $matches[1];
1066         $leading_space =& $matches[2];
1067         $marker_space = $matches[3];
1068         $tailing_blank_line =& $matches[5];
1069
1070         if ($leading_line || $tailing_blank_line ||
1071             preg_match('/\n{2,}/', $item))
1072         {
1073             # Replace marker with the appropriate whitespace indentation
1074             $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item;
1075             $item = $this->runBlockGamut($this->outdent($item)."\n");
1076         }
1077         else {
1078             # Recursion for sub-lists:
1079             $item = $this->doLists($this->outdent($item));
1080             $item = preg_replace('/\n+$/', '', $item);
1081             $item = $this->runSpanGamut($item);
1082         }
1083
1084         return "<li>" . $item . "</li>\n";
1085     }
1086
1087
1088     function doCodeBlocks($text) {
1089     #
1090     #   Process Markdown `<pre><code>` blocks.
1091     #
1092         $text = preg_replace_callback('{
1093                 (?:\n\n|\A\n?)
1094                 (               # $1 = the code block -- one or more lines, starting with a space/tab
1095                   (?>
1096                     [ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
1097                     .*\n+
1098                   )+
1099                 )
1100                 ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
1101             }xm',
1102             array(&$this, '_doCodeBlocks_callback'), $text);
1103
1104         return $text;
1105     }
1106     function _doCodeBlocks_callback($matches) {
1107         $codeblock = $matches[1];
1108
1109         $codeblock = $this->outdent($codeblock);
1110         $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
1111
1112         # trim leading newlines and trailing newlines
1113         $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);
1114
1115         $codeblock = "<pre><code>$codeblock\n</code></pre>";
1116         return "\n\n".$this->hashBlock($codeblock)."\n\n";
1117     }
1118
1119
1120     function makeCodeSpan($code) {
1121     #
1122     # Create a code span markup for $code. Called from handleSpanToken.
1123     #
1124         $code = htmlspecialchars(trim($code), ENT_NOQUOTES);
1125         return $this->hashPart("<code>$code</code>");
1126     }
1127
1128
    # Token patterns for one-character emphasis markers, keyed by the
    # emphasis marker that is currently open. The '' entry (nothing open)
    # matches a lone "*" or "_" that is followed by a non-space character
    # and not by punctuation plus whitespace; the '*' and '_' entries match
    # the same marker when it is preceded by a non-space character.
    var $em_relist = array(
        ''  => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?=\S)(?![.,:;]\s)',
        '*' => '(?<=\S)(?<!\*)\*(?!\*)',
        '_' => '(?<=\S)(?<!_)_(?!_)',
        );
    # Same scheme for the two-character strong markers "**" and "__".
    var $strong_relist = array(
        ''   => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?=\S)(?![.,:;]\s)',
        '**' => '(?<=\S)(?<!\*)\*\*(?!\*)',
        '__' => '(?<=\S)(?<!_)__(?!_)',
        );
    # Same scheme for the three-character markers "***" and "___".
    var $em_strong_relist = array(
        ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?=\S)(?![.,:;]\s)',
        '***' => '(?<=\S)(?<!\*)\*\*\*(?!\*)',
        '___' => '(?<=\S)(?<!_)___(?!_)',
        );
    # Complete token expressions indexed by "$em$strong" (the open markers);
    # filled by prepareItalicsAndBold() and read by doItalicsAndBold().
    var $em_strong_prepared_relist;
1145
1146     function prepareItalicsAndBold() {
1147     #
1148     # Prepare regular expressions for seraching emphasis tokens in any
1149     # context.
1150     #
1151         foreach ($this->em_relist as $em => $em_re) {
1152             foreach ($this->strong_relist as $strong => $strong_re) {
1153                 # Construct list of allowed token expressions.
1154                 $token_relist = array();
1155                 if (isset($this->em_strong_relist["$em$strong"])) {
1156                     $token_relist[] = $this->em_strong_relist["$em$strong"];
1157                 }
1158                 $token_relist[] = $em_re;
1159                 $token_relist[] = $strong_re;
1160
1161                 # Construct master expression from list.
1162                 $token_re = '{('. implode('|', $token_relist) .')}';
1163                 $this->em_strong_prepared_relist["$em$strong"] = $token_re;
1164             }
1165         }
1166     }
1167
1168     function doItalicsAndBold($text) {
1169         $token_stack = array('');
1170         $text_stack = array('');
1171         $em = '';
1172         $strong = '';
1173         $tree_char_em = false;
1174
1175         while (1) {
1176             #
1177             # Get prepared regular expression for seraching emphasis tokens
1178             # in current context.
1179             #
1180             $token_re = $this->em_strong_prepared_relist["$em$strong"];
1181
1182             #
1183             # Each loop iteration seach for the next emphasis token.
1184             # Each token is then passed to handleSpanToken.
1185             #
1186             $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
1187             $text_stack[0] .= $parts[0];
1188             $token =& $parts[1];
1189             $text =& $parts[2];
1190
1191             if (empty($token)) {
1192                 # Reached end of text span: empty stack without emitting.
1193                 # any more emphasis.
1194                 while ($token_stack[0]) {
1195                     $text_stack[1] .= array_shift($token_stack);
1196                     $text_stack[0] .= array_shift($text_stack);
1197                 }
1198                 break;
1199             }
1200
1201             $token_len = strlen($token);
1202             if ($tree_char_em) {
1203                 # Reached closing marker while inside a three-char emphasis.
1204                 if ($token_len == 3) {
1205                     # Three-char closing marker, close em and strong.
1206                     array_shift($token_stack);
1207                     $span = array_shift($text_stack);
1208                     $span = $this->runSpanGamut($span);
1209                     $span = "<strong><em>$span</em></strong>";
1210                     $text_stack[0] .= $this->hashPart($span);
1211                     $em = '';
1212                     $strong = '';
1213                 } else {
1214                     # Other closing marker: close one em or strong and
1215                     # change current token state to match the other
1216                     $token_stack[0] = str_repeat($token{0}, 3-$token_len);
1217                     $tag = $token_len == 2 ? "strong" : "em";
1218                     $span = $text_stack[0];
1219                     $span = $this->runSpanGamut($span);
1220                     $span = "<$tag>$span</$tag>";
1221                     $text_stack[0] = $this->hashPart($span);
1222                     $$tag = ''; # $$tag stands for $em or $strong
1223                 }
1224                 $tree_char_em = false;
1225             } else if ($token_len == 3) {
1226                 if ($em) {
1227                     # Reached closing marker for both em and strong.
1228                     # Closing strong marker:
1229                     for ($i = 0; $i < 2; ++$i) {
1230                         $shifted_token = array_shift($token_stack);
1231                         $tag = strlen($shifted_token) == 2 ? "strong" : "em";
1232                         $span = array_shift($text_stack);
1233                         $span = $this->runSpanGamut($span);
1234                         $span = "<$tag>$span</$tag>";
1235                         $text_stack[0] .= $this->hashPart($span);
1236                         $$tag = ''; # $$tag stands for $em or $strong
1237                     }
1238                 } else {
1239                     # Reached opening three-char emphasis marker. Push on token
1240                     # stack; will be handled by the special condition above.
1241                     $em = $token{0};
1242                     $strong = "$em$em";
1243                     array_unshift($token_stack, $token);
1244                     array_unshift($text_stack, '');
1245                     $tree_char_em = true;
1246                 }
1247             } else if ($token_len == 2) {
1248                 if ($strong) {
1249                     # Unwind any dangling emphasis marker:
1250                     if (strlen($token_stack[0]) == 1) {
1251                         $text_stack[1] .= array_shift($token_stack);
1252                         $text_stack[0] .= array_shift($text_stack);
1253                     }
1254                     # Closing strong marker:
1255                     array_shift($token_stack);
1256                     $span = array_shift($text_stack);
1257                     $span = $this->runSpanGamut($span);
1258                     $span = "<strong>$span</strong>";
1259                     $text_stack[0] .= $this->hashPart($span);
1260                     $strong = '';
1261                 } else {
1262                     array_unshift($token_stack, $token);
1263                     array_unshift($text_stack, '');
1264                     $strong = $token;
1265                 }
1266             } else {
1267                 # Here $token_len == 1
1268                 if ($em) {
1269                     if (strlen($token_stack[0]) == 1) {
1270                         # Closing emphasis marker:
1271                         array_shift($token_stack);
1272                         $span = array_shift($text_stack);
1273                         $span = $this->runSpanGamut($span);
1274                         $span = "<em>$span</em>";
1275                         $text_stack[0] .= $this->hashPart($span);
1276                         $em = '';
1277                     } else {
1278                         $text_stack[0] .= $token;
1279                     }
1280                 } else {
1281                     array_unshift($token_stack, $token);
1282                     array_unshift($text_stack, '');
1283                     $em = $token;
1284                 }
1285             }
1286         }
1287         return $text_stack[0];
1288     }
1289
1290
1291     function doBlockQuotes($text) {
1292         $text = preg_replace_callback('/
1293               (                             # Wrap whole match in $1
1294                 (?>
1295                   ^[ ]*>[ ]?            # ">" at the start of a line
1296                     .+\n                    # rest of the first line
1297                   (.+\n)*                   # subsequent consecutive lines
1298                   \n*                       # blanks
1299                 )+
1300               )
1301             /xm',
1302             array(&$this, '_doBlockQuotes_callback'), $text);
1303
1304         return $text;
1305     }
1306     function _doBlockQuotes_callback($matches) {
1307         $bq = $matches[1];
1308         # trim one level of quoting - trim whitespace-only lines
1309         $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq);
1310         $bq = $this->runBlockGamut($bq);        # recurse
1311
1312         $bq = preg_replace('/^/m', "  ", $bq);
1313         # These leading spaces cause problem with <pre> content,
1314         # so we need to fix that:
1315         $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
1316             array(&$this, '_DoBlockQuotes_callback2'), $bq);
1317
1318         return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";
1319     }
1320     function _doBlockQuotes_callback2($matches) {
1321         $pre = $matches[1];
1322         $pre = preg_replace('/^  /m', '', $pre);
1323         return $pre;
1324     }
1325
1326
1327     function formParagraphs($text) {
1328     #
1329     #   Params:
1330     #       $text - string to process with html <p> tags
1331     #
1332         # Strip leading and trailing lines:
1333         $text = preg_replace('/\A\n+|\n+\z/', '', $text);
1334
1335         $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
1336
1337         #
1338         # Wrap <p> tags and unhashify HTML blocks
1339         #
1340         foreach ($grafs as $key => $value) {
1341             if (!preg_match('/^B\x1A[0-9]+B$/', $value)) {
1342                 # Is a paragraph.
1343                 $value = $this->runSpanGamut($value);
1344                 $value = preg_replace('/^([ ]*)/', "<p>", $value);
1345                 $value .= "</p>";
1346                 $grafs[$key] = $this->unhash($value);
1347             }
1348             else {
1349                 # Is a block.
1350                 # Modify elements of @grafs in-place...
1351                 $graf = $value;
1352                 $block = $this->html_hashes[$graf];
1353                 $graf = $block;
1354 //              if (preg_match('{
1355 //                  \A
1356 //                  (                           # $1 = <div> tag
1357 //                    <div  \s+
1358 //                    [^>]*
1359 //                    \b
1360 //                    markdown\s*=\s*  ([\'"])  #   $2 = attr quote char
1361 //                    1
1362 //                    \2
1363 //                    [^>]*
1364 //                    >
1365 //                  )
1366 //                  (                           # $3 = contents
1367 //                  .*
1368 //                  )
1369 //                  (</div>)                    # $4 = closing tag
1370 //                  \z
1371 //                  }xs', $block, $matches))
1372 //              {
1373 //                  list(, $div_open, , $div_content, $div_close) = $matches;
1374 //
1375 //                  # We can't call Markdown(), because that resets the hash;
1376 //                  # that initialization code should be pulled into its own sub, though.
1377 //                  $div_content = $this->hashHTMLBlocks($div_content);
1378 //
1379 //                  # Run document gamut methods on the content.
1380 //                  foreach ($this->document_gamut as $method => $priority) {
1381 //                      $div_content = $this->$method($div_content);
1382 //                  }
1383 //
1384 //                  $div_open = preg_replace(
1385 //                      '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open);
1386 //
1387 //                  $graf = $div_open . "\n" . $div_content . "\n" . $div_close;
1388 //              }
1389                 $grafs[$key] = $graf;
1390             }
1391         }
1392
1393         return implode("\n\n", $grafs);
1394     }
1395
1396
1397     function encodeAttribute($text) {
1398     #
1399     # Encode text for a double-quoted HTML attribute. This function
1400     # is *not* suitable for attributes enclosed in single quotes.
1401     #
1402         $text = $this->encodeAmpsAndAngles($text);
1403         $text = str_replace('"', '&quot;', $text);
1404         return $text;
1405     }
1406
1407
1408     function encodeAmpsAndAngles($text) {
1409     #
1410     # Smart processing for ampersands and angle brackets that need to
1411     # be encoded. Valid character entities are left alone unless the
1412     # no-entities mode is set.
1413     #
1414         if ($this->no_entities) {
1415             $text = str_replace('&', '&amp;', $text);
1416         } else {
1417             # Ampersand-encoding based entirely on Nat Irons's Amputator
1418             # MT plugin: <http://bumppo.net/projects/amputator/>
1419             $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
1420                                 '&amp;', $text);;
1421         }
1422         # Encode remaining <'s
1423         $text = str_replace('<', '&lt;', $text);
1424
1425         return $text;
1426     }
1427
1428
1429     function doAutoLinks($text) {
1430         $text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}i',
1431             array(&$this, '_doAutoLinks_url_callback'), $text);
1432
1433         # Email addresses: <address@domain.foo>
1434         $text = preg_replace_callback('{
1435             <
1436             (?:mailto:)?
1437             (
1438                 [-.\w\x80-\xFF]+
1439                 \@
1440                 [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
1441             )
1442             >
1443             }xi',
1444             array(&$this, '_doAutoLinks_email_callback'), $text);
1445
1446         return $text;
1447     }
1448     function _doAutoLinks_url_callback($matches) {
1449         $url = $this->encodeAttribute($matches[1]);
1450         $link = "<a href=\"$url\">$url</a>";
1451         return $this->hashPart($link);
1452     }
1453     function _doAutoLinks_email_callback($matches) {
1454         $address = $matches[1];
1455         $link = $this->encodeEmailAddress($address);
1456         return $this->hashPart($link);
1457     }
1458
1459
1460     function encodeEmailAddress($addr) {
1461     #
1462     #   Input: an email address, e.g. "foo@example.com"
1463     #
1464     #   Output: the email address as a mailto link, with each character
1465     #       of the address encoded as either a decimal or hex entity, in
1466     #       the hopes of foiling most address harvesting spam bots. E.g.:
1467     #
1468     #     <p><a href="&#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
1469     #        &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
1470     #        &#x6d;">&#x66;o&#111;&#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;
1471     #        &#101;&#46;&#x63;&#111;&#x6d;</a></p>
1472     #
1473     #   Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
1474     #   With some optimizations by Milian Wolff.
1475     #
1476         $addr = "mailto:" . $addr;
1477         $chars = preg_split('/(?<!^)(?!$)/', $addr);
1478         $seed = (int)abs(crc32($addr) / strlen($addr)); # Deterministic seed.
1479
1480         foreach ($chars as $key => $char) {
1481             $ord = ord($char);
1482             # Ignore non-ascii chars.
1483             if ($ord < 128) {
1484                 $r = ($seed * (1 + $key)) % 100; # Pseudo-random function.
1485                 # roughly 10% raw, 45% hex, 45% dec
1486                 # '@' *must* be encoded. I insist.
1487                 if ($r > 90 && $char != '@') /* do nothing */;
1488                 else if ($r < 45) $chars[$key] = '&#x'.dechex($ord).';';
1489                 else              $chars[$key] = '&#'.$ord.';';
1490             }
1491         }
1492
1493         $addr = implode('', $chars);
1494         $text = implode('', array_slice($chars, 7)); # text without `mailto:`
1495         $addr = "<a href=\"$addr\">$text</a>";
1496
1497         return $addr;
1498     }
1499
1500
1501     function parseSpan($str) {
1502     #
1503     # Take the string $str and parse it into tokens, hashing embeded HTML,
1504     # escaped characters and handling code spans.
1505     #
1506         $output = '';
1507
1508         $span_re = '{
1509                 (
1510                     \\\\'.$this->escape_chars_re.'
1511                 |
1512                     (?<![`\\\\])
1513                     `+                      # code span marker
1514             '.( $this->no_markup ? '' : '
1515                 |
1516                     <!--    .*?     -->     # comment
1517                 |
1518                     <\?.*?\?> | <%.*?%>     # processing instruction
1519                 |
1520                     <[/!$]?[-a-zA-Z0-9:]+   # regular tags
1521                     (?>
1522                         \s
1523                         (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
1524                     )?
1525                     >
1526             ').'
1527                 )
1528                 }xs';
1529
1530         while (1) {
1531             #
1532             # Each loop iteration seach for either the next tag, the next
1533             # openning code span marker, or the next escaped character.
1534             # Each token is then passed to handleSpanToken.
1535             #
1536             $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);
1537
1538             # Create token from text preceding tag.
1539             if ($parts[0] != "") {
1540                 $output .= $parts[0];
1541             }
1542
1543             # Check if we reach the end.
1544             if (isset($parts[1])) {
1545                 $output .= $this->handleSpanToken($parts[1], $parts[2]);
1546                 $str = $parts[2];
1547             }
1548             else {
1549                 break;
1550             }
1551         }
1552
1553         return $output;
1554     }
1555
1556
1557     function handleSpanToken($token, &$str) {
1558     #
1559     # Handle $token provided by parseSpan by determining its nature and
1560     # returning the corresponding value that should replace it.
1561     #
1562         switch ($token{0}) {
1563             case "\\":
1564                 return $this->hashPart("&#". ord($token{1}). ";");
1565             case "`":
1566                 # Search for end marker in remaining text.
1567                 if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm',
1568                     $str, $matches))
1569                 {
1570                     $str = $matches[2];
1571                     $codespan = $this->makeCodeSpan($matches[1]);
1572                     return $this->hashPart($codespan);
1573                 }
1574                 return $token; // return as text since no ending marker found.
1575             default:
1576                 return $this->hashPart($token);
1577         }
1578     }
1579
1580
1581     function outdent($text) {
1582     #
1583     # Remove one level of line-leading tabs or spaces
1584     #
1585         return preg_replace('/^(\t|[ ]{1,'.$this->tab_width.'})/m', '', $text);
1586     }
1587
1588
    # String length function for detab. `_initDetab` will create a function to
    # handle UTF-8 if the default function does not exist.
    var $utf8_strlen = 'mb_strlen';
1592
1593     function detab($text) {
1594     #
1595     # Replace tabs with the appropriate amount of space.
1596     #
1597         # For each line we separate the line in blocks delemited by
1598         # tab characters. Then we reconstruct every line by adding the
1599         # appropriate number of space between each blocks.
1600
1601         $text = preg_replace_callback('/^.*\t.*$/m',
1602             array(&$this, '_detab_callback'), $text);
1603
1604         return $text;
1605     }
1606     function _detab_callback($matches) {
1607         $line = $matches[0];
1608         $strlen = $this->utf8_strlen; # strlen function for UTF-8.
1609
1610         # Split in blocks.
1611         $blocks = explode("\t", $line);
1612         # Add each blocks to the line.
1613         $line = $blocks[0];
1614         unset($blocks[0]); # Do not add first block twice.
1615         foreach ($blocks as $block) {
1616             # Calculate amount of space, insert spaces, insert block.
1617             $amount = $this->tab_width -
1618                 $strlen($line, 'UTF-8') % $this->tab_width;
1619             $line .= str_repeat(" ", $amount) . $block;
1620         }
1621         return $line;
1622     }
1623     function _initDetab() {
1624     #
1625     # Check for the availability of the function in the `utf8_strlen` property
1626     # (initially `mb_strlen`). If the function is not available, create a
1627     # function that will loosely count the number of UTF-8 characters with a
1628     # regular expression.
1629     #
1630         if (function_exists($this->utf8_strlen)) return;
1631         $this->utf8_strlen = create_function('$text', 'return preg_match_all(
1632             "/[\\\\x00-\\\\xBF]|[\\\\xC0-\\\\xFF][\\\\x80-\\\\xBF]*/",
1633             $text, $m);');
1634     }
1635
1636
1637     function unhash($text) {
1638     #
1639     # Swap back in all the tags hashed by _HashHTMLBlocks.
1640     #
1641         return preg_replace_callback('/(.)\x1A[0-9]+\1/',
1642             array(&$this, '_unhash_callback'), $text);
1643     }
1644     function _unhash_callback($matches) {
1645         return $this->html_hashes[$matches[0]];
1646     }
1647
1648 }
1649
1650
1651 #
1652 # Markdown Extra Parser Class
1653 #
1654
1655 class MarkdownExtra_Parser extends Markdown_Parser {
1656
1657     # Prefix for footnote ids.
1658     var $fn_id_prefix = "";
1659
1660     # Optional title attribute for footnote links and backlinks.
1661     var $fn_link_title = MARKDOWN_FN_LINK_TITLE;
1662     var $fn_backlink_title = MARKDOWN_FN_BACKLINK_TITLE;
1663
1664     # Optional class attribute for footnote links and backlinks.
1665     var $fn_link_class = MARKDOWN_FN_LINK_CLASS;
1666     var $fn_backlink_class = MARKDOWN_FN_BACKLINK_CLASS;
1667
1668     # Predefined abbreviations.
1669     var $predef_abbr = array();
1670
1671
1672     function MarkdownExtra_Parser() {
1673     #
1674     # Constructor function. Initialize the parser object.
1675     #
1676         # Add extra escapable characters before parent constructor
1677         # initialize the table.
1678         $this->escape_chars .= ':|';
1679
1680         # Insert extra document, block, and span transformations.
1681         # Parent constructor will do the sorting.
1682         $this->document_gamut += array(
1683             "doFencedCodeBlocks" => 5,
1684             "stripFootnotes"     => 15,
1685             "stripAbbreviations" => 25,
1686             "appendFootnotes"    => 50,
1687             );
1688         $this->block_gamut += array(
1689             "doFencedCodeBlocks" => 5,
1690             "doTables"           => 15,
1691             "doDefLists"         => 45,
1692             );
1693         $this->span_gamut += array(
1694             "doFootnotes"        => 5,
1695             "doAbbreviations"    => 70,
1696             );
1697
1698         parent::Markdown_Parser();
1699     }
1700
1701
1702     # Extra variables used during extra transformations.
1703     var $footnotes = array();
1704     var $footnotes_ordered = array();
1705     var $abbr_desciptions = array();
1706     var $abbr_word_re = '';
1707
1708     # Give the current footnote number.
1709     var $footnote_counter = 1;
1710
1711
1712     function setup() {
1713     #
1714     # Setting up Extra-specific variables.
1715     #
1716         parent::setup();
1717
1718         $this->footnotes = array();
1719         $this->footnotes_ordered = array();
1720         $this->abbr_desciptions = array();
1721         $this->abbr_word_re = '';
1722         $this->footnote_counter = 1;
1723
1724         foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
1725             if ($this->abbr_word_re)
1726                 $this->abbr_word_re .= '|';
1727             $this->abbr_word_re .= preg_quote($abbr_word);
1728             $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
1729         }
1730     }
1731
1732     function teardown() {
1733     #
1734     # Clearing Extra-specific variables.
1735     #
1736         $this->footnotes = array();
1737         $this->footnotes_ordered = array();
1738         $this->abbr_desciptions = array();
1739         $this->abbr_word_re = '';
1740
1741         parent::teardown();
1742     }
1743
1744
1745     ### HTML Block Parser ###
1746
1747     # Tags that are always treated as block tags:
1748     var $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend';
1749
    # Tags treated as block tags only if the opening tag is alone on its line:
1751     var $context_block_tags_re = 'script|noscript|math|ins|del';
1752
1753     # Tags where markdown="1" default to span mode:
1754     var $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';
1755
1756     # Tags which must not have their contents modified, no matter where
1757     # they appear:
1758     var $clean_tags_re = 'script|math';
1759
1760     # Tags that do not need to be closed.
1761     var $auto_close_tags_re = 'hr|img';
1762
1763
1764     function hashHTMLBlocks($text) {
1765     #
1766     # Hashify HTML Blocks and "clean tags".
1767     #
1768     # We only want to do this for block-level HTML tags, such as headers,
1769     # lists, and tables. That's because we still want to wrap <p>s around
1770     # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
1771     # phrase emphasis, and spans. The list of tags we're looking for is
1772     # hard-coded.
1773     #
1774     # This works by calling _HashHTMLBlocks_InMarkdown, which then calls
1775     # _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1"
1776     # attribute is found whitin a tag, _HashHTMLBlocks_InHTML calls back
1777     #  _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag.
1778     # These two functions are calling each other. It's recursive!
1779     #
1780         #
1781         # Call the HTML-in-Markdown hasher.
1782         #
1783         list($text, ) = $this->_hashHTMLBlocks_inMarkdown($text);
1784
1785         return $text;
1786     }
1787     function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
1788                                         $enclosing_tag_re = '', $span = false)
1789     {
1790     #
1791     # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.
1792     #
1793     # *   $indent is the number of space to be ignored when checking for code
1794     #     blocks. This is important because if we don't take the indent into
1795     #     account, something like this (which looks right) won't work as expected:
1796     #
1797     #     <div>
1798     #         <div markdown="1">
1799     #         Hello World.  <-- Is this a Markdown code block or text?
1800     #         </div>  <-- Is this a Markdown code block or a real tag?
1801     #     <div>
1802     #
1803     #     If you don't like this, just don't indent the tag on which
1804     #     you apply the markdown="1" attribute.
1805     #
1806     # *   If $enclosing_tag_re is not empty, stops at the first unmatched closing
1807     #     tag with that name. Nested tags supported.
1808     #
1809     # *   If $span is true, text inside must treated as span. So any double
1810     #     newline will be replaced by a single newline so that it does not create
1811     #     paragraphs.
1812     #
1813     # Returns an array of that form: ( processed text , remaining text )
1814     #
1815         if ($text === '') return array('', '');
1816
1817         # Regex to check for the presense of newlines around a block tag.
1818         $newline_before_re = '/(?:^\n?|\n\n)*$/';
1819         $newline_after_re =
1820             '{
1821                 ^                       # Start of text following the tag.
1822                 (?>[ ]*<!--.*?-->)?     # Optional comment.
1823                 [ ]*\n                  # Must be followed by newline.
1824             }xs';
1825
1826         # Regex to match any tag.
1827         $block_tag_re =
1828             '{
1829                 (                   # $2: Capture hole tag.
1830                     </?                 # Any opening or closing tag.
1831                         (?>             # Tag name.
1832                             '.$this->block_tags_re.'            |
1833                             '.$this->context_block_tags_re.'    |
1834                             '.$this->clean_tags_re.'            |
1835                             (?!\s)'.$enclosing_tag_re.'
1836                         )
1837                         (?:
1838                             (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
1839                             (?>
1840                                 ".*?"       |   # Double quotes (can contain `>`)
1841                                 \'.*?\'     |   # Single quotes (can contain `>`)
1842                                 .+?             # Anything but quotes and `>`.
1843                             )*?
1844                         )?
1845                     >                   # End of tag.
1846                 |
1847                     <!--    .*?     --> # HTML Comment
1848                 |
1849                     <\?.*?\?> | <%.*?%> # Processing instruction
1850                 |
1851                     <!\[CDATA\[.*?\]\]> # CData Block
1852                 |
1853                     # Code span marker
1854                     `+
1855                 '. ( !$span ? ' # If not in span.
1856                 |
1857                     # Indented code block
1858                     (?> ^[ ]*\n? | \n[ ]*\n )
1859                     [ ]{'.($indent+4).'}[^\n]* \n
1860                     (?>
1861                         (?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n
1862                     )*
1863                 |
1864                     # Fenced code block marker
1865                     (?> ^ | \n )
1866                     [ ]{'.($indent).'}~~~+[ ]*\n
1867                 ' : '' ). ' # End (if not is span).
1868                 )
1869             }xs';
1870
1871
1872         $depth = 0;     # Current depth inside the tag tree.
1873         $parsed = "";   # Parsed text that will be returned.
1874
1875         #
1876         # Loop through every tag until we find the closing tag of the parent
1877         # or loop until reaching the end of text if no parent tag specified.
1878         #
1879         do {
1880             #
1881             # Split the text using the first $tag_match pattern found.
1882             # Text before  pattern will be first in the array, text after
1883             # pattern will be at the end, and between will be any catches made
1884             # by the pattern.
1885             #
1886             $parts = preg_split($block_tag_re, $text, 2,
1887                                 PREG_SPLIT_DELIM_CAPTURE);
1888
1889             # If in Markdown span mode, add a empty-string span-level hash
1890             # after each newline to prevent triggering any block element.
1891             if ($span) {
1892                 $void = $this->hashPart("", ':');
1893                 $newline = "$void\n";
1894                 $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
1895             }
1896
1897             $parsed .= $parts[0]; # Text before current tag.
1898
1899             # If end of $text has been reached. Stop loop.
1900             if (count($parts) < 3) {
1901                 $text = "";
1902                 break;
1903             }
1904
1905             $tag  = $parts[1]; # Tag to handle.
1906             $text = $parts[2]; # Remaining text after current tag.
1907             $tag_re = preg_quote($tag); # For use in a regular expression.
1908
1909             #
1910             # Check for: Code span marker
1911             #
1912             if ($tag{0} == "`") {
1913                 # Find corresponding end marker.
1914                 $tag_re = preg_quote($tag);
1915                 if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)'.$tag_re.'(?!`)}',
1916                     $text, $matches))
1917                 {
1918                     # End marker found: pass text unchanged until marker.
1919                     $parsed .= $tag . $matches[0];
1920                     $text = substr($text, strlen($matches[0]));
1921                 }
1922                 else {
1923                     # Unmatched marker: just skip it.
1924                     $parsed .= $tag;
1925                 }
1926             }
1927             #
1928             # Check for: Indented code block or fenced code block marker.
1929             #
1930             else if ($tag{0} == "\n" || $tag{0} == "~") {
1931                 if ($tag{1} == "\n" || $tag{1} == " ") {
1932                     # Indented code block: pass it unchanged, will be handled
1933                     # later.
1934                     $parsed .= $tag;
1935                 }
1936                 else {
1937                     # Fenced code block marker: find matching end marker.
1938                     $tag_re = preg_quote(trim($tag));
1939                     if (preg_match('{^(?>.*\n)+?'.$tag_re.' *\n}', $text,
1940                         $matches))
1941                     {
1942                         # End marker found: pass text unchanged until marker.
1943                         $parsed .= $tag . $matches[0];
1944                         $text = substr($text, strlen($matches[0]));
1945                     }
1946                     else {
1947                         # No end marker: just skip it.
1948                         $parsed .= $tag;
1949                     }
1950                 }
1951             }
1952             #
1953             # Check for: Opening Block level tag or
1954             #            Opening Context Block tag (like ins and del)
1955             #               used as a block tag (tag is alone on it's line).
1956             #
1957             else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) ||
1958                 (   preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) &&
1959                     preg_match($newline_before_re, $parsed) &&
1960                     preg_match($newline_after_re, $text)    )
1961                 )
1962             {
1963                 # Need to parse tag and following text using the HTML parser.
1964                 list($block_text, $text) =
1965                     $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);
1966
1967                 # Make sure it stays outside of any paragraph by adding newlines.
1968                 $parsed .= "\n\n$block_text\n\n";
1969             }
1970             #
1971             # Check for: Clean tag (like script, math)
1972             #            HTML Comments, processing instructions.
1973             #
1974             else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) ||
1975                 $tag{1} == '!' || $tag{1} == '?')
1976             {
1977                 # Need to parse tag and following text using the HTML parser.
1978                 # (don't check for markdown attribute)
1979                 list($block_text, $text) =
1980                     $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);
1981
1982                 $parsed .= $block_text;
1983             }
1984             #
1985             # Check for: Tag with same name as enclosing tag.
1986             #
1987             else if ($enclosing_tag_re !== '' &&
1988                 # Same name as enclosing tag.
1989                 preg_match('{^</?(?:'.$enclosing_tag_re.')\b}', $tag))
1990             {
1991                 #
1992                 # Increase/decrease nested tag count.
1993                 #
1994                 if ($tag{1} == '/')                     $depth--;
1995                 else if ($tag{strlen($tag)-2} != '/')   $depth++;
1996
1997                 if ($depth < 0) {
1998                     #
1999                     # Going out of parent element. Clean up and break so we
2000                     # return to the calling function.
2001                     #
2002                     $text = $tag . $text;
2003                     break;
2004                 }
2005
2006                 $parsed .= $tag;
2007             }
2008             else {
2009                 $parsed .= $tag;
2010             }
2011         } while ($depth >= 0);
2012
2013         return array($parsed, $text);
2014     }
2015     function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
2016     #
2017     # Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags.
2018     #
2019     # *   Calls $hash_method to convert any blocks.
2020     # *   Stops when the first opening tag closes.
2021     # *   $md_attr indicate if the use of the `markdown="1"` attribute is allowed.
2022     #     (it is not inside clean tags)
2023     #
2024     # Returns an array of that form: ( processed text , remaining text )
2025     #
2026         if ($text === '') return array('', '');
2027
2028         # Regex to match `markdown` attribute inside of a tag.
2029         $markdown_attr_re = '
2030             {
2031                 \s*         # Eat whitespace before the `markdown` attribute
2032                 markdown
2033                 \s*=\s*
2034                 (?>
2035                     (["\'])     # $1: quote delimiter
2036                     (.*?)       # $2: attribute value
2037                     \1          # matching delimiter
2038                 |
2039                     ([^\s>]*)   # $3: unquoted attribute value
2040                 )
2041                 ()              # $4: make $3 always defined (avoid warnings)
2042             }xs';
2043
2044         # Regex to match any tag.
2045         $tag_re = '{
2046                 (                   # $2: Capture hole tag.
2047                     </?                 # Any opening or closing tag.
2048                         [\w:$]+         # Tag name.
2049                         (?:
2050                             (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
2051                             (?>
2052                                 ".*?"       |   # Double quotes (can contain `>`)
2053                                 \'.*?\'     |   # Single quotes (can contain `>`)
2054                                 .+?             # Anything but quotes and `>`.
2055                             )*?
2056                         )?
2057                     >                   # End of tag.
2058                 |
2059                     <!--    .*?     --> # HTML Comment
2060                 |
2061                     <\?.*?\?> | <%.*?%> # Processing instruction
2062                 |
2063                     <!\[CDATA\[.*?\]\]> # CData Block
2064                 )
2065             }xs';
2066
2067         $original_text = $text;     # Save original text in case of faliure.
2068
2069         $depth      = 0;    # Current depth inside the tag tree.
2070         $block_text = "";   # Temporary text holder for current text.
2071         $parsed     = "";   # Parsed text that will be returned.
2072
2073         #
2074         # Get the name of the starting tag.
2075         # (This pattern makes $base_tag_name_re safe without quoting.)
2076         #
2077         if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
2078             $base_tag_name_re = $matches[1];
2079
2080         #
2081         # Loop through every tag until we find the corresponding closing tag.
2082         #
2083         do {
2084             #
2085             # Split the text using the first $tag_match pattern found.
2086             # Text before  pattern will be first in the array, text after
2087             # pattern will be at the end, and between will be any catches made
2088             # by the pattern.
2089             #
2090             $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
2091
2092             if (count($parts) < 3) {
2093                 #
2094                 # End of $text reached with unbalenced tag(s).
2095                 # In that case, we return original text unchanged and pass the
2096                 # first character as filtered to prevent an infinite loop in the
2097                 # parent function.
2098                 #
2099                 return array($original_text{0}, substr($original_text, 1));
2100             }
2101
2102             $block_text .= $parts[0]; # Text before current tag.
2103             $tag         = $parts[1]; # Tag to handle.
2104             $text        = $parts[2]; # Remaining text after current tag.
2105
2106             #
2107             # Check for: Auto-close tag (like <hr/>)
2108             #            Comments and Processing Instructions.
2109             #
2110             if (preg_match('{^</?(?:'.$this->auto_close_tags_re.')\b}', $tag) ||
2111                 $tag{1} == '!' || $tag{1} == '?')
2112             {
2113                 # Just add the tag to the block as if it was text.
2114                 $block_text .= $tag;
2115             }
2116             else {
2117                 #
2118                 # Increase/decrease nested tag count. Only do so if
2119                 # the tag's name match base tag's.
2120                 #
2121                 if (preg_match('{^</?'.$base_tag_name_re.'\b}', $tag)) {
2122                     if ($tag{1} == '/')                     $depth--;
2123                     else if ($tag{strlen($tag)-2} != '/')   $depth++;
2124                 }
2125
2126                 #
2127                 # Check for `markdown="1"` attribute and handle it.
2128                 #
2129                 if ($md_attr &&
2130                     preg_match($markdown_attr_re, $tag, $attr_m) &&
2131                     preg_match('/^1|block|span$/', $attr_m[2] . $attr_m[3]))
2132                 {
2133                     # Remove `markdown` attribute from opening tag.
2134                     $tag = preg_replace($markdown_attr_re, '', $tag);
2135
2136                     # Check if text inside this tag must be parsed in span mode.
2137                     $this->mode = $attr_m[2] . $attr_m[3];
2138                     $span_mode = $this->mode == 'span' || $this->mode != 'block' &&
2139                         preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag);
2140
2141                     # Calculate indent before tag.
2142                     if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
2143                         $strlen = $this->utf8_strlen;
2144                         $indent = $strlen($matches[1], 'UTF-8');
2145                     } else {
2146                         $indent = 0;
2147                     }
2148
2149                     # End preceding block with this tag.
2150                     $block_text .= $tag;
2151                     $parsed .= $this->$hash_method($block_text);
2152
2153                     # Get enclosing tag name for the ParseMarkdown function.
2154                     # (This pattern makes $tag_name_re safe without quoting.)
2155                     preg_match('/^<([\w:$]*)\b/', $tag, $matches);
2156                     $tag_name_re = $matches[1];
2157
2158                     # Parse the content using the HTML-in-Markdown parser.
2159                     list ($block_text, $text)
2160                         = $this->_hashHTMLBlocks_inMarkdown($text, $indent,
2161                             $tag_name_re, $span_mode);
2162
2163                     # Outdent markdown text.
2164                     if ($indent > 0) {
2165                         $block_text = preg_replace("/^[ ]{1,$indent}/m", "",
2166                                                     $block_text);
2167                     }
2168
2169                     # Append tag content to parsed text.
2170                     if (!$span_mode)    $parsed .= "\n\n$block_text\n\n";
2171                     else                $parsed .= "$block_text";
2172
2173                     # Start over a new block.
2174                     $block_text = "";
2175                 }
2176                 else $block_text .= $tag;
2177             }
2178
2179         } while ($depth > 0);
2180
2181         #
2182         # Hash last block text that wasn't processed inside the loop.
2183         #
2184         $parsed .= $this->$hash_method($block_text);
2185
2186         return array($parsed, $text);
2187     }
2188
2189
2190     function hashClean($text) {
2191     #
2192     # Called whenever a tag must be hashed when a function insert a "clean" tag
2193     # in $text, it pass through this function and is automaticaly escaped,
2194     # blocking invalid nested overlap.
2195     #
2196         return $this->hashPart($text, 'C');
2197     }
2198
2199
2200     function doHeaders($text) {
2201     #
2202     # Redefined to add id attribute support.
2203     #
2204         # Setext-style headers:
2205         #     Header 1  {#header1}
2206         #     ========
2207         #
2208         #     Header 2  {#header2}
2209         #     --------
2210         #
2211         $text = preg_replace_callback(
2212             '{
2213                 (^.+?)                              # $1: Header text
2214                 (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})?    # $2: Id attribute
2215                 [ ]*\n(=+|-+)[ ]*\n+                # $3: Header footer
2216             }mx',
2217             array(&$this, '_doHeaders_callback_setext'), $text);
2218
2219         # atx-style headers:
2220         #   # Header 1        {#header1}
2221         #   ## Header 2       {#header2}
2222         #   ## Header 2 with closing hashes ##  {#header3}
2223         #   ...
2224         #   ###### Header 6   {#header2}
2225         #
2226         $text = preg_replace_callback('{
2227                 ^(\#{1,6})  # $1 = string of #\'s
2228                 [ ]*
2229                 (.+?)       # $2 = Header text
2230                 [ ]*
2231                 \#*         # optional closing #\'s (not counted)
2232                 (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? # id attribute
2233                 [ ]*
2234                 \n+
2235             }xm',
2236             array(&$this, '_doHeaders_callback_atx'), $text);
2237
2238         return $text;
2239     }
2240     function _doHeaders_attr($attr) {
2241         if (empty($attr))  return "";
2242         return " id=\"$attr\"";
2243     }
2244     function _doHeaders_callback_setext($matches) {
2245         if ($matches[3] == '-' && preg_match('{^- }', $matches[1]))
2246             return $matches[0];
2247         $level = $matches[3]{0} == '=' ? 1 : 2;
2248         $attr  = $this->_doHeaders_attr($id =& $matches[2]);
2249         $block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>";
2250         return "\n" . $this->hashBlock($block) . "\n\n";
2251     }
2252     function _doHeaders_callback_atx($matches) {
2253         $level = strlen($matches[1]);
2254         $attr  = $this->_doHeaders_attr($id =& $matches[3]);
2255         $block = "<h$level$attr>".$this->runSpanGamut($matches[2])."</h$level>";
2256         return "\n" . $this->hashBlock($block) . "\n\n";
2257     }
2258
2259
2260     function doTables($text) {
2261     #
2262     # Form HTML tables.
2263     #
2264         $less_than_tab = $this->tab_width - 1;
2265         #
2266         # Find tables with leading pipe.
2267         #
2268         #   | Header 1 | Header 2
2269         #   | -------- | --------
2270         #   | Cell 1   | Cell 2
2271         #   | Cell 3   | Cell 4
2272         #
2273         $text = preg_replace_callback('
2274             {
2275                 ^                           # Start of a line
2276                 [ ]{0,'.$less_than_tab.'}   # Allowed whitespace.
2277                 [|]                         # Optional leading pipe (present)
2278                 (.+) \n                     # $1: Header row (at least one pipe)
2279
2280                 [ ]{0,'.$less_than_tab.'}   # Allowed whitespace.
2281                 [|] ([ ]*[-:]+[-| :]*) \n   # $2: Header underline
2282
2283                 (                           # $3: Cells
2284                     (?>
2285                         [ ]*                # Allowed whitespace.
2286                         [|] .* \n           # Row content.
2287                     )*
2288                 )
2289                 (?=\n|\Z)                   # Stop at final double newline.
2290             }xm',
2291             array(&$this, '_doTable_leadingPipe_callback'), $text);
2292
2293         #
2294         # Find tables without leading pipe.
2295         #
2296         #   Header 1 | Header 2
2297         #   -------- | --------
2298         #   Cell 1   | Cell 2
2299         #   Cell 3   | Cell 4
2300         #
2301         $text = preg_replace_callback('
2302             {
2303                 ^                           # Start of a line
2304                 [ ]{0,'.$less_than_tab.'}   # Allowed whitespace.
2305                 (\S.*[|].*) \n              # $1: Header row (at least one pipe)
2306
2307                 [ ]{0,'.$less_than_tab.'}   # Allowed whitespace.
2308                 ([-:]+[ ]*[|][-| :]*) \n    # $2: Header underline
2309
2310                 (                           # $3: Cells
2311                     (?>
2312                         .* [|] .* \n        # Row content
2313                     )*
2314                 )
2315                 (?=\n|\Z)                   # Stop at final double newline.
2316             }xm',
2317             array(&$this, '_DoTable_callback'), $text);
2318
2319         return $text;
2320     }
2321     function _doTable_leadingPipe_callback($matches) {
2322         $head       = $matches[1];
2323         $underline  = $matches[2];
2324         $content    = $matches[3];
2325
2326         # Remove leading pipe for each row.
2327         $content    = preg_replace('/^ *[|]/m', '', $content);
2328
2329         return $this->_doTable_callback(array($matches[0], $head, $underline, $content));
2330     }
2331     function _doTable_callback($matches) {
2332         $head       = $matches[1];
2333         $underline  = $matches[2];
2334         $content    = $matches[3];
2335
2336         # Remove any tailing pipes for each line.
2337         $head       = preg_replace('/[|] *$/m', '', $head);
2338         $underline  = preg_replace('/[|] *$/m', '', $underline);
2339         $content    = preg_replace('/[|] *$/m', '', $content);
2340
2341         # Reading alignement from header underline.
2342         $separators = preg_split('/ *[|] */', $underline);
2343         foreach ($separators as $n => $s) {
2344             if (preg_match('/^ *-+: *$/', $s))      $attr[$n] = ' align="right"';
2345             else if (preg_match('/^ *:-+: *$/', $s))$attr[$n] = ' align="center"';
2346             else if (preg_match('/^ *:-+ *$/', $s)) $attr[$n] = ' align="left"';
2347             else                                    $attr[$n] = '';
2348         }
2349
2350         # Parsing span elements, including code spans, character escapes,
2351         # and inline HTML tags, so that pipes inside those gets ignored.
2352         $head       = $this->parseSpan($head);
2353         $headers    = preg_split('/ *[|] */', $head);
2354         $col_count  = count($headers);
2355
2356         # Write column headers.
2357         $text = "<table>\n";
2358         $text .= "<thead>\n";
2359         $text .= "<tr>\n";
2360         foreach ($headers as $n => $header)
2361             $text .= "  <th$attr[$n]>".$this->runSpanGamut(trim($header))."</th>\n";
2362         $text .= "</tr>\n";
2363         $text .= "</thead>\n";
2364
2365         # Split content by row.
2366         $rows = explode("\n", trim($content, "\n"));
2367
2368         $text .= "<tbody>\n";
2369         foreach ($rows as $row) {
2370             # Parsing span elements, including code spans, character escapes,
2371             # and inline HTML tags, so that pipes inside those gets ignored.
2372             $row = $this->parseSpan($row);
2373
2374             # Split row by cell.
2375             $row_cells = preg_split('/ *[|] */', $row, $col_count);
2376             $row_cells = array_pad($row_cells, $col_count, '');
2377
2378             $text .= "<tr>\n";
2379             foreach ($row_cells as $n => $cell)
2380                 $text .= "  <td$attr[$n]>".$this->runSpanGamut(trim($cell))."</td>\n";
2381             $text .= "</tr>\n";
2382         }
2383         $text .= "</tbody>\n";
2384         $text .= "</table>";
2385
2386         return $this->hashBlock($text) . "\n";
2387     }
2388
2389
2390     function doDefLists($text) {
2391     #
2392     # Form HTML definition lists.
2393     #
2394         $less_than_tab = $this->tab_width - 1;
2395
2396         # Re-usable pattern to match any entire dl list:
2397         $whole_list_re = '(?>
2398             (                               # $1 = whole list
2399               (                             # $2
2400                 [ ]{0,'.$less_than_tab.'}
2401                 ((?>.*\S.*\n)+)             # $3 = defined term
2402                 \n?
2403                 [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
2404               )
2405               (?s:.+?)
2406               (                             # $4
2407                   \z
2408                 |
2409                   \n{2,}
2410                   (?=\S)
2411                   (?!                       # Negative lookahead for another term
2412                     [ ]{0,'.$less_than_tab.'}
2413                     (?: \S.*\n )+?          # defined term
2414                     \n?
2415                     [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
2416                   )
2417                   (?!                       # Negative lookahead for another definition
2418                     [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
2419                   )
2420               )
2421             )
2422         )'; // mx
2423
2424         $text = preg_replace_callback('{
2425                 (?>\A\n?|(?<=\n\n))
2426                 '.$whole_list_re.'
2427             }mx',
2428             array(&$this, '_doDefLists_callback'), $text);
2429
2430         return $text;
2431     }
2432     function _doDefLists_callback($matches) {
2433         # Re-usable patterns to match list item bullets and number markers:
2434         $list = $matches[1];
2435
2436         # Turn double returns into triple returns, so that we can make a
2437         # paragraph for the last item in a list, if necessary:
2438         $result = trim($this->processDefListItems($list));
2439         $result = "<dl>\n" . $result . "\n</dl>";
2440         return $this->hashBlock($result) . "\n\n";
2441     }
2442
2443
2444     function processDefListItems($list_str) {
2445     #
2446     #   Process the contents of a single definition list, splitting it
2447     #   into individual term and definition list items.
2448     #
2449         $less_than_tab = $this->tab_width - 1;
2450
2451         # trim trailing blank lines:
2452         $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
2453
2454         # Process definition terms.
2455         $list_str = preg_replace_callback('{
2456             (?>\A\n?|\n\n+)                 # leading line
2457             (                               # definition terms = $1
2458                 [ ]{0,'.$less_than_tab.'}   # leading whitespace
2459                 (?![:][ ]|[ ])              # negative lookahead for a definition
2460                                             #   mark (colon) or more whitespace.
2461                 (?> \S.* \n)+?              # actual term (not whitespace).
2462             )
2463             (?=\n?[ ]{0,3}:[ ])             # lookahead for following line feed
2464                                             #   with a definition mark.
2465             }xm',
2466             array(&$this, '_processDefListItems_callback_dt'), $list_str);
2467
2468         # Process actual definitions.
2469         $list_str = preg_replace_callback('{
2470             \n(\n+)?                        # leading line = $1
2471             (                               # marker space = $2
2472                 [ ]{0,'.$less_than_tab.'}   # whitespace before colon
2473                 [:][ ]+                     # definition mark (colon)
2474             )
2475             ((?s:.+?))                      # definition text = $3
2476             (?= \n+                         # stop at next definition mark,
2477                 (?:                         # next term or end of text
2478                     [ ]{0,'.$less_than_tab.'} [:][ ]    |
2479                     <dt> | \z
2480                 )
2481             )
2482             }xm',
2483             array(&$this, '_processDefListItems_callback_dd'), $list_str);
2484
2485         return $list_str;
2486     }
2487     function _processDefListItems_callback_dt($matches) {
2488         $terms = explode("\n", trim($matches[1]));
2489         $text = '';
2490         foreach ($terms as $term) {
2491             $term = $this->runSpanGamut(trim($term));
2492             $text .= "\n<dt>" . $term . "</dt>";
2493         }
2494         return $text . "\n";
2495     }
2496     function _processDefListItems_callback_dd($matches) {
2497         $leading_line   = $matches[1];
2498         $marker_space   = $matches[2];
2499         $def            = $matches[3];
2500
2501         if ($leading_line || preg_match('/\n{2,}/', $def)) {
2502             # Replace marker with the appropriate whitespace indentation
2503             $def = str_repeat(' ', strlen($marker_space)) . $def;
2504             $def = $this->runBlockGamut($this->outdent($def . "\n\n"));
2505             $def = "\n". $def ."\n";
2506         }
2507         else {
2508             $def = rtrim($def);
2509             $def = $this->runSpanGamut($this->outdent($def));
2510         }
2511
2512         return "\n<dd>" . $def . "</dd>\n";
2513     }
2514
2515
2516     function doFencedCodeBlocks($text) {
2517     #
2518     # Adding the fenced code block syntax to regular Markdown:
2519     #
2520     # ~~~
2521     # Code block
2522     # ~~~
2523     #
2524         $less_than_tab = $this->tab_width;
2525
2526         $text = preg_replace_callback('{
2527                 (?:\n|\A)
2528                 # 1: Opening marker
2529                 (
2530                     ~{3,} # Marker: three tilde or more.
2531                 )
2532                 [ ]* \n # Whitespace and newline following marker.
2533
2534                 # 2: Content
2535                 (
2536                     (?>
2537                         (?!\1 [ ]* \n)  # Not a closing marker.
2538                         .*\n+
2539                     )+
2540                 )
2541
2542                 # Closing marker.
2543                 \1 [ ]* \n
2544             }xm',
2545             array(&$this, '_doFencedCodeBlocks_callback'), $text);
2546
2547         return $text;
2548     }
2549     function _doFencedCodeBlocks_callback($matches) {
2550         $codeblock = $matches[2];
2551         $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
2552         $codeblock = preg_replace_callback('/^\n+/',
2553             array(&$this, '_doFencedCodeBlocks_newlines'), $codeblock);
2554         $codeblock = "<pre><code>$codeblock</code></pre>";
2555         return "\n\n".$this->hashBlock($codeblock)."\n\n";
2556     }
2557     function _doFencedCodeBlocks_newlines($matches) {
2558         return str_repeat("<br$this->empty_element_suffix",
2559             strlen($matches[0]));
2560     }
2561
2562
    #
    # Redefining emphasis markers so that emphasis by underscore does not
    # work in the middle of a word.
    #
    # Each list below is keyed by a marker string:
    #   - the '' entry matches a marker (asterisk or underscore form) that
    #     is followed by a non-space character and is not directly followed
    #     by one of . , : ; plus whitespace; its underscore form must not be
    #     preceded by a letter, digit or underscore;
    #   - an entry keyed by a marker matches that same marker when it is
    #     preceded by a non-space character; its underscore form must not
    #     be followed by a letter, digit or underscore.
    # NOTE(review): presumably '' holds the opening pattern and the other
    # keys the closing pattern for the marker currently open -- confirm
    # against the code that consumes these lists.
    #

    # Single markers: * and _
    var $em_relist = array(
        ''  => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?=\S)(?![.,:;]\s)',
        '*' => '(?<=\S)(?<!\*)\*(?!\*)',
        '_' => '(?<=\S)(?<!_)_(?![a-zA-Z0-9_])',
        );
    # Double markers: ** and __
    var $strong_relist = array(
        ''   => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?=\S)(?![.,:;]\s)',
        '**' => '(?<=\S)(?<!\*)\*\*(?!\*)',
        '__' => '(?<=\S)(?<!_)__(?![a-zA-Z0-9_])',
        );
    # Triple markers: *** and ___
    var $em_strong_relist = array(
        ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?=\S)(?![.,:;]\s)',
        '***' => '(?<=\S)(?<!\*)\*\*\*(?!\*)',
        '___' => '(?<=\S)(?<!_)___(?![a-zA-Z0-9_])',
        );
2582
2583
2584     function formParagraphs($text) {
2585     #
2586     #   Params:
2587     #       $text - string to process with html <p> tags
2588     #
2589         # Strip leading and trailing lines:
2590         $text = preg_replace('/\A\n+|\n+\z/', '', $text);
2591
2592         $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
2593
2594         #
2595         # Wrap <p> tags and unhashify HTML blocks
2596         #
2597         foreach ($grafs as $key => $value) {
2598             $value = trim($this->runSpanGamut($value));
2599
2600             # Check if this should be enclosed in a paragraph.
2601             # Clean tag hashes & block tag hashes are left alone.
2602             $is_p = !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value);
2603
2604             if ($is_p) {
2605                 $value = "<p>$value</p>";
2606             }
2607             $grafs[$key] = $value;
2608         }
2609
2610         # Join grafs in one text, then unhash HTML tags.
2611         $text = implode("\n\n", $grafs);
2612
2613         # Finish by removing any tag hashes still present in $text.
2614         $text = $this->unhash($text);
2615
2616         return $text;
2617     }
2618
2619
2620     ### Footnotes
2621
2622     function stripFootnotes($text) {
2623     #
2624     # Strips link definitions from text, stores the URLs and titles in
2625     # hash references.
2626     #
2627         $less_than_tab = $this->tab_width - 1;
2628
2629         # Link defs are in the form: [^id]: url "optional title"
2630         $text = preg_replace_callback('{
2631             ^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?:  # note_id = $1
2632               [ ]*
2633               \n?                   # maybe *one* newline
2634             (                       # text = $2 (no blank lines allowed)
2635                 (?:
2636                     .+              # actual text
2637                 |
2638                     \n              # newlines but
2639                     (?!\[\^.+?\]:\s)# negative lookahead for footnote marker.
2640                     (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed
2641                                     # by non-indented content
2642                 )*
2643             )
2644             }xm',
2645             array(&$this, '_stripFootnotes_callback'),
2646             $text);
2647         return $text;
2648     }
2649     function _stripFootnotes_callback($matches) {
2650         $note_id = $this->fn_id_prefix . $matches[1];
2651         $this->footnotes[$note_id] = $this->outdent($matches[2]);
2652         return ''; # String that will replace the block
2653     }
2654
2655
2656     function doFootnotes($text) {
2657     #
2658     # Replace footnote references in $text [^id] with a special text-token
2659     # which will be replaced by the actual footnote marker in appendFootnotes.
2660     #
2661         if (!$this->in_anchor) {
2662             $text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
2663         }
2664         return $text;
2665     }
2666
2667
2668     function appendFootnotes($text) {
2669     #
2670     # Append footnote list to text.
2671     #
2672         $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
2673             array(&$this, '_appendFootnotes_callback'), $text);
2674
2675         if (!empty($this->footnotes_ordered)) {
2676             $text .= "\n\n";
2677             $text .= "<div class=\"footnotes\">\n";
2678             $text .= "<hr". MARKDOWN_EMPTY_ELEMENT_SUFFIX ."\n";
2679             $text .= "<ol>\n\n";
2680
2681             $attr = " rev=\"footnote\"";
2682             if ($this->fn_backlink_class != "") {
2683                 $class = $this->fn_backlink_class;
2684                 $class = $this->encodeAttribute($class);
2685                 $attr .= " class=\"$class\"";
2686             }
2687             if ($this->fn_backlink_title != "") {
2688                 $title = $this->fn_backlink_title;
2689                 $title = $this->encodeAttribute($title);
2690                 $attr .= " title=\"$title\"";
2691             }
2692             $num = 0;
2693
2694             while (!empty($this->footnotes_ordered)) {
2695                 $footnote = reset($this->footnotes_ordered);
2696                 $note_id = key($this->footnotes_ordered);
2697                 unset($this->footnotes_ordered[$note_id]);
2698
2699                 $footnote .= "\n"; # Need to append newline before parsing.
2700                 $footnote = $this->runBlockGamut("$footnote\n");
2701                 $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
2702                     array(&$this, '_appendFootnotes_callback'), $footnote);
2703
2704                 $attr = str_replace("%%", ++$num, $attr);
2705                 $note_id = $this->encodeAttribute($note_id);
2706
2707                 # Add backlink to last paragraph; create new paragraph if needed.
2708                 $backlink = "<a href=\"#fnref:$note_id\"$attr>&#8617;</a>";
2709                 if (preg_match('{</p>$}', $footnote)) {
2710                     $footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
2711                 } else {
2712                     $footnote .= "\n\n<p>$backlink</p>";
2713                 }
2714
2715                 $text .= "<li id=\"fn:$note_id\">\n";
2716                 $text .= $footnote . "\n";
2717                 $text .= "</li>\n\n";
2718             }
2719
2720             $text .= "</ol>\n";
2721             $text .= "</div>";
2722         }
2723         return $text;
2724     }
2725     function _appendFootnotes_callback($matches) {
2726         $node_id = $this->fn_id_prefix . $matches[1];
2727
2728         # Create footnote marker only if it has a corresponding footnote *and*
2729         # the footnote hasn't been used by another marker.
2730         if (isset($this->footnotes[$node_id])) {
2731             # Transfert footnote content to the ordered list.
2732             $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
2733             unset($this->footnotes[$node_id]);
2734
2735             $num = $this->footnote_counter++;
2736             $attr = " rel=\"footnote\"";
2737             if ($this->fn_link_class != "") {
2738                 $class = $this->fn_link_class;
2739                 $class = $this->encodeAttribute($class);
2740                 $attr .= " class=\"$class\"";
2741             }
2742             if ($this->fn_link_title != "") {
2743                 $title = $this->fn_link_title;
2744                 $title = $this->encodeAttribute($title);
2745                 $attr .= " title=\"$title\"";
2746             }
2747
2748             $attr = str_replace("%%", $num, $attr);
2749             $node_id = $this->encodeAttribute($node_id);
2750
2751             return
2752                 "<sup id=\"fnref:$node_id\">".
2753                 "<a href=\"#fn:$node_id\"$attr>$num</a>".
2754                 "</sup>";
2755         }
2756
2757         return "[^".$matches[1]."]";
2758     }
2759
2760
2761     ### Abbreviations ###
2762
2763     function stripAbbreviations($text) {
2764     #
2765     # Strips abbreviations from text, stores titles in hash references.
2766     #
2767         $less_than_tab = $this->tab_width - 1;
2768
2769         # Link defs are in the form: [id]*: url "optional title"
2770         $text = preg_replace_callback('{
2771             ^[ ]{0,'.$less_than_tab.'}\*\[(.+?)\][ ]?:  # abbr_id = $1
2772             (.*)                    # text = $2 (no blank lines allowed)
2773             }xm',
2774             array(&$this, '_stripAbbreviations_callback'),
2775             $text);
2776         return $text;
2777     }
2778     function _stripAbbreviations_callback($matches) {
2779         $abbr_word = $matches[1];
2780         $abbr_desc = $matches[2];
2781         if ($this->abbr_word_re)
2782             $this->abbr_word_re .= '|';
2783         $this->abbr_word_re .= preg_quote($abbr_word);
2784         $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
2785         return ''; # String that will replace the block
2786     }
2787
2788
2789     function doAbbreviations($text) {
2790     #
2791     # Find defined abbreviations in text and wrap them in <abbr> elements.
2792     #
2793         if ($this->abbr_word_re) {
2794             // cannot use the /x modifier because abbr_word_re may
2795             // contain significant spaces:
2796             $text = preg_replace_callback('{'.
2797                 '(?<![\w\x1A])'.
2798                 '(?:'.$this->abbr_word_re.')'.
2799                 '(?![\w\x1A])'.
2800                 '}',
2801                 array(&$this, '_doAbbreviations_callback'), $text);
2802         }
2803         return $text;
2804     }
2805     function _doAbbreviations_callback($matches) {
2806         $abbr = $matches[0];
2807         if (isset($this->abbr_desciptions[$abbr])) {
2808             $desc = $this->abbr_desciptions[$abbr];
2809             if (empty($desc)) {
2810                 return $this->hashPart("<abbr>$abbr</abbr>");
2811             } else {
2812                 $desc = $this->encodeAttribute($desc);
2813                 return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>");
2814             }
2815         } else {
2816             return $matches[0];
2817         }
2818     }
2819
2820 }
2821
2822
2823 /*
2824
2825 PHP Markdown Extra
2826 ==================
2827
2828 Description
2829 -----------
2830
2831 This is a PHP port of the original Markdown formatter written in Perl
2832 by John Gruber. This special "Extra" version of PHP Markdown features
2833 further enhancements to the syntax for making additional constructs
2834 such as tables and definition lists.
2835
2836 Markdown is a text-to-HTML filter; it translates an easy-to-read /
2837 easy-to-write structured text format into HTML. Markdown's text format
2838 is most similar to that of plain text email, and supports features such
2839 as headers, *emphasis*, code blocks, blockquotes, and links.
2840
2841 Markdown's syntax is designed not as a generic markup language, but
2842 specifically to serve as a front-end to (X)HTML. You can use span-level
2843 HTML tags anywhere in a Markdown document, and you can use block level
2844 HTML tags (like <div> and <table>) as well.
2845
2846 For more information about Markdown's syntax, see:
2847
2848 <http://daringfireball.net/projects/markdown/>
2849
2850
2851 Bugs
2852 ----
2853
2854 To file bug reports please send email to:
2855
2856 <michel.fortin@michelf.com>
2857
2858 Please include with your report: (1) the example input; (2) the output you
2859 expected; (3) the output Markdown actually produced.
2860
2861
2862 Version History
2863 ---------------
2864
2865 See the readme file for detailed release notes for this version.
2866
2867
2868 Copyright and License
2869 ---------------------
2870
2871 PHP Markdown & Extra
2872 Copyright (c) 2004-2008 Michel Fortin
2873 <http://www.michelf.com/>
2874 All rights reserved.
2875
2876 Based on Markdown
2877 Copyright (c) 2003-2006 John Gruber
2878 <http://daringfireball.net/>
2879 All rights reserved.
2880
2881 Redistribution and use in source and binary forms, with or without
2882 modification, are permitted provided that the following conditions are
2883 met:
2884
2885 *   Redistributions of source code must retain the above copyright notice,
2886     this list of conditions and the following disclaimer.
2887
2888 *   Redistributions in binary form must reproduce the above copyright
2889     notice, this list of conditions and the following disclaimer in the
2890     documentation and/or other materials provided with the distribution.
2891
2892 *   Neither the name "Markdown" nor the names of its contributors may
2893     be used to endorse or promote products derived from this software
2894     without specific prior written permission.
2895
2896 This software is provided by the copyright holders and contributors "as
2897 is" and any express or implied warranties, including, but not limited
2898 to, the implied warranties of merchantability and fitness for a
2899 particular purpose are disclaimed. In no event shall the copyright owner
2900 or contributors be liable for any direct, indirect, incidental, special,
2901 exemplary, or consequential damages (including, but not limited to,
2902 procurement of substitute goods or services; loss of use, data, or
2903 profits; or business interruption) however caused and on any theory of
2904 liability, whether in contract, strict liability, or tort (including
2905 negligence or otherwise) arising in any way out of the use of this
2906 software, even if advised of the possibility of such damage.
2907
2908 */
2909 ?>