src/markdown.cpp

   1 /******************************************************************************
   2  *
   3  * Copyright (C) 1997-2015 by Dimitri van Heesch.
   4  *
   5  * Permission to use, copy, modify, and distribute this software and its
   6  * documentation under the terms of the GNU General Public License is hereby
   7  * granted. No representations are made about the suitability of this software
   8  * for any purpose. It is provided "as is" without express or implied warranty.
   9  * See the GNU General Public License for more details.
  10  *
  11  * Documents produced by Doxygen are derivative works derived from the
  12  * input used in their production; they are not affected by this license.
  13  *
  14  */
  15
  16 /* Note: part of the code below is inspired by libupskirt written by
  17  * Natacha Porté. Original copyright message follows:
  18  *
  19  * Copyright (c) 2008, Natacha Porté
  20  *
  21  * Permission to use, copy, modify, and distribute this software for any
  22  * purpose with or without fee is hereby granted, provided that the above
  23  * copyright notice and this permission notice appear in all copies.
  24  *
  25  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  26  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  27  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  28  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  29  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  30  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  31  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  32  */
  33
  34 #include <stdio.h>
  35 #include <qglobal.h>
  36 #include <qregexp.h>
  37 #include <qfileinfo.h>
  38 #include <qdict.h>
  39 #include <qvector.h>
  40 //#define USE_ORIGINAL_TABLES
  41
  42 #include "markdown.h"
  43 #include "growbuf.h"
  44 #include "debug.h"
  45 #include "util.h"
  46 #include "doxygen.h"
  47 #include "commentscan.h"
  48 #include "entry.h"
  49 #include "bufstr.h"
  50 #include "commentcnv.h"
  51 #include "config.h"
  52 #include "section.h"
  53 #include "message.h"
  54
  55 //-----------
  56
  57 // is character at position i in data part of an identifier?
  58 #define isIdChar(i) \
  59   ((data[i]>='a' && data[i]<='z') || \
  60    (data[i]>='A' && data[i]<='Z') || \
  61    (data[i]>='0' && data[i]<='9') || \
  62    (((unsigned char)data[i])>=0x80)) // unicode characters
  63
  64 #define extraChar(i) \
  65   (data[i]=='-' || data[i]=='+' || data[i]=='!' || \
  66    data[i]=='?' || data[i]=='$' || data[i]=='@' || \
  67    data[i]=='&' || data[i]=='*' || data[i]=='%')
  68
  69 // is character at position i in data allowed before an emphasis section
  70 #define isOpenEmphChar(i) \
  71   (data[i]=='\n' || data[i]==' ' || data[i]=='\'' || data[i]=='<' || \
  72    data[i]=='{'  || data[i]=='(' || data[i]=='['  || data[i]==',' || \
  73    data[i]==':'  || data[i]==';')
  74
  75 // is character at position i in data an escape that prevents ending an emphasis section
  76 // so for example *bla (*.txt) is cool*
  77 #define ignoreCloseEmphChar(i) \
  78   (data[i]=='('  || data[i]=='{' || data[i]=='[' || data[i]=='<' || \
  79    data[i]=='\\' || \
  80    data[i]=='@')
  81
  82 //----------
  83
  84 struct LinkRef
  85 {
  86   LinkRef(const QCString &l,const QCString &t) : link(l), title(t) {}
  87   QCString link;
  88   QCString title;
  89 };
  90
  91 struct TableCell
  92 {
  93   TableCell() : colSpan(false) {}
  94   QCString cellText;
  95   bool colSpan;
  96 };
  97
/** Signature of a handler for a special character in inline markdown.
 *  As called from processInline(): \a data points at the special character,
 *  \a offset is its position in the buffer being processed and \a size the
 *  number of characters available from \a data on. The handler returns the
 *  number of characters it consumed, or 0 if it did not handle the input.
 */
typedef int (*action_t)(GrowBuf &out,const char *data,int offset,int size);
  99
 100 enum Alignment { AlignNone, AlignLeft, AlignCenter, AlignRight };
 101
 102
 103 //----------
 104
// link references; processLink() looks them up by their lower-cased id
static QDict<LinkRef> g_linkRefs(257);
// handlers for special characters, indexed by the unsigned value of the
// character; a 0 entry means the character is copied as-is (see processInline())
static action_t       g_actions[256];
// NOTE(review): presumably the entry, file name and line number currently
// being processed - confirm against the code that sets them
static Entry         *g_current;
static QCString       g_fileName;
static int            g_lineNr;
 110
 111 // In case a markdown page starts with a level1 header, that header is used
 112 // as a title of the page, in effect making it a level0 header, so the
 113 // level of all other sections needs to be corrected as well.
 114 // This flag is TRUE if corrections are needed.
 115 //static bool           g_correctSectionLevel;
 116
 117
 118 //----------
 119
// Amount of indentation, on top of the current indent, from which on a line
// is regarded as (part of) a code block instead of e.g. a block quote
// (see isBlockQuote()).
const int codeBlockIndent = 4;

// Forward declaration: the emphasis and link handlers below call
// processInline() recursively on the text they enclose.
static void processInline(GrowBuf &out,const char *data,int size);
 123
 124 // escape characters that have a special meaning later on.
 125 static QCString escapeSpecialChars(const QCString &s)
 126 {
 127   if (s.isEmpty()) return "";
 128   bool insideQuote=FALSE;
 129   GrowBuf growBuf;
 130   const char *p=s;
 131   char c,pc='\0';
 132   while ((c=*p++))
 133   {
 134     switch (c)
 135     {
 136       case '"':  if (pc!='\\')  { insideQuote=!insideQuote; } growBuf.addChar(c);   break;
 137       case '<':  if (!insideQuote) { growBuf.addChar('\\'); } growBuf.addChar('<'); break;
 138       case '>':  if (!insideQuote) { growBuf.addChar('\\'); } growBuf.addChar('>'); break;
 139       case '\\': if (!insideQuote) { growBuf.addChar('\\'); } growBuf.addChar('\\'); break;
 140       case '@':  if (!insideQuote) { growBuf.addChar('\\'); } growBuf.addChar('@'); break;
 141       default:   growBuf.addChar(c); break;
 142     }
 143     pc=c;
 144   }
 145   growBuf.addChar(0);
 146   return growBuf.get();
 147 }
 148
 149 static void convertStringFragment(QCString &result,const char *data,int size)
 150 {
 151   if (size<0) size=0;
 152   result.resize(size+1);
 153   memcpy(result.rawData(),data,size);
 154   result.at(size)='\0';
 155 }
 156
 157 /** helper function to convert presence of left and/or right alignment markers
 158  *  to a alignment value
 159  */
 160 static Alignment markersToAlignment(bool leftMarker,bool rightMarker)
 161 {
 162   //printf("markerToAlignment(%d,%d)\n",leftMarker,rightMarker);
 163   if (leftMarker && rightMarker)
 164   {
 165     return AlignCenter;
 166   }
 167   else if (leftMarker)
 168   {
 169     return AlignLeft;
 170   }
 171   else if (rightMarker)
 172   {
 173     return AlignRight;
 174   }
 175   else
 176   {
 177     return AlignNone;
 178   }
 179 }
 180
 181
 182 // Check if data contains a block command. If so returned the command
 183 // that ends the block. If not an empty string is returned.
 184 // Note When offset>0 character position -1 will be inspected.
 185 //
 186 // Checks for and skip the following block commands:
 187 // {@code .. { .. } .. }
 188 // \dot .. \enddot
 189 // \code .. \endcode
 190 // \msc .. \endmsc
 191 // \f$..\f$
 192 // \f[..\f]
 193 // \f{..\f}
 194 // \verbatim..\endverbatim
 195 // \latexonly..\endlatexonly
 196 // \htmlonly..\endhtmlonly
 197 // \xmlonly..\endxmlonly
 198 // \rtfonly..\endrtfonly
 199 // \manonly..\endmanonly
 200 static QCString isBlockCommand(const char *data,int offset,int size)
 201 {
 202   bool openBracket = offset>0 && data[-1]=='{';
 203   bool isEscaped = offset>0 && (data[-1]=='\\' || data[-1]=='@');
 204   if (isEscaped) return QCString();
 205
 206   int end=1;
 207   while (end<size && (data[end]>='a' && data[end]<='z')) end++;
 208   if (end==1) return QCString();
 209   QCString blockName;
 210   convertStringFragment(blockName,data+1,end-1);
 211   if (blockName=="code" && openBracket)
 212   {
 213     return "}";
 214   }
 215   else if (blockName=="dot"         ||
 216            blockName=="code"        ||
 217            blockName=="msc"         ||
 218            blockName=="verbatim"    ||
 219            blockName=="latexonly"   ||
 220            blockName=="htmlonly"    ||
 221            blockName=="xmlonly"     ||
 222            blockName=="rtfonly"     ||
 223            blockName=="manonly"     ||
 224            blockName=="docbookonly"
 225      )
 226   {
 227     return "end"+blockName;
 228   }
 229   else if (blockName=="startuml")
 230   {
 231     return "enduml";
 232   }
 233   else if (blockName=="f" && end<size)
 234   {
 235     if (data[end]=='$')
 236     {
 237       return "f$";
 238     }
 239     else if (data[end]=='[')
 240     {
 241       return "f]";
 242     }
 243     else if (data[end]=='{')
 244     {
 245       return "f}";
 246     }
 247   }
 248   return QCString();
 249 }
 250
 251 /** looks for the next emph char, skipping other constructs, and
 252  *  stopping when either it is found, or we are at the end of a paragraph.
 253  */
 254 static int findEmphasisChar(const char *data, int size, char c, int c_size)
 255 {
 256   int i = 1;
 257
 258   while (i<size)
 259   {
 260     while (i<size && data[i]!=c    && data[i]!='`' &&
 261                      data[i]!='\\' && data[i]!='@' &&
 262                      data[i]!='\n') i++;
 263     //printf("findEmphasisChar: data=[%s] i=%d c=%c\n",data,i,data[i]);
 264
 265     // not counting escaped chars or characters that are unlikely
 266     // to appear as the end of the emphasis char
 267     if (i>0 && ignoreCloseEmphChar(i-1))
 268     {
 269       i++;
 270       continue;
 271     }
 272     else
 273     {
 274       // get length of emphasis token
 275       int len = 0;
 276       while (i+len<size && data[i+len]==c)
 277       {
 278         len++;
 279       }
 280
 281       if (len>0)
 282       {
 283         if (len!=c_size || (i<size-len && isIdChar(i+len))) // to prevent touching some_underscore_identifier
 284         {
 285           i=i+len;
 286           continue;
 287         }
 288         return i; // found it
 289       }
 290     }
 291
 292     // skipping a code span
 293     if (data[i]=='`')
 294     {
 295       int snb=0;
 296       while (i<size && data[i]=='`') snb++,i++;
 297
 298       // find same pattern to end the span
 299       int enb=0;
 300       while (i<size && enb<snb)
 301       {
 302         if (data[i]=='`') enb++;
 303         if (snb==1 && data[i]=='\'') break; // ` ended by '
 304         i++;
 305       }
 306     }
 307     else if (data[i]=='@' || data[i]=='\\')
 308     { // skip over blocks that should not be processed
 309       QCString endBlockName = isBlockCommand(data+i,i,size-i);
 310       if (!endBlockName.isEmpty())
 311       {
 312         i++;
 313         int l = endBlockName.length();
 314         while (i<size-l)
 315         {
 316           if ((data[i]=='\\' || data[i]=='@') && // command
 317               data[i-1]!='\\' && data[i-1]!='@') // not escaped
 318           {
 319             if (qstrncmp(&data[i+1],endBlockName,l)==0)
 320             {
 321               break;
 322             }
 323           }
 324           i++;
 325         }
 326       }
 327       else if (i<size-1 && isIdChar(i+1)) // @cmd, stop processing, see bug 690385
 328       {
 329         return 0;
 330       }
 331       else
 332       {
 333         i++;
 334       }
 335     }
 336     else if (data[i]=='\n') // end * or _ at paragraph boundary
 337     {
 338       i++;
 339       while (i<size && data[i]==' ') i++;
 340       if (i>=size || data[i]=='\n') return 0; // empty line -> paragraph
 341     }
 342     else // should not get here!
 343     {
 344       i++;
 345     }
 346
 347   }
 348   return 0;
 349 }
 350
 351 /** process single emphasis */
 352 static int processEmphasis1(GrowBuf &out, const char *data, int size, char c)
 353 {
 354   int i = 0, len;
 355
 356   /* skipping one symbol if coming from emph3 */
 357   if (size>1 && data[0]==c && data[1]==c) { i=1; }
 358
 359   while (i<size)
 360   {
 361     len = findEmphasisChar(data+i, size-i, c, 1);
 362     if (len==0) return 0;
 363     i+=len;
 364     if (i>=size) return 0;
 365
 366     if (i+1<size && data[i+1]==c)
 367     {
 368       i++;
 369       continue;
 370     }
 371     if (data[i]==c && data[i-1]!=' ' && data[i-1]!='\n')
 372     {
 373       out.addStr("<em>");
 374       processInline(out,data,i);
 375       out.addStr("</em>");
 376       return i+1;
 377     }
 378   }
 379   return 0;
 380 }
 381
 382 /** process double emphasis */
 383 static int processEmphasis2(GrowBuf &out, const char *data, int size, char c)
 384 {
 385   int i = 0, len;
 386
 387   while (i<size)
 388   {
 389     len = findEmphasisChar(data+i, size-i, c, 2);
 390     if (len==0)
 391     {
 392       return 0;
 393     }
 394     i += len;
 395     if (i+1<size && data[i]==c && data[i+1]==c && i && data[i-1]!=' ' &&
 396         data[i-1]!='\n'
 397        )
 398     {
 399       if (c == '~') out.addStr("<strike>");
 400       else out.addStr("<strong>");
 401       processInline(out,data,i);
 402       if (c == '~') out.addStr("</strike>");
 403       else out.addStr("</strong>");
 404       return i + 2;
 405     }
 406     i++;
 407   }
 408   return 0;
 409 }
 410
 411 /** Parsing triple emphasis.
 412  *  Finds the first closing tag, and delegates to the other emph
 413  */
 414 static int processEmphasis3(GrowBuf &out, const char *data, int size, char c)
 415 {
 416   int i = 0, len;
 417
 418   while (i<size)
 419   {
 420     len = findEmphasisChar(data+i, size-i, c, 3);
 421     if (len==0)
 422     {
 423       return 0;
 424     }
 425     i+=len;
 426
 427     /* skip whitespace preceded symbols */
 428     if (data[i]!=c || data[i-1]==' ' || data[i-1]=='\n')
 429     {
 430       continue;
 431     }
 432
 433     if (i+2<size && data[i+1]==c && data[i+2]==c)
 434     {
 435       out.addStr("<em><strong>");
 436       processInline(out,data,i);
 437       out.addStr("</strong></em>");
 438       return i+3;
 439     }
 440     else if (i+1<size && data[i+1]==c)
 441     {
 442       // double symbol found, handing over to emph1
 443       len = processEmphasis1(out, data-2, size+2, c);
 444       if (len==0)
 445       {
 446         return 0;
 447       }
 448       else
 449       {
 450         return len - 2;
 451       }
 452     }
 453     else
 454     {
 455       // single symbol found, handing over to emph2
 456       len = processEmphasis2(out, data-1, size+1, c);
 457       if (len==0)
 458       {
 459         return 0;
 460       }
 461       else
 462       {
 463         return len - 1;
 464       }
 465     }
 466   }
 467   return 0;
 468 }
 469
 470 /** Process ndash and mdashes */
 471 static int processNmdash(GrowBuf &out,const char *data,int off,int size)
 472 {
 473   // precondition: data[0]=='-'
 474   int i=1;
 475   int count=1;
 476   if (i<size && data[i]=='-') // found --
 477   {
 478     count++,i++;
 479   }
 480   if (i<size && data[i]=='-') // found ---
 481   {
 482     count++,i++;
 483   }
 484   if (i<size && data[i]=='-') // found ----
 485   {
 486     count++;
 487   }
 488   if (count==2 && off>=2 && qstrncmp(data-2,"<!",2)==0) return 0; // start HTML comment
 489   if (count==2 && (data[2]=='>')) return 0; // end HTML comment
 490   if (count==2 && (off<8 || qstrncmp(data-8,"operator",8)!=0)) // -- => ndash
 491   {
 492     out.addStr("&ndash;");
 493     return 2;
 494   }
 495   else if (count==3) // --- => ndash
 496   {
 497     out.addStr("&mdash;");
 498     return 3;
 499   }
 500   // not an ndash or mdash
 501   return 0;
 502 }
 503
 504 /** Process quoted section "...", can contain one embedded newline */
 505 static int processQuoted(GrowBuf &out,const char *data,int,int size)
 506 {
 507   int i=1;
 508   int nl=0;
 509   while (i<size && data[i]!='"' && nl<2)
 510   {
 511     if (data[i]=='\n') nl++;
 512     i++;
 513   }
 514   if (i<size && data[i]=='"' && nl<2)
 515   {
 516     out.addStr(data,i+1);
 517     return i+1;
 518   }
 519   // not a quoted section
 520   return 0;
 521 }
 522
 523 /** Process a HTML tag. Note that <pre>..</pre> are treated specially, in
 524  *  the sense that all code inside is written unprocessed
 525  */
 526 static int processHtmlTag(GrowBuf &out,const char *data,int offset,int size)
 527 {
 528   if (offset>0 && data[-1]=='\\') return 0; // escaped <
 529
 530   // find the end of the html tag
 531   int i=1;
 532   int l=0;
 533   // compute length of the tag name
 534   while (i<size && isIdChar(i)) i++,l++;
 535   QCString tagName;
 536   convertStringFragment(tagName,data+1,i-1);
 537   if (tagName.lower()=="pre") // found <pre> tag
 538   {
 539     bool insideStr=FALSE;
 540     while (i<size-6)
 541     {
 542       char c=data[i];
 543       if (!insideStr && c=='<') // potential start of html tag
 544       {
 545         if (data[i+1]=='/' &&
 546             tolower(data[i+2])=='p' && tolower(data[i+3])=='r' &&
 547             tolower(data[i+4])=='e' && tolower(data[i+5])=='>')
 548         { // found </pre> tag, copy from start to end of tag
 549           out.addStr(data,i+6);
 550           //printf("found <pre>..</pre> [%d..%d]\n",0,i+6);
 551           return i+6;
 552         }
 553       }
 554       else if (insideStr && c=='"')
 555       {
 556         if (data[i-1]!='\\') insideStr=FALSE;
 557       }
 558       else if (c=='"')
 559       {
 560         insideStr=TRUE;
 561       }
 562       i++;
 563     }
 564   }
 565   else // some other html tag
 566   {
 567     if (l>0 && i<size)
 568     {
 569       if (data[i]=='/' && i<size-1 && data[i+1]=='>') // <bla/>
 570       {
 571         //printf("Found htmlTag={%s}\n",QCString(data).left(i+2).data());
 572         out.addStr(data,i+2);
 573         return i+2;
 574       }
 575       else if (data[i]=='>') // <bla>
 576       {
 577         //printf("Found htmlTag={%s}\n",QCString(data).left(i+1).data());
 578         out.addStr(data,i+1);
 579         return i+1;
 580       }
 581       else if (data[i]==' ') // <bla attr=...
 582       {
 583         i++;
 584         bool insideAttr=FALSE;
 585         while (i<size)
 586         {
 587           if (!insideAttr && data[i]=='"')
 588           {
 589             insideAttr=TRUE;
 590           }
 591           else if (data[i]=='"' && data[i-1]!='\\')
 592           {
 593             insideAttr=FALSE;
 594           }
 595           else if (!insideAttr && data[i]=='>') // found end of tag
 596           {
 597             //printf("Found htmlTag={%s}\n",QCString(data).left(i+1).data());
 598             out.addStr(data,i+1);
 599             return i+1;
 600           }
 601           i++;
 602         }
 603       }
 604     }
 605   }
 606   //printf("Not a valid html tag\n");
 607   return 0;
 608 }
 609
 610 static int processEmphasis(GrowBuf &out,const char *data,int offset,int size)
 611 {
 612   if ((offset>0 && !isOpenEmphChar(-1)) || // invalid char before * or _
 613       (size>1 && data[0]!=data[1] && !(isIdChar(1) || extraChar(1) || data[1]=='[')) || // invalid char after * or _
 614       (size>2 && data[0]==data[1] && !(isIdChar(2) || extraChar(2) || data[2]=='[')))   // invalid char after ** or __
 615   {
 616     return 0;
 617   }
 618
 619   char c = data[0];
 620   int ret;
 621   if (size>2 && c!='~' && data[1]!=c) // _bla or *bla
 622   {
 623     // whitespace cannot follow an opening emphasis
 624     if (data[1]==' ' || data[1]=='\n' ||
 625         (ret = processEmphasis1(out, data+1, size-1, c)) == 0)
 626     {
 627       return 0;
 628     }
 629     return ret+1;
 630   }
 631   if (size>3 && data[1]==c && data[2]!=c) // __bla or **bla
 632   {
 633     if (data[2]==' ' || data[2]=='\n' ||
 634         (ret = processEmphasis2(out, data+2, size-2, c)) == 0)
 635     {
 636       return 0;
 637     }
 638     return ret+2;
 639   }
 640   if (size>4 && c!='~' && data[1]==c && data[2]==c && data[3]!=c) // ___bla or ***bla
 641   {
 642     if (data[3]==' ' || data[3]=='\n' ||
 643         (ret = processEmphasis3(out, data+3, size-3, c)) == 0)
 644     {
 645       return 0;
 646     }
 647     return ret+3;
 648   }
 649   return 0;
 650 }
 651
 652 static void writeMarkdownImage(GrowBuf &out, const char *fmt, bool explicitTitle, QCString title, QCString content, QCString link, FileDef *fd)
 653 {
 654   out.addStr("@image ");
 655   out.addStr(fmt);
 656   out.addStr(" ");
 657   out.addStr(link.mid(fd ? 0 : 5));
 658   if (!explicitTitle && !content.isEmpty())
 659   {
 660     out.addStr(" \"");
 661     out.addStr(content);
 662     out.addStr("\"");
 663   }
 664   else if ((content.isEmpty() || explicitTitle) && !title.isEmpty())
 665   {
 666     out.addStr(" \"");
 667     out.addStr(title);
 668     out.addStr("\"");
 669   }
 670   out.addStr("\n");
 671 }
 672
/** Processes a markdown link or image.
 *
 *  Scanning of the link text starts at position 1, or at position 2 when
 *  data[0] is '!' (image, which must be followed by '['). After the text
 *  between the brackets one of the following forms is accepted:
 *  - inline link:    [text](link), optionally with a quoted title before the ')'
 *  - reference link: [text][id], where an empty id means the text is the id
 *  - minimal link:   [id], and the special case [TOC]
 *
 *  Reference ids are looked up lower-cased in g_linkRefs.
 *
 *  The output depends on the kind of link: a table of contents command for
 *  [TOC], \@image commands or an \<img\> tag for images, and a reference
 *  command or an \<a href\> tag for normal links.
 *
 *  @param out  buffer the result is written to
 *  @param data points at the start of the link
 *  @param size number of characters available from \a data on
 *  @returns the number of characters consumed, or 0 if the input is not a
 *           (resolvable) link, in which case the caller treats data[0] as
 *           normal text.
 */
static int processLink(GrowBuf &out,const char *data,int,int size)
{
  QCString content;
  QCString link;
  QCString title;
  int contentStart,contentEnd,linkStart,titleStart,titleEnd;
  bool isImageLink = FALSE;
  bool isToc = FALSE;
  int i=1;
  if (data[0]=='!') // image: ![...
  {
    isImageLink = TRUE;
    if (size<2 || data[1]!='[')
    {
      return 0;
    }
    i++;
  }
  contentStart=i;
  int level=1; // nesting level of the square brackets
  int nl=0;
  // find the matching ]
  while (i<size)
  {
    if (data[i-1]=='\\') // skip escaped characters
    {
    }
    else if (data[i]=='[')
    {
      level++;
    }
    else if (data[i]==']')
    {
      level--;
      if (level<=0) break;
    }
    else if (data[i]=='\n')
    {
      nl++;
      if (nl>1) return 0; // only allow one newline in the content
    }
    i++;
  }
  if (i>=size) return 0; // premature end of comment -> no link
  contentEnd=i;
  convertStringFragment(content,data+contentStart,contentEnd-contentStart);
  //printf("processLink: content={%s}\n",content.data());
  if (!isImageLink && content.isEmpty()) return 0; // no link text
  i++; // skip over ]

  // skip whitespace
  while (i<size && data[i]==' ') i++;
  if (i<size && data[i]=='\n') // one newline allowed here
  {
    i++;
    // skip more whitespace
    while (i<size && data[i]==' ') i++;
  }

  bool explicitTitle=FALSE;
  if (i<size && data[i]=='(') // inline link
  {
    i++;
    while (i<size && data[i]==' ') i++;
    if (i<size && data[i]=='<') i++; // link may be enclosed in <..>
    linkStart=i;
    nl=0;
    int braceCount=1;
    // the link runs until the start of the title (a quote) or until the
    // ) that matches the opening (
    while (i<size && data[i]!='\'' && data[i]!='"' && braceCount>0)
    {
      if (data[i]=='\n') // unexpected EOL
      {
        nl++;
        if (nl>1) return 0;
      }
      else if (data[i]=='(')
      {
        braceCount++;
      }
      else if (data[i]==')')
      {
        braceCount--;
      }
      if (braceCount>0) // stay on the matching ) once it is found
      {
        i++;
      }
    }
    if (i>=size || data[i]=='\n') return 0;
    convertStringFragment(link,data+linkStart,i-linkStart);
    link = link.stripWhiteSpace();
    //printf("processLink: link={%s}\n",link.data());
    if (link.isEmpty()) return 0;
    // remove the > of a link enclosed in <..>
    if (link.at(link.length()-1)=='>') link=link.left(link.length()-1);

    // optional title
    if (data[i]=='\'' || data[i]=='"')
    {
      char c = data[i]; // the quote character that has to end the title
      i++;
      titleStart=i;
      nl=0;
      while (i<size && data[i]!=')')
      {
        if (data[i]=='\n')
        {
          if (nl>1) return 0;
          nl++;
        }
        i++;
      }
      if (i>=size)
      {
        return 0;
      }
      titleEnd = i-1;
      // search back for closing marker
      while (titleEnd>titleStart && data[titleEnd]==' ') titleEnd--;
      if (data[titleEnd]==c) // found it
      {
        convertStringFragment(title,data+titleStart,titleEnd-titleStart);
        //printf("processLink: title={%s}\n",title.data());
      }
      else
      {
        return 0;
      }
    }
    i++; // skip over )
  }
  else if (i<size && data[i]=='[') // reference link
  {
    i++;
    linkStart=i;
    nl=0;
    // find matching ]
    while (i<size && data[i]!=']')
    {
      if (data[i]=='\n')
      {
        nl++;
        if (nl>1) return 0;
      }
      i++;
    }
    if (i>=size) return 0;
    // extract link
    convertStringFragment(link,data+linkStart,i-linkStart);
    //printf("processLink: link={%s}\n",link.data());
    link = link.stripWhiteSpace();
    if (link.isEmpty()) // shortcut link
    {
      link=content;
    }
    // lookup reference
    LinkRef *lr = g_linkRefs.find(link.lower());
    if (lr) // found it
    {
      link  = lr->link;
      title = lr->title;
      //printf("processLink: ref: link={%s} title={%s}\n",link.data(),title.data());
    }
    else // reference not found!
    {
      //printf("processLink: ref {%s} do not exist\n",link.lower().data());
      return 0;
    }
    i++; // skip over ]
  }
  // NOTE(review): the test on ':' presumably keeps a link reference
  // definition of the form "[id]: url" from being seen as a link - confirm
  else if (i<size && data[i]!=':' && !content.isEmpty()) // minimal link ref notation [some id]
  {
    LinkRef *lr = g_linkRefs.find(content.lower());
    //printf("processLink: minimal link {%s} lr=%p",content.data(),lr);
    if (lr) // found it
    {
      link  = lr->link;
      title = lr->title;
      explicitTitle=TRUE;
      i=contentEnd; // whitespace skipped after the ] is not part of the link
    }
    else if (content=="TOC")
    {
      isToc=TRUE;
      i=contentEnd;
    }
    else
    {
      return 0;
    }
    i++; // skip over ]
  }
  else
  {
    return 0;
  }
  if (isToc) // special case for [TOC]
  {
    // the command is only written when the configured level is in the range 1..5
    int level = Config_getInt(TOC_INCLUDE_HEADINGS);
    if (level > 0 && level <=5)
    {
      char levStr[10];
      sprintf(levStr,"%d",level);
      out.addStr("@tableofcontents{html:");
      out.addStr(levStr);
      out.addStr("}");
    }
  }
  else if (isImageLink)
  {
    bool ambig;
    FileDef *fd=0;
    // fd stays 0 if the link contains a ref command, since findFileDef()
    // is not called in that case
    if (link.find("@ref ")!=-1 || link.find("\\ref ")!=-1 ||
        (fd=findFileDef(Doxygen::imageNameDict,link,ambig)))
        // assume doxygen symbol link or local image link
    {
      // write one image command per output format
      writeMarkdownImage(out, "html", explicitTitle, title, content, link, fd);
      writeMarkdownImage(out, "latex", explicitTitle, title, content, link, fd);
      writeMarkdownImage(out, "rtf", explicitTitle, title, content, link, fd);
      writeMarkdownImage(out, "docbook", explicitTitle, title, content, link, fd);
    }
    else // other image: write a HTML img tag
    {
      out.addStr("<img src=\"");
      out.addStr(link);
      out.addStr("\" alt=\"");
      out.addStr(content);
      out.addStr("\"");
      if (!title.isEmpty())
      {
        out.addStr(" title=\"");
        out.addStr(substitute(title.simplifyWhiteSpace(),"\"","&quot;"));
        out.addStr("\"");
      }
      out.addStr("/>");
    }
  }
  else
  {
    SrcLangExt lang = getLanguageFromFileName(link);
    int lp=-1; // position of the ref command in the link, -1 if there is none
    if ((lp=link.find("@ref "))!=-1 || (lp=link.find("\\ref "))!=-1 || lang==SrcLangExt_Markdown)
        // assume doxygen symbol link
    {
      if (lp==-1) // link to markdown page
      {
        out.addStr("@ref ");
      }
      out.addStr(link);
      out.addStr(" \"");
      if (explicitTitle && !title.isEmpty())
      {
        out.addStr(title);
      }
      else
      {
        out.addStr(content);
      }
      out.addStr("\"");
    }
    else if (link.find('/')!=-1 || link.find('.')!=-1 || link.find('#')!=-1)
    { // file/url link
      out.addStr("<a href=\"");
      out.addStr(link);
      out.addStr("\"");
      if (!title.isEmpty())
      {
        out.addStr(" title=\"");
        out.addStr(substitute(title.simplifyWhiteSpace(),"\"","&quot;"));
        out.addStr("\"");
      }
      out.addStr(">");
      content = content.simplifyWhiteSpace();
      // the link text itself can contain inline markdown
      processInline(out,content,content.length());
      out.addStr("</a>");
    }
    else // avoid link to e.g. F[x](y)
    {
      //printf("no link for '%s'\n",link.data());
      return 0;
    }
  }
  return i;
}
 956
 957 /** '`' parsing a code span (assuming codespan != 0) */
 958 static int processCodeSpan(GrowBuf &out, const char *data, int /*offset*/, int size)
 959 {
 960   int end, nb = 0, i, f_begin, f_end;
 961
 962   /* counting the number of backticks in the delimiter */
 963   while (nb<size && data[nb]=='`')
 964   {
 965     nb++;
 966   }
 967
 968   /* finding the next delimiter */
 969   i = 0;
 970   int nl=0;
 971   for (end=nb; end<size && i<nb && nl<2; end++)
 972   {
 973     if (data[end]=='`')
 974     {
 975       i++;
 976     }
 977     else if (data[end]=='\n')
 978     {
 979       i=0;
 980       nl++;
 981     }
 982     else if (data[end]=='\'' && nb==1 && (end==size-1 || (end<size-1 && !isIdChar(end+1))))
 983     { // look for quoted strings like `some word', but skip strings like `it's cool`
 984       QCString textFragment;
 985       convertStringFragment(textFragment,data+nb,end-nb);
 986       out.addStr("&lsquo;");
 987       out.addStr(textFragment);
 988       out.addStr("&rsquo;");
 989       return end+1;
 990     }
 991     else
 992     {
 993       i=0;
 994     }
 995   }
 996   if (i < nb && end >= size)
 997   {
 998     return 0;  // no matching delimiter
 999   }
1000   if (nl==2) // too many newlines inside the span
1001   {
1002     return 0;
1003   }
1004
1005   // trimming outside whitespaces
1006   f_begin = nb;
1007   while (f_begin < end && data[f_begin]==' ')
1008   {
1009     f_begin++;
1010   }
1011   f_end = end - nb;
1012   while (f_end > nb && data[f_end-1]==' ')
1013   {
1014     f_end--;
1015   }
1016
1017   //printf("found code span '%s'\n",QCString(data+f_begin).left(f_end-f_begin).data());
1018
1019   /* real code span */
1020   if (f_begin < f_end)
1021   {
1022     QCString codeFragment;
1023     convertStringFragment(codeFragment,data+f_begin,f_end-f_begin);
1024     out.addStr("<tt>");
1025     //out.addStr(convertToHtml(codeFragment,TRUE));
1026     out.addStr(escapeSpecialChars(codeFragment));
1027     out.addStr("</tt>");
1028   }
1029   return end;
1030 }
1031
1032
1033 static int processSpecialCommand(GrowBuf &out, const char *data, int offset, int size)
1034 {
1035   int i=1;
1036   QCString endBlockName = isBlockCommand(data,offset,size);
1037   if (!endBlockName.isEmpty())
1038   {
1039     int l = endBlockName.length();
1040     while (i<size-l)
1041     {
1042       if ((data[i]=='\\' || data[i]=='@') && // command
1043           data[i-1]!='\\' && data[i-1]!='@') // not escaped
1044       {
1045         if (qstrncmp(&data[i+1],endBlockName,l)==0)
1046         {
1047           //printf("found end at %d\n",i);
1048           out.addStr(data,i+1+l);
1049           return i+1+l;
1050         }
1051       }
1052       i++;
1053     }
1054   }
1055   if (size>1 && data[0]=='\\')
1056   {
1057     char c=data[1];
1058     if (c=='[' || c==']' || c=='*' || c=='!' || c=='(' || c==')' || c=='`' || c=='_')
1059     {
1060       out.addChar(data[1]);
1061       return 2;
1062     }
1063     else if (c=='-' && size>3 && data[2]=='-' && data[3]=='-') // \---
1064     {
1065       out.addStr(&data[1],3);
1066       return 4;
1067     }
1068     else if (c=='-' && size>2 && data[2]=='-') // \--
1069     {
1070       out.addStr(&data[1],2);
1071       return 3;
1072     }
1073   }
1074   return 0;
1075 }
1076
1077 static void processInline(GrowBuf &out,const char *data,int size)
1078 {
1079   int i=0, end=0;
1080   action_t action = 0;
1081   while (i<size)
1082   {
1083     while (end<size && ((action=g_actions[(uchar)data[end]])==0)) end++;
1084     out.addStr(data+i,end-i);
1085     if (end>=size) break;
1086     i=end;
1087     end = action(out,data+i,i,size-i);
1088     if (!end)
1089     {
1090       end=i+1;
1091     }
1092     else
1093     {
1094       i+=end;
1095       end=i;
1096     }
1097   }
1098 }
1099
/** Returns whether the line is a setext-style header underline.
 *
 *  After optional leading spaces the line must consist of at least two
 *  '=' (level 1) or at least two '-' (level 2) characters, optionally
 *  followed by spaces, and must then end (end of buffer or newline).
 *
 *  @param[in] data start of the line to inspect
 *  @param[in] size number of bytes available in \a data
 *  @returns 1 for a level 1 underline, 2 for a level 2 underline and
 *           0 if the line is not an underline.
 */
static int isHeaderline(const char *data, int size)
{
  int i=0;
  while (i<size && data[i]==' ') i++;
  if (i>=size) return 0; // only spaces (or empty): never read data[size]

  const char marker = data[i];
  if (marker!='=' && marker!='-') return 0;

  // count the run of underline characters
  int count=0;
  while (i<size && data[i]==marker)
  {
    i++;
    count++;
  }
  // trailing spaces are allowed, anything else disqualifies the line
  while (i<size && data[i]==' ') i++;
  if (count>1 && (i>=size || data[i]=='\n'))
  {
    return marker=='=' ? 1 : 2;
  }
  return 0;
}
1122
1123 /** returns TRUE if this line starts a block quote */
1124 static bool isBlockQuote(const char *data,int size,int indent)
1125 {
1126   int i = 0;
1127   while (i<size && data[i]==' ') i++;
1128   if (i<indent+codeBlockIndent) // could be a quotation
1129   {
1130     // count >'s and skip spaces
1131     int level=0;
1132     while (i<size && (data[i]=='>' || data[i]==' '))
1133     {
1134       if (data[i]=='>') level++;
1135       i++;
1136     }
1137     // last characters should be a space or newline,
1138     // so a line starting with >= does not match
1139     return level>0 && i<size && ((data[i-1]==' ') || data[i]=='\n');
1140   }
1141   else // too much indentation -> code block
1142   {
1143     return FALSE;
1144   }
1145   //return i<size && data[i]=='>' && i<indent+codeBlockIndent;
1146 }
1147
/** Tests whether \a data starts with a link reference definition of the form
 *  `[refid]: link "optional title"` and extracts its parts.
 *
 *  @param[in]  data  start of the text to inspect
 *  @param[in]  size  number of bytes available in \a data
 *  @param[out] refid text between the square brackets
 *  @param[out] link  link target; for "@ref" / "\ref" the rest of the line
 *                    up to a '"' or newline is appended
 *  @param[out] title text of the optional title, emptied if there is none
 *  @returns 0 if this is not a link reference; otherwise the position where
 *           the definition ends (the end of the buffer, or the position of
 *           the newline that terminates it).
 */
static int isLinkRef(const char *data,int size,
            QCString &refid,QCString &link,QCString &title)
{
  //printf("isLinkRef data={%s}\n",data);
  // format: start with [some text]:
  int i = 0;
  while (i<size && data[i]==' ') i++;
  if (i>=size || data[i]!='[') return 0;
  i++;
  int refIdStart=i;
  // the closing ] has to be on the same line
  while (i<size && data[i]!='\n' && data[i]!=']') i++;
  if (i>=size || data[i]!=']') return 0;
  convertStringFragment(refid,data+refIdStart,i-refIdStart);
  if (refid.isEmpty()) return 0;
  //printf("  isLinkRef: found refid='%s'\n",refid.data());
  i++;
  if (i>=size || data[i]!=':') return 0;
  i++;

  // format: whitespace* \n? whitespace* (<url> | url)
  while (i<size && data[i]==' ') i++;
  if (i<size && data[i]=='\n')
  {
    // the link may start on the line following the "[refid]:" part
    i++;
    while (i<size && data[i]==' ') i++;
  }
  if (i>=size) return 0;

  if (i<size && data[i]=='<') i++;
  int linkStart=i;
  while (i<size && data[i]!=' ' && data[i]!='\n') i++;
  int linkEnd=i;
  // NOTE(review): the loop above only stops on ' ', '\n' or end of buffer,
  // so data[i] cannot be '>' here; for the <url> form the closing '>'
  // appears to end up inside `link` -- confirm whether that is intended.
  if (i<size && data[i]=='>') i++;
  if (linkStart==linkEnd) return 0; // empty link
  convertStringFragment(link,data+linkStart,linkEnd-linkStart);
  //printf("  isLinkRef: found link='%s'\n",link.data());
  if (link=="@ref" || link=="\\ref")
  {
    // the argument of the ref command is separated from it by a space,
    // so append the remainder up to the title quote or the end of the line
    int argStart=i;
    while (i<size && data[i]!='\n' && data[i]!='"') i++;
    QCString refArg;
    convertStringFragment(refArg,data+argStart,i-argStart);
    link+=refArg;
  }

  title.resize(0);

  // format: (whitespace* \n? whitespace* ( 'title' | "title" | (title) ))?
  int eol=0; // position of the newline ending the definition, 0 if none seen
  while (i<size && data[i]==' ') i++;
  if (i<size && data[i]=='\n')
  {
    eol=i;
    i++;
    while (i<size && data[i]==' ') i++;
  }
  if (i>=size)
  {
    //printf("end of isLinkRef while looking for title! i=%d\n",i);
    return i; // end of buffer while looking for the optional title
  }

  char c = data[i];
  if (c=='\'' || c=='"' || c=='(') // optional title present?
  {
    //printf("  start of title found! char='%c'\n",c);
    i++;
    if (c=='(') c=')'; // replace c by end character
    int titleStart=i;
    // search for end of the line
    while (i<size && data[i]!='\n') i++;
    eol = i;

    // search back to matching character
    int end=i-1;
    while (end>titleStart && data[end]!=c) end--;
    if (end>titleStart)
    {
      convertStringFragment(title,data+titleStart,end-titleStart);
    }
    // if no closing character is found the title stays empty, but eol has
    // already been moved to the end of this line
    //printf("  title found: '%s'\n",title.data());
  }
  while (i<size && data[i]==' ') i++;
  //printf("end of isLinkRef: i=%d size=%d data[i]='%c' eol=%d\n",
  //    i,size,data[i],eol);
  if      (i>=size)       return i;    // end of buffer while ref id was found
  else if (eol)           return eol;  // end of line while ref id was found
  return 0;                            // invalid link ref
}
1238
/** Returns 1 if the line is a horizontal ruler, 0 otherwise.
 *
 *  A ruler line contains, apart from spaces and one optional trailing
 *  newline, only repetitions of a single character out of '*', '-' and '_',
 *  and at least three of them.
 */
static int isHRuler(const char *data,int size)
{
  // a trailing newline is not part of the line content
  int len = size;
  if (len>0 && data[len-1]=='\n') len--;

  int pos=0;
  while (pos<len && data[pos]==' ') pos++;
  if (pos>=len) return 0; // blank line

  const char ruleChar = data[pos];
  if (ruleChar!='*' && ruleChar!='-' && ruleChar!='_') return 0;

  int count=0;
  for (; pos<len; pos++)
  {
    const char ch = data[pos];
    if (ch==ruleChar)
    {
      count++;
    }
    else if (ch!=' ')
    {
      return 0; // something other than the rule character or a space
    }
  }
  return count>=3;
}
1265
1266 static QCString extractTitleId(QCString &title, int level)
1267 {
1268   //static QRegExp r1("^[a-z_A-Z][a-z_A-Z0-9\\-]*:");
1269   static QRegExp r2("\\{#[a-z_A-Z][a-z_A-Z0-9\\-]*\\}");
1270   int l=0;
1271   int i = r2.match(title,0,&l);
1272   if (i!=-1 && title.mid(i+l).stripWhiteSpace().isEmpty()) // found {#id} style id
1273   {
1274     QCString id = title.mid(i+2,l-3);
1275     title = title.left(i);
1276     //printf("found id='%s' title='%s'\n",id.data(),title.data());
1277     return id;
1278   }
1279   if ((level > 0) && (level <= Config_getInt(TOC_INCLUDE_HEADINGS)))
1280   {
1281     static int autoId = 0;
1282     QCString id;
1283     id.sprintf("autotoc_md%d",autoId++);
1284     //printf("auto-generated id='%s' title='%s'\n",id.data(),title.data());
1285     return id;
1286   }
1287   //printf("no id found in title '%s'\n",title.data());
1288   return "";
1289 }
1290
1291
1292 static int isAtxHeader(const char *data,int size,
1293                        QCString &header,QCString &id)
1294 {
1295   int i = 0, end;
1296   int level = 0, blanks=0;
1297
1298   // find start of header text and determine heading level
1299   while (i<size && data[i]==' ') i++;
1300   if (i>=size || data[i]!='#')
1301   {
1302     return 0;
1303   }
1304   while (i<size && level<6 && data[i]=='#') i++,level++;
1305   while (i<size && data[i]==' ') i++,blanks++;
1306   if (level==1 && blanks==0)
1307   {
1308     return 0; // special case to prevent #someid seen as a header (see bug 671395)
1309   }
1310
1311   // find end of header text
1312   end=i;
1313   while (end<size && data[end]!='\n') end++;
1314   while (end>i && (data[end-1]=='#' || data[end-1]==' ')) end--;
1315
1316   // store result
1317   convertStringFragment(header,data+i,end-i);
1318   id = extractTitleId(header, level);
1319   if (!id.isEmpty()) // strip #'s between title and id
1320   {
1321     i=header.length()-1;
1322     while (i>=0 && (header.at(i)=='#' || header.at(i)==' ')) i--;
1323     header=header.left(i+1);
1324   }
1325
1326   return level;
1327 }
1328
1329 static int isEmptyLine(const char *data,int size)
1330 {
1331   int i=0;
1332   while (i<size)
1333   {
1334     if (data[i]=='\n') return TRUE;
1335     if (data[i]!=' ') return FALSE;
1336     i++;
1337   }
1338   return TRUE;
1339 }
1340
// Evaluates to true if the four characters starting at data[i] spell "<li>"
// (the letters are matched case insensitively). The macro relies on a
// variable named `data` being in scope and performs no bounds checking
// itself: the caller has to make sure data[i]..data[i+3] may be read.
#define isLiTag(i) \
   (data[(i)]=='<' && \
   (data[(i)+1]=='l' || data[(i)+1]=='L') && \
   (data[(i)+2]=='i' || data[(i)+2]=='I') && \
   (data[(i)+3]=='>'))
1346
// Compute the indent from the start of the input, excluding list markers
// such as -, -#, *, +, 1., and <li>.
//
// The scan walks over leading spaces and over at most one list marker; the
// characters of an accepted marker (and the spaces around it) are counted as
// indentation. The returned value is the number of columns consumed that way.
//
// data: start of the line to inspect
// size: number of bytes available in data
static int computeIndentExcludingListMarkers(const char *data,int size)
{
  int i=0;
  int indent=0;
  bool isDigit=FALSE;
  bool isLi=FALSE;
  bool listMarkerSkipped=FALSE;
  // NOTE(review): isDigit and isLi are assigned as a side effect of
  // evaluating the loop condition, and only when the tests before them fail
  // (short-circuit evaluation); otherwise they keep their previous value.
  while (i<size &&
         (data[i]==' ' ||                                    // space
          (!listMarkerSkipped &&                             // first list marker
           (data[i]=='+' || data[i]=='-' || data[i]=='*' ||  // unordered list char
            (data[i]=='#' && i>0 && data[i-1]=='-') ||       // -# item
            (isDigit=(data[i]>='1' && data[i]<='9')) ||      // ordered list marker?
            (isLi=(i<size-3 && isLiTag(i)))                  // <li> tag
           )
          )
         )
        )
  {
    if (isDigit) // skip over ordered list marker '10. '
    {
      int j=i+1;
      while (j<size && ((data[j]>='0' && data[j]<='9') || data[j]=='.'))
      {
        if (data[j]=='.') // should be end of the list marker
        {
          if (j<size-1 && data[j+1]==' ') // valid list marker
          {
            listMarkerSkipped=TRUE;
            // count the digits and the dot; the space that follows is
            // counted by the increment at the end of the outer loop
            indent+=j+1-i;
            i=j+1;
            break;
          }
          else // not a list marker
          {
            break;
          }
        }
        j++;
      }
    }
    else if (isLi)
    {
      // the closing '>' is counted by the increment at the end of the loop
      i+=3; // skip over <li>
      indent+=3;
      listMarkerSkipped=TRUE;
    }
    else if (data[i]=='-' && i<size-2 && data[i+1]=='#' && data[i+2]==' ')
    { // case "-# "
      listMarkerSkipped=TRUE; // only a single list marker is accepted
      i++; // skip over #
      indent++;
    }
    else if (data[i]!=' ' && i<size-1 && data[i+1]==' ')
    { // case "- " or "+ " or "* "
      listMarkerSkipped=TRUE; // only a single list marker is accepted
    }
    // a non-space character that did not turn out to be a list marker
    // terminates the indentation
    if (data[i]!=' ' && !listMarkerSkipped)
    { // end of indent
      break;
    }
    indent++,i++;
  }
  //printf("{%s}->%d\n",QCString(data).left(size).data(),indent);
  return indent;
}
1415
1416 static bool isFencedCodeBlock(const char *data,int size,int refIndent,
1417                              QCString &lang,int &start,int &end,int &offset)
1418 {
1419   // rules: at least 3 ~~~, end of the block same amount of ~~~'s, otherwise
1420   // return FALSE
1421   int i=0;
1422   int indent=0;
1423   int startTildes=0;
1424   while (i<size && data[i]==' ') indent++,i++;
1425   if (indent>=refIndent+4) return FALSE; // part of code block
1426   char tildaChar='~';
1427   if (i<size && data[i]=='`') tildaChar='`';
1428   while (i<size && data[i]==tildaChar) startTildes++,i++;
1429   if (startTildes<3) return FALSE; // not enough tildes
1430   if (i<size && data[i]=='{') i++; // skip over optional {
1431   int startLang=i;
1432   while (i<size && (data[i]!='\n' && data[i]!='}' && data[i]!=' ')) i++;
1433   convertStringFragment(lang,data+startLang,i-startLang);
1434   while (i<size && data[i]!='\n') i++; // proceed to the end of the line
1435   start=i;
1436   while (i<size)
1437   {
1438     if (data[i]==tildaChar)
1439     {
1440       end=i-1;
1441       int endTildes=0;
1442       while (i<size && data[i]==tildaChar) endTildes++,i++;
1443       while (i<size && data[i]==' ') i++;
1444       if (i==size || data[i]=='\n')
1445       {
1446         offset=i;
1447         return endTildes==startTildes;
1448       }
1449     }
1450     i++;
1451   }
1452   return FALSE;
1453 }
1454
/** Tests whether the line at \a data starts an indented code block.
 *
 *  The line has to be non-empty and indented by at least codeBlockIndent
 *  spaces. The text before \a data is inspected as well: the function reads
 *  up to \a offset bytes located in front of \a data (negative indices), so
 *  \a data has to point \a offset bytes into a larger buffer.
 *
 *  @param[in]     data   start of the line to inspect
 *  @param[in]     offset number of bytes preceding \a data that may be read
 *  @param[in]     size   number of bytes available from \a data onwards
 *  @param[in,out] indent reference indentation; when at least two preceding
 *                        lines are found it is replaced by the indentation of
 *                        the line two lines back (excluding list markers)
 *  @returns TRUE if the line is indented at least codeBlockIndent spaces
 *           more than the reference indentation and the preceding line (when
 *           it can be determined) is empty.
 */
static bool isCodeBlock(const char *data,int offset,int size,int &indent)
{
  //printf("<isCodeBlock(offset=%d,size=%d,indent=%d)\n",offset,size,indent);
  // determine the indent of this line
  int i=0;
  int indent0=0;
  while (i<size && data[i]==' ') indent0++,i++;

  if (indent0<codeBlockIndent)
  {
    //printf(">isCodeBlock: line is not indented enough %d<4\n",indent0);
    return FALSE;
  }
  if (indent0>=size || data[indent0]=='\n') // empty line does not start a code block
  {
    //printf("only spaces at the end of a comment block\n");
    return FALSE;
  }

  i=offset;
  int nl=0;
  int nl_pos[3]; // line start positions relative to data, nearest line first
  // search back 3 lines and remember the start of lines -1 and -2
  while (i>0 && nl<3)
  {
    // data[i-offset-1] runs from data[-1] down to data[-offset]
    if (data[i-offset-1]=='\n') nl_pos[nl++]=i-offset;
    i--;
  }

  // if there are only 2 preceding lines, then line -2 starts at -offset
  if (i==0 && nl==2) nl_pos[nl++]=-offset;
  //printf("  nl=%d\n",nl);

  if (nl==3) // we have at least 2 preceding lines
  {
    //printf("  positions: nl_pos=[%d,%d,%d] line[-2]='%s' line[-1]='%s'\n",
    //    nl_pos[0],nl_pos[1],nl_pos[2],
    //    QCString(data+nl_pos[1]).left(nl_pos[0]-nl_pos[1]-1).data(),
    //    QCString(data+nl_pos[2]).left(nl_pos[1]-nl_pos[2]-1).data());

    // check that line -1 is empty
    if (!isEmptyLine(data+nl_pos[1],nl_pos[0]-nl_pos[1]-1))
    {
      return FALSE;
    }

    // determine the indent of line -2
    indent=computeIndentExcludingListMarkers(data+nl_pos[2],nl_pos[1]-nl_pos[2]);

    //printf(">isCodeBlock local_indent %d>=%d+4=%d\n",
    //    indent0,indent2,indent0>=indent2+4);
    // if the line is indented at least codeBlockIndent more -> code block
    return indent0>=indent+codeBlockIndent;
  }
  else // not enough lines to determine the relative indent, use global indent
  {
    // check that line -1 is empty
    // (with a single newline found, everything before data is treated as
    // line -1)
    if (nl==1 && !isEmptyLine(data-offset,offset-1))
    {
      return FALSE;
    }
    //printf(">isCodeBlock global indent %d>=%d+4=%d nl=%d\n",
    //    indent0,indent,indent0>=indent+4,nl);
    return indent0>=indent+codeBlockIndent;
  }
}
1521
/** Finds the location of the table's contents in the string \a data.
 *  Only one line will be inspected.
 *
 *  A '|' directly after the leading spaces and an unescaped '|' in front of
 *  the trailing spaces delimit the row and are not part of the content.
 *  Inside the content every '|' that is not preceded by a backslash
 *  separates two columns.
 *
 *  @param[in] data pointer to the string buffer.
 *  @param[in] size the size of the buffer.
 *  @param[out] start offset of the first character of the table content
 *  @param[out] end   offset of the last character of the table content
 *  @param[out] columns number of table columns found; 0 if the line has no
 *              column separators, 1 for a row of the form `|...|` without
 *              inner separators
 *  @returns The offset until the next line in the buffer, i.e. one past the
 *           newline (or one past \a size if the line has no newline).
 */
int findTableColumns(const char *data,int size,int &start,int &end,int &columns)
{
  int pos=0;
  int outerBars=0; // number of delimiting |'s found (leading and/or trailing)

  // locate the first content character
  while (pos<size && data[pos]==' ') pos++;
  if (pos<size && data[pos]=='|') // leading | does not count
  {
    pos++;
    outerBars++;
  }
  start = pos;

  // locate the end of the line
  while (pos<size && data[pos]!='\n') pos++;
  const int nextLine = pos+1;

  // walk back over trailing spaces to the last content character
  int last = pos-1;
  while (last>0 && data[last]==' ') last--;
  if (last>0 && data[last]=='|' && data[last-1]!='\\') // trailing | does not count
  {
    last--;
    outerBars++;
  }
  end = last;

  // count the columns between start and end
  columns = 0;
  if (end>start)
  {
    for (int k=start; k<=end; k++)
    {
      const bool separator = data[k]=='|' && (k==0 || data[k-1]!='\\');
      if (separator) columns++;
      if (columns==1) columns=2; // the first | splits the line into two columns
    }
  }
  if (outerBars==2 && columns==0) // table row has | ... |
  {
    columns=1;
  }
  //printf("findTableColumns(start=%d,end=%d,columns=%d) eol=%d\n",
  //    start,end,columns,nextLine);
  return nextLine;
}
1568
1569 /** Returns TRUE iff data points to the start of a table block */
1570 static bool isTableBlock(const char *data,int size)
1571 {
1572   int cc0,start,end;
1573
1574   // the first line should have at least two columns separated by '|'
1575   int i = findTableColumns(data,size,start,end,cc0);
1576   if (i>=size || cc0<1)
1577   {
1578     //printf("isTableBlock: no |'s in the header\n");
1579     return FALSE;
1580   }
1581
1582   int cc1;
1583   int ret = findTableColumns(data+i,size-i,start,end,cc1);
1584   int j=i+start;
1585   // separator line should consist of |, - and : and spaces only
1586   while (j<=end+i)
1587   {
1588     if (data[j]!=':' && data[j]!='-' && data[j]!='|' && data[j]!=' ')
1589     {
1590       //printf("isTableBlock: invalid character '%c'\n",data[j]);
1591       return FALSE; // invalid characters in table separator
1592     }
1593     j++;
1594   }
1595   if (cc1!=cc0) // number of columns should be same as previous line
1596   {
1597     return FALSE;
1598   }
1599
1600   i+=ret; // goto next line
1601   int cc2;
1602   findTableColumns(data+i,size-i,start,end,cc2);
1603
1604   //printf("isTableBlock: %d\n",cc1==cc2);
1605   return cc1==cc2;
1606 }
1607
/** Converts a markdown table starting at \a data into HTML table markup and
 *  appends it to \a out.
 *
 *  The first line is taken as the header row and the second line as the
 *  alignment row; each following line with the same number of columns as the
 *  header becomes a body row. The first line with a different column count
 *  (or the end of the buffer) terminates the table.
 *
 *  When USE_ORIGINAL_TABLES is defined the rows are written directly as they
 *  are read. Otherwise all cells are collected first, which makes it possible
 *  to emit rowspan attributes for cells whose text is "^" and colspan
 *  attributes for empty cells (the "||" sequence).
 *
 *  NOTE(review): nothing here re-validates the input; presumably the caller
 *  has checked it with isTableBlock() first -- confirm against the caller.
 *
 *  @param[out] out  buffer receiving the generated markup
 *  @param[in]  data start of the table's header line
 *  @param[in]  size number of bytes available in \a data
 *  @returns the offset in \a data of the first line after the table.
 */
static int writeTableBlock(GrowBuf &out,const char *data,int size)
{
  int i=0,j,k;
  int columns,start,end,cc;

  // header line: determines the number of columns of the table
  i = findTableColumns(data,size,start,end,columns);

  int headerStart = start;
  int headerEnd = end;

#ifdef USE_ORIGINAL_TABLES
  out.addStr("<table>");

  // write table header, in range [start..end]
  out.addStr("<tr>");
#endif

  // read cell alignments
  int ret = findTableColumns(data+i,size-i,start,end,cc);
  k=0;
  Alignment *columnAlignment = new Alignment[columns]; // released at the end of the function

  // leftMarker: a ':' was seen before the first '-' of the current cell
  // rightMarker: the last ':' or '-' seen in the current cell was a ':'
  bool leftMarker=FALSE,rightMarker=FALSE;
  bool startFound=FALSE;
  j=start+i;
  while (j<=end+i)
  {
    if (!startFound)
    {
      if (data[j]==':') { leftMarker=TRUE; startFound=TRUE; }
      if (data[j]=='-') startFound=TRUE;
      //printf("  data[%d]=%c startFound=%d\n",j,data[j],startFound);
    }
    if      (data[j]=='-') rightMarker=FALSE;
    else if (data[j]==':') rightMarker=TRUE;
    // an unescaped | ends the current cell
    if (j<=end+i && (data[j]=='|' && (j==0 || data[j-1]!='\\')))
    {
      if (k<columns)
      {
        columnAlignment[k] = markersToAlignment(leftMarker,rightMarker);
        //printf("column[%d] alignment=%d\n",k,columnAlignment[k]);
        leftMarker=FALSE;
        rightMarker=FALSE;
        startFound=FALSE;
      }
      k++;
    }
    j++;
  }
  // the last cell is not terminated by a | inside [start..end]
  if (k<columns)
  {
    columnAlignment[k] = markersToAlignment(leftMarker,rightMarker);
    //printf("column[%d] alignment=%d\n",k,columnAlignment[k]);
  }
  // proceed to next line
  i+=ret;

#ifdef USE_ORIGINAL_TABLES

  // write the header cells, one <th> per column
  int m=headerStart;
  for (k=0;k<columns;k++)
  {
    out.addStr("<th");
    switch (columnAlignment[k])
    {
      case AlignLeft:   out.addStr(" align=\"left\""); break;
      case AlignRight:  out.addStr(" align=\"right\""); break;
      case AlignCenter: out.addStr(" align=\"center\""); break;
      case AlignNone:   break;
    }
    out.addStr(">");
    // copy the cell text up to the next unescaped |
    while (m<=headerEnd && (data[m]!='|' || (m>0 && data[m-1]=='\\')))
    {
      out.addChar(data[m++]);
    }
    m++;
  }
  out.addStr("\n</th>\n");

  // write table cells
  while (i<size)
  {
    int ret = findTableColumns(data+i,size-i,start,end,cc);
    //printf("findTableColumns cc=%d\n",cc);
    if (cc!=columns) break; // end of table

    out.addStr("<tr>");
    j=start+i;
    int columnStart=j;
    k=0;
    while (j<=end+i)
    {
      if (j==columnStart)
      {
        out.addStr("<td");
        switch (columnAlignment[k])
        {
          case AlignLeft:   out.addStr(" align=\"left\""); break;
          case AlignRight:  out.addStr(" align=\"right\""); break;
          case AlignCenter: out.addStr(" align=\"center\""); break;
          case AlignNone:   break;
        }
        out.addStr(">");
      }
      if (j<=end+i && (data[j]=='|' && (j==0 || data[j-1]!='\\')))
      {
        columnStart=j+1;
        k++;
      }
      else
      {
        out.addChar(data[j]);
      }
      j++;
    }
    out.addChar('\n');

    // proceed to next line
    i+=ret;
  }

  out.addStr("</table> ");
#else
  // Store the table cell information by row then column.  This
  // allows us to handle row spanning.
  QVector<QVector<TableCell> > tableContents;
  tableContents.setAutoDelete(TRUE);

  // collect the header cells
  int m=headerStart;
  QVector<TableCell> *headerContents = new QVector<TableCell>(columns);
  headerContents->setAutoDelete(TRUE);
  for (k=0;k<columns;k++)
  {
    headerContents->insert(k, new TableCell);
    // copy the cell text up to the next unescaped |
    while (m<=headerEnd && (data[m]!='|' || (m>0 && data[m-1]=='\\')))
    {
      headerContents->at(k)->cellText += data[m++];
    }
    m++;
    // do the column span test before stripping white space
    // || is spanning columns, | | is not
    headerContents->at(k)->colSpan = headerContents->at(k)->cellText.isEmpty();
    headerContents->at(k)->cellText = headerContents->at(k)->cellText.stripWhiteSpace();
  }
  // qvector doesn't have an append like std::vector, so we gotta do
  // extra work
  tableContents.resize(1);
  tableContents.insert(0, headerContents);

  // collect the cells of the body rows
  int rowNum = 1;
  while (i<size)
  {
    int ret = findTableColumns(data+i,size-i,start,end,cc);
    if (cc!=columns) break; // end of table

    j=start+i;
    k=0;
    QVector<TableCell> *rowContents = new QVector<TableCell>(columns);
    rowContents->setAutoDelete(TRUE);
    rowContents->insert(k, new TableCell);
    while (j<=end+i)
    {
      if (j<=end+i && (data[j]=='|' && (j==0 || data[j-1]!='\\')))
      {
        // do the column span test before stripping white space
        // || is spanning columns, | | is not
        rowContents->at(k)->colSpan = rowContents->at(k)->cellText.isEmpty();
        rowContents->at(k)->cellText = rowContents->at(k)->cellText.stripWhiteSpace();
        k++;
        rowContents->insert(k, new TableCell);
      } // if (j<=end+i && (data[j]=='|' && (j==0 || data[j-1]!='\\')))
      else
      {
        rowContents->at(k)->cellText += data[j];
      } // else { if (j<=end+i && (data[j]=='|' && (j==0 || data[j-1]!='\\'))) }
      j++;
    } // while (j<=end+i)
    // do the column span test before stripping white space
    // || is spanning columns, | | is not
    rowContents->at(k)->colSpan = rowContents->at(k)->cellText.isEmpty();
    rowContents->at(k)->cellText = rowContents->at(k)->cellText.stripWhiteSpace();
    // qvector doesn't have an append like std::vector, so we gotta do
    // extra work
    tableContents.resize(tableContents.size()+1);
    tableContents.insert(rowNum++, rowContents);

    // proceed to next line
    i+=ret;
  }


  // emit the collected rows; row 0 is the header row
  out.addStr("<table class=\"markdownTable\">\n");
  QCString cellTag("th"), cellClass("class=\"markdownTableHead");
  for (unsigned row = 0; row < tableContents.size(); row++)
  {
    if (row)
    {
      if (row % 2)
      {
        out.addStr("<tr class=\"markdownTableRowOdd\">\n");
      }
      else
      {
        out.addStr("<tr class=\"markdownTableRowEven\">\n");
      }
    }
    else
    {
      out.addStr("  <tr class=\"markdownTableHead\">\n");
    }
    for (int c = 0; c < columns; c++)
    {
      // save the cell text for use after column span computation
      QCString cellText(tableContents[row]->at(c)->cellText);

      // Row span handling.  Spanning rows will contain a caret ('^').
      // If the current cell contains just a caret, this is part of an
      // earlier row's span and the cell should not be added to the
      // output.
      if (tableContents[row]->at(c)->cellText == "^")
        continue;
      // count the cells directly below this one that consist of a caret
      unsigned rowSpan = 1, spanRow = row+1;
      while ((spanRow < tableContents.size()) &&
             (tableContents[spanRow]->at(c)->cellText == "^"))
      {
        spanRow++;
        rowSpan++;
      }

      // the class attribute is completed by the alignment suffix below
      out.addStr("    <" + cellTag + " " + cellClass);
      // use appropriate alignment style
      switch (columnAlignment[c])
      {
        case AlignLeft:   out.addStr("Left\""); break;
        case AlignRight:  out.addStr("Right\""); break;
        case AlignCenter: out.addStr("Center\""); break;
        case AlignNone:   out.addStr("None\""); break;
      }

      if (rowSpan > 1)
      {
        QCString spanStr;
        spanStr.setNum(rowSpan);
        out.addStr(" rowspan=\"" + spanStr + "\"");
      }
      // Column span handling, assumes that column spans will have
      // empty strings, which would indicate the sequence "||", used
      // to signify spanning columns.
      // Note that this advances c, so the spanned cells are skipped by
      // the enclosing loop.
      unsigned colSpan = 1;
      while ((c < columns-1) &&
             tableContents[row]->at(c+1)->colSpan)
      {
        c++;
        colSpan++;
      }
      if (colSpan > 1)
      {
        QCString spanStr;
        spanStr.setNum(colSpan);
        out.addStr(" colspan=\"" + spanStr + "\"");
      }
      // need at least one space on either side of the cell text in
      // order for doxygen to do other formatting
      out.addStr("> " + cellText + " </" + cellTag + ">\n");
    }
    // all rows after the header row use body cells
    cellTag = "td";
    cellClass = "class=\"markdownTableBody";
    out.addStr("  </tr>\n");
  }
  out.addStr("</table>\n");
#endif

  delete[] columnAlignment;
  return i;
}
1884
1885
/** Returns 1 if the first line in \a data ends with a markdown line break,
 *  i.e. the newline is directly preceded by two spaces; 0 otherwise
 *  (including when the buffer contains no newline at all).
 */
static int hasLineBreak(const char *data,int size)
{
  int eol=0;
  while (eol<size && data[eol]!='\n') eol++;
  if (eol>=size || eol<2) return 0; // no newline, or too short for two spaces
  return (data[eol-1]==' ' && data[eol-2]==' ') ? 1 : 0;
}
1894
1895
1896 void writeOneLineHeaderOrRuler(GrowBuf &out,const char *data,int size)
1897 {
1898   int level;
1899   QCString header;
1900   QCString id;
1901   if (isHRuler(data,size))
1902   {
1903     out.addStr("\n<hr>\n");
1904   }
1905   else if ((level=isAtxHeader(data,size,header,id)))
1906   {
1907     //if (level==1) g_correctSectionLevel=FALSE;
1908     //if (g_correctSectionLevel) level--;
1909     QCString hTag;
1910     if (level<5 && !id.isEmpty())
1911     {
1912       SectionInfo::SectionType type = SectionInfo::Anchor;
1913       switch(level)
1914       {
1915         case 1:  out.addStr("@section ");
1916                  type=SectionInfo::Section;
1917                  break;
1918         case 2:  out.addStr("@subsection ");
1919                  type=SectionInfo::Subsection;
1920                  break;
1921         case 3:  out.addStr("@subsubsection ");
1922                  type=SectionInfo::Subsubsection;
1923                  break;
1924         default: out.addStr("@paragraph ");
1925                  type=SectionInfo::Paragraph;
1926                  break;
1927       }
1928       out.addStr(id);
1929       out.addStr(" ");
1930       out.addStr(header);
1931       out.addStr("\n");
1932     }
1933     else
1934     {
1935       if (!id.isEmpty())
1936       {
1937         out.addStr("\\anchor "+id+"\n");
1938       }
1939       hTag.sprintf("h%d",level);
1940       out.addStr("<"+hTag+">");
1941       out.addStr(header);
1942       out.addStr("</"+hTag+">\n");
1943     }
1944   }
1945   else // nothing interesting -> just output the line
1946   {
1947     out.addStr(data,size);
1948     if (hasLineBreak(data,size))
1949     {
1950       out.addStr("\n");
1951     }
1952   }
1953 }
1954
1955 static int writeBlockQuote(GrowBuf &out,const char *data,int size)
1956 {
1957   int l;
1958   int i=0;
1959   int curLevel=0;
1960   int end=0;
1961   while (i<size)
1962   {
1963     // find end of this line
1964     end=i+1;
1965     while (end<=size && data[end-1]!='\n') end++;
1966     int j=i;
1967     int level=0;
1968     int indent=i;
1969     // compute the quoting level
1970     while (j<end && (data[j]==' ' || data[j]=='>'))
1971     {
1972       if (data[j]=='>') { level++; indent=j+1; }
1973       else if (j>0 && data[j-1]=='>') indent=j+1;
1974       j++;
1975     }
1976     if (j>0 && data[j-1]=='>' &&
1977         !(j==size || data[j]=='\n')) // disqualify last > if not followed by space
1978     {
1979       indent--;
1980       j--;
1981     }
1982     if (level>curLevel) // quote level increased => add start markers
1983     {
1984       for (l=curLevel;l<level;l++)
1985       {
1986         out.addStr("<blockquote>\n");
1987       }
1988     }
1989     else if (level<curLevel) // quote level descreased => add end markers
1990     {
1991       for (l=level;l<curLevel;l++)
1992       {
1993         out.addStr("</blockquote>\n");
1994       }
1995     }
1996     curLevel=level;
1997     if (level==0) break; // end of quote block
1998     // copy line without quotation marks
1999     out.addStr(data+indent,end-indent);
2000     // proceed with next line
2001     i=end;
2002   }
2003   // end of comment within blockquote => add end markers
2004   for (l=0;l<curLevel;l++)
2005   {
2006     out.addStr("</blockquote>\n");
2007   }
2008   return i;
2009 }
2010
2011 static int writeCodeBlock(GrowBuf &out,const char *data,int size,int refIndent)
2012 {
2013   int i=0,end;
2014   //printf("writeCodeBlock: data={%s}\n",QCString(data).left(size).data());
2015   out.addStr("@verbatim\n");
2016   int emptyLines=0;
2017   while (i<size)
2018   {
2019     // find end of this line
2020     end=i+1;
2021     while (end<=size && data[end-1]!='\n') end++;
2022     int j=i;
2023     int indent=0;
2024     while (j<end && data[j]==' ') j++,indent++;
2025     //printf("j=%d end=%d indent=%d refIndent=%d tabSize=%d data={%s}\n",
2026     //    j,end,indent,refIndent,Config_getInt(TAB_SIZE),QCString(data+i).left(end-i-1).data());
2027     if (j==end-1) // empty line
2028     {
2029       emptyLines++;
2030       i=end;
2031     }
2032     else if (indent>=refIndent+codeBlockIndent) // enough indent to contine the code block
2033     {
2034       while (emptyLines>0) // write skipped empty lines
2035       {
2036         // add empty line
2037         out.addStr("\n");
2038         emptyLines--;
2039       }
2040       // add code line minus the indent
2041       out.addStr(data+i+refIndent+codeBlockIndent,end-i-refIndent-codeBlockIndent);
2042       i=end;
2043     }
2044     else // end of code block
2045     {
2046       break;
2047     }
2048   }
2049   out.addStr("@endverbatim\n");
2050   while (emptyLines>0) // write skipped empty lines
2051   {
2052     // add empty line
2053     out.addStr("\n");
2054     emptyLines--;
2055   }
2056   //printf("i=%d\n",i);
2057   return i;
2058 }
2059
2060 // start searching for the end of the line start at offset \a i
2061 // keeping track of possible blocks that need to be skipped.
2062 static void findEndOfLine(GrowBuf &out,const char *data,int size,
2063                           int &pi,int&i,int &end)
2064 {
2065   // find end of the line
2066   int nb=0;
2067   end=i+1;
2068   while (end<=size && data[end-1]!='\n')
2069   {
2070     // while looking for the end of the line we might encounter a block
2071     // that needs to be passed unprocessed.
2072     if ((data[end-1]=='\\' || data[end-1]=='@') &&          // command
2073         (end<=1 || (data[end-2]!='\\' && data[end-2]!='@')) // not escaped
2074        )
2075     {
2076       QCString endBlockName = isBlockCommand(data+end-1,end-1,size-(end-1));
2077       end++;
2078       if (!endBlockName.isEmpty())
2079       {
2080         int l = endBlockName.length();
2081         for (;end<size-l-1;end++) // search for end of block marker
2082         {
2083           if ((data[end]=='\\' || data[end]=='@') &&
2084               data[end-1]!='\\' && data[end-1]!='@'
2085              )
2086           {
2087             if (qstrncmp(&data[end+1],endBlockName,l)==0)
2088             {
2089               if (pi!=-1) // output previous line if available
2090               {
2091                 //printf("feol out={%s}\n",QCString(data+pi).left(i-pi).data());
2092                 out.addStr(data+pi,i-pi);
2093               }
2094               // found end marker, skip over this block
2095               //printf("feol.block out={%s}\n",QCString(data+i).left(end+l+1-i).data());
2096               out.addStr(data+i,end+l+1-i);
2097               pi=-1;
2098               i=end+l+1; // continue after block
2099               end=i+1;
2100               break;
2101             }
2102           }
2103         }
2104       }
2105     }
2106     else if (nb==0 && data[end-1]=='<' && end<size-6 &&
2107              (end<=1 || (data[end-2]!='\\' && data[end-2]!='@'))
2108             )
2109     {
2110       if (tolower(data[end])=='p' && tolower(data[end+1])=='r' &&
2111           tolower(data[end+2])=='e' && data[end+3]=='>') // <pre> tag
2112       {
2113         if (pi!=-1) // output previous line if available
2114         {
2115           out.addStr(data+pi,i-pi);
2116         }
2117         // output part until <pre>
2118         out.addStr(data+i,end-1-i);
2119         // output part until </pre>
2120         i = end-1 + processHtmlTag(out,data+end-1,end-1,size-end+1);
2121         pi=-1;
2122         end = i+1;
2123         break;
2124       }
2125       else
2126       {
2127         end++;
2128       }
2129     }
2130     else if (nb==0 && data[end-1]=='`')
2131     {
2132       while (end<=size && data[end-1]=='`') end++,nb++;
2133     }
2134     else if (nb>0 && data[end-1]=='`')
2135     {
2136       int enb=0;
2137       while (end<=size && data[end-1]=='`') end++,enb++;
2138       if (enb==nb) nb=0;
2139     }
2140     else
2141     {
2142       end++;
2143     }
2144   }
2145   //printf("findEndOfLine pi=%d i=%d end=%d {%s}\n",pi,i,end,QCString(data+i).left(end-i).data());
2146 }
2147
2148 static void writeFencedCodeBlock(GrowBuf &out,const char *data,const char *lng,
2149                 int blockStart,int blockEnd)
2150 {
2151   QCString lang = lng;
2152   if (!lang.isEmpty() && lang.at(0)=='.') lang=lang.mid(1);
2153   out.addStr("@code");
2154   if (!lang.isEmpty())
2155   {
2156     out.addStr("{"+lang+"}");
2157   }
2158   out.addStr(data+blockStart,blockEnd-blockStart);
2159   out.addStr("\n");
2160   out.addStr("@endcode");
2161 }
2162
2163 static QCString processQuotations(const QCString &s,int refIndent)
2164 {
2165   GrowBuf out;
2166   const char *data = s.data();
2167   int size = s.length();
2168   int i=0,end=0,pi=-1;
2169   int blockStart,blockEnd,blockOffset;
2170   QCString lang;
2171   while (i<size)
2172   {
2173     findEndOfLine(out,data,size,pi,i,end);
2174     // line is now found at [i..end)
2175
2176     if (pi!=-1)
2177     {
2178       if (isFencedCodeBlock(data+pi,size-pi,refIndent,lang,blockStart,blockEnd,blockOffset))
2179       {
2180         writeFencedCodeBlock(out,data+pi,lang,blockStart,blockEnd);
2181         i=pi+blockOffset;
2182         pi=-1;
2183         end=i+1;
2184         continue;
2185       }
2186       else if (isBlockQuote(data+pi,i-pi,refIndent))
2187       {
2188         i = pi+writeBlockQuote(out,data+pi,size-pi);
2189         pi=-1;
2190         end=i+1;
2191         continue;
2192       }
2193       else
2194       {
2195         //printf("quote out={%s}\n",QCString(data+pi).left(i-pi).data());
2196         out.addStr(data+pi,i-pi);
2197       }
2198     }
2199     pi=i;
2200     i=end;
2201   }
2202   if (pi!=-1 && pi<size) // deal with the last line
2203   {
2204     if (isBlockQuote(data+pi,size-pi,refIndent))
2205     {
2206       writeBlockQuote(out,data+pi,size-pi);
2207     }
2208     else
2209     {
2210       out.addStr(data+pi,size-pi);
2211     }
2212   }
2213   out.addChar(0);
2214
2215   //printf("Process quotations\n---- input ----\n%s\n---- output ----\n%s\n------------\n",
2216   //    s.data(),out.get());
2217
2218   return out.get();
2219 }
2220
2221 static QCString processBlocks(const QCString &s,int indent)
2222 {
2223   GrowBuf out;
2224   const char *data = s.data();
2225   int size = s.length();
2226   int i=0,end=0,pi=-1,ref,level;
2227   QCString id,link,title;
2228   int blockIndent = indent;
2229
2230   // get indent for the first line
2231   end = i+1;
2232   int sp=0;
2233   while (end<=size && data[end-1]!='\n')
2234   {
2235     if (data[end-1]==' ') sp++;
2236     end++;
2237   }
2238
2239 #if 0 // commented out, since starting with a comment block is probably a usage error
2240       // see also http://stackoverflow.com/q/20478611/784672
2241
2242   // special case when the documentation starts with a code block
2243   // since the first line is skipped when looking for a code block later on.
2244   if (end>codeBlockIndent && isCodeBlock(data,0,end,blockIndent))
2245   {
2246     i=writeCodeBlock(out,data,size,blockIndent);
2247     end=i+1;
2248     pi=-1;
2249   }
2250 #endif
2251
2252   // process each line
2253   while (i<size)
2254   {
2255     findEndOfLine(out,data,size,pi,i,end);
2256     // line is now found at [i..end)
2257
2258     //printf("findEndOfLine: pi=%d i=%d end=%d\n",pi,i,end);
2259
2260     if (pi!=-1)
2261     {
2262       int blockStart,blockEnd,blockOffset;
2263       QCString lang;
2264       blockIndent = indent;
2265       //printf("isHeaderLine(%s)=%d\n",QCString(data+i).left(size-i).data(),level);
2266       if ((level=isHeaderline(data+i,size-i))>0)
2267       {
2268         //if (level==1) g_correctSectionLevel=FALSE;
2269         //if (g_correctSectionLevel) level--;
2270         //printf("Found header at %d-%d\n",i,end);
2271         while (pi<size && data[pi]==' ') pi++;
2272         QCString header,id;
2273         convertStringFragment(header,data+pi,i-pi-1);
2274         id = extractTitleId(header, level);
2275         //printf("header='%s' is='%s'\n",header.data(),id.data());
2276         if (!header.isEmpty())
2277         {
2278           if (!id.isEmpty())
2279           {
2280             out.addStr(level==1?"@section ":"@subsection ");
2281             out.addStr(id);
2282             out.addStr(" ");
2283             out.addStr(header);
2284             out.addStr("\n\n");
2285             SectionInfo *si = Doxygen::sectionDict->find(id);
2286             if (si)
2287             {
2288               if (si->lineNr != -1)
2289               {
2290                 warn(g_fileName,g_lineNr,"multiple use of section label '%s', (first occurrence: %s, line %d)",header.data(),si->fileName.data(),si->lineNr);
2291               }
2292               else
2293               {
2294                 warn(g_fileName,g_lineNr,"multiple use of section label '%s', (first occurrence: %s)",header.data(),si->fileName.data());
2295               }
2296             }
2297             else
2298             {
2299               si = new SectionInfo(g_fileName,g_lineNr,id,header,
2300                       level==1 ? SectionInfo::Section : SectionInfo::Subsection,level);
2301               if (g_current)
2302               {
2303                 g_current->anchors->append(si);
2304               }
2305               Doxygen::sectionDict->append(id,si);
2306             }
2307           }
2308           else
2309           {
2310             out.addStr(level==1?"<h1>":"<h2>");
2311             out.addStr(header);
2312             out.addStr(level==1?"\n</h1>\n":"\n</h2>\n");
2313           }
2314         }
2315         else
2316         {
2317           out.addStr("\n<hr>\n");
2318         }
2319         pi=-1;
2320         i=end;
2321         end=i+1;
2322         continue;
2323       }
2324       else if ((ref=isLinkRef(data+pi,size-pi,id,link,title)))
2325       {
2326         //printf("found link ref: id='%s' link='%s' title='%s'\n",
2327         //       id.data(),link.data(),title.data());
2328         g_linkRefs.insert(id.lower(),new LinkRef(link,title));
2329         i=ref+pi;
2330         pi=-1;
2331         end=i+1;
2332       }
2333       else if (isFencedCodeBlock(data+pi,size-pi,indent,lang,blockStart,blockEnd,blockOffset))
2334       {
2335         //printf("Found FencedCodeBlock lang='%s' start=%d end=%d code={%s}\n",
2336         //       lang.data(),blockStart,blockEnd,QCString(data+pi+blockStart).left(blockEnd-blockStart).data());
2337         writeFencedCodeBlock(out,data+pi,lang,blockStart,blockEnd);
2338         i=pi+blockOffset;
2339         pi=-1;
2340         end=i+1;
2341         continue;
2342       }
2343       else if (isCodeBlock(data+i,i,end-i,blockIndent))
2344       {
2345         // skip previous line (it is empty anyway)
2346         i+=writeCodeBlock(out,data+i,size-i,blockIndent);
2347         pi=-1;
2348         end=i+1;
2349         continue;
2350       }
2351       else if (isTableBlock(data+pi,size-pi))
2352       {
2353         i=pi+writeTableBlock(out,data+pi,size-pi);
2354         pi=-1;
2355         end=i+1;
2356         continue;
2357       }
2358       else
2359       {
2360         writeOneLineHeaderOrRuler(out,data+pi,i-pi);
2361       }
2362     }
2363     pi=i;
2364     i=end;
2365   }
2366   //printf("last line %d size=%d\n",i,size);
2367   if (pi!=-1 && pi<size) // deal with the last line
2368   {
2369     if (isLinkRef(data+pi,size-pi,id,link,title))
2370     {
2371       //printf("found link ref: id='%s' link='%s' title='%s'\n",
2372       //    id.data(),link.data(),title.data());
2373       g_linkRefs.insert(id.lower(),new LinkRef(link,title));
2374     }
2375     else
2376     {
2377       writeOneLineHeaderOrRuler(out,data+pi,size-pi);
2378     }
2379   }
2380
2381   out.addChar(0);
2382   return out.get();
2383 }
2384
2385 /** returns TRUE if input string docs starts with \@page or \@mainpage command */
2386 static bool isExplicitPage(const QCString &docs)
2387 {
2388   int i=0;
2389   const char *data = docs.data();
2390   if (data)
2391   {
2392     int size=docs.size();
2393     while (i<size && (data[i]==' ' || data[i]=='\n'))
2394     {
2395       i++;
2396     }
2397     if (i<size+1 &&
2398         (data[i]=='\\' || data[i]=='@') &&
2399         (qstrncmp(&data[i+1],"page ",5)==0 || qstrncmp(&data[i+1],"mainpage",8)==0)
2400        )
2401     {
2402       return TRUE;
2403     }
2404   }
2405   return FALSE;
2406 }
2407
2408 static QCString extractPageTitle(QCString &docs,QCString &id)
2409 {
2410   int ln=0;
2411   // first first non-empty line
2412   QCString title;
2413   const char *data = docs.data();
2414   int i=0;
2415   int size=docs.size();
2416   while (i<size && (data[i]==' ' || data[i]=='\n'))
2417   {
2418     if (data[i]=='\n') ln++;
2419     i++;
2420   }
2421   if (i>=size) return "";
2422   int end1=i+1;
2423   while (end1<size && data[end1-1]!='\n') end1++;
2424   //printf("i=%d end1=%d size=%d line='%s'\n",i,end1,size,docs.mid(i,end1-i).data());
2425   // first line from i..end1
2426   if (end1<size)
2427   {
2428     ln++;
2429     // second line form end1..end2
2430     int end2=end1+1;
2431     while (end2<size && data[end2-1]!='\n') end2++;
2432     if (isHeaderline(data+end1,size-end1))
2433     {
2434       convertStringFragment(title,data+i,end1-i-1);
2435       QCString lns;
2436       lns.fill('\n',ln);
2437       docs=lns+docs.mid(end2);
2438       id = extractTitleId(title, 0);
2439       //printf("extractPageTitle(title='%s' docs='%s' id='%s')\n",title.data(),docs.data(),id.data());
2440       return title;
2441     }
2442   }
2443   if (i<end1 && isAtxHeader(data+i,end1-i,title,id)>0)
2444   {
2445     docs=docs.mid(end1);
2446   }
2447   //printf("extractPageTitle(title='%s' docs='%s' id='%s')\n",title.data(),docs.data(),id.data());
2448   return title;
2449 }
2450
2451 static QCString detab(const QCString &s,int &refIndent)
2452 {
2453   static int tabSize = Config_getInt(TAB_SIZE);
2454   GrowBuf out;
2455   int size = s.length();
2456   const char *data = s.data();
2457   int i=0;
2458   int col=0;
2459   const int maxIndent=1000000; // value representing infinity
2460   int minIndent=maxIndent;
2461   while (i<size)
2462   {
2463     char c = data[i++];
2464     switch(c)
2465     {
2466       case '\t': // expand tab
2467         {
2468           int stop = tabSize - (col%tabSize);
2469           //printf("expand at %d stop=%d\n",col,stop);
2470           col+=stop;
2471           while (stop--) out.addChar(' ');
2472         }
2473         break;
2474       case '\n': // reset colomn counter
2475         out.addChar(c);
2476         col=0;
2477         break;
2478       case ' ': // increment column counter
2479         out.addChar(c);
2480         col++;
2481         break;
2482       default: // non-whitespace => update minIndent
2483         out.addChar(c);
2484         if (c<0 && i<size) // multibyte sequence
2485         {
2486           out.addChar(data[i++]); // >= 2 bytes
2487           if (((uchar)c&0xE0)==0xE0 && i<size)
2488           {
2489             out.addChar(data[i++]); // 3 bytes
2490           }
2491           if (((uchar)c&0xF0)==0xF0 && i<size)
2492           {
2493             out.addChar(data[i++]); // 4 byres
2494           }
2495         }
2496         if (col<minIndent) minIndent=col;
2497         col++;
2498     }
2499   }
2500   if (minIndent!=maxIndent) refIndent=minIndent; else refIndent=0;
2501   out.addChar(0);
2502   //printf("detab refIndent=%d\n",refIndent);
2503   return out.get();
2504 }
2505
2506 //---------------------------------------------------------------------------
2507
2508 QCString processMarkdown(const QCString &fileName,const int lineNr,Entry *e,const QCString &input)
2509 {
2510   static bool init=FALSE;
2511   if (!init)
2512   {
2513     // setup callback table for special characters
2514     g_actions[(unsigned int)'_']=processEmphasis;
2515     g_actions[(unsigned int)'*']=processEmphasis;
2516     g_actions[(unsigned int)'~']=processEmphasis;
2517     g_actions[(unsigned int)'`']=processCodeSpan;
2518     g_actions[(unsigned int)'\\']=processSpecialCommand;
2519     g_actions[(unsigned int)'@']=processSpecialCommand;
2520     g_actions[(unsigned int)'[']=processLink;
2521     g_actions[(unsigned int)'!']=processLink;
2522     g_actions[(unsigned int)'<']=processHtmlTag;
2523     g_actions[(unsigned int)'-']=processNmdash;
2524     g_actions[(unsigned int)'"']=processQuoted;
2525     init=TRUE;
2526   }
2527
2528   g_linkRefs.setAutoDelete(TRUE);
2529   g_linkRefs.clear();
2530   g_current = e;
2531   g_fileName = fileName;
2532   g_lineNr   = lineNr;
2533   static GrowBuf out;
2534   if (input.isEmpty()) return input;
2535   out.clear();
2536   int refIndent;
2537   // for replace tabs by spaces
2538   QCString s = detab(input,refIndent);
2539   //printf("======== DeTab =========\n---- output -----\n%s\n---------\n",s.data());
2540   // then process quotation blocks (as these may contain other blocks)
2541   s = processQuotations(s,refIndent);
2542   //printf("======== Quotations =========\n---- output -----\n%s\n---------\n",s.data());
2543   // then process block items (headers, rules, and code blocks, references)
2544   s = processBlocks(s,refIndent);
2545   //printf("======== Blocks =========\n---- output -----\n%s\n---------\n",s.data());
2546   // finally process the inline markup (links, emphasis and code spans)
2547   processInline(out,s,s.length());
2548   out.addChar(0);
2549   Debug::print(Debug::Markdown,0,"======== Markdown =========\n---- input ------- \n%s\n---- output -----\n%s\n=========\n",qPrint(input),qPrint(out.get()));
2550   return out.get();
2551 }
2552
2553 //---------------------------------------------------------------------------
2554
2555 QCString markdownFileNameToId(const QCString &fileName)
2556 {
2557   QCString baseFn  = stripFromPath(QFileInfo(fileName).absFilePath().utf8());
2558   int i = baseFn.findRev('.');
2559   if (i!=-1) baseFn = baseFn.left(i);
2560   QCString baseName = substitute(substitute(baseFn," ","_"),"/","_");
2561   return "md_"+baseName;
2562 }
2563
2564 void MarkdownFileParser::parseInput(const char *fileName,
2565                 const char *fileBuf,
2566                 Entry *root,
2567                 bool /*sameTranslationUnit*/,
2568                 QStrList & /*filesInSameTranslationUnit*/)
2569 {
2570   Entry *current = new Entry;
2571   current->lang = SrcLangExt_Markdown;
2572   current->fileName = fileName;
2573   current->docFile  = fileName;
2574   current->docLine  = 1;
2575   QCString docs = fileBuf;
2576   QCString id;
2577   QCString title=extractPageTitle(docs,id).stripWhiteSpace();
2578   QCString titleFn = QFileInfo(fileName).baseName().utf8();
2579   QCString fn      = QFileInfo(fileName).fileName().utf8();
2580   static QCString mdfileAsMainPage = Config_getString(USE_MDFILE_AS_MAINPAGE);
2581   if (id.isEmpty()) id = markdownFileNameToId(fileName);
2582   if (!isExplicitPage(docs))
2583   {
2584     if (!mdfileAsMainPage.isEmpty() &&
2585         (fn==mdfileAsMainPage || // name reference
2586          QFileInfo(fileName).absFilePath()==
2587          QFileInfo(mdfileAsMainPage).absFilePath()) // file reference with path
2588        )
2589     {
2590       docs.prepend("@mainpage "+title+"\n");
2591     }
2592     else if (id=="mainpage" || id=="index")
2593     {
2594       if (title.isEmpty()) title = titleFn;
2595       docs.prepend("@mainpage "+title+"\n");
2596     }
2597     else
2598     {
2599       if (title.isEmpty()) title = titleFn;
2600       docs.prepend("@page "+id+" "+title+"\n");
2601     }
2602   }
2603   int lineNr=1;
2604   int position=0;
2605
2606   // even without markdown support enabled, we still
2607   // parse markdown files as such
2608   bool markdownEnabled = Doxygen::markdownSupport;
2609   Doxygen::markdownSupport = TRUE;
2610
2611   bool needsEntry = FALSE;
2612   Protection prot=Public;
2613   while (parseCommentBlock(
2614         this,
2615         current,
2616         docs,
2617         fileName,
2618         lineNr,
2619         FALSE,     // isBrief
2620         FALSE,     // javadoc autobrief
2621         FALSE,     // inBodyDocs
2622         prot,      // protection
2623         position,
2624         needsEntry))
2625   {
2626     if (needsEntry)
2627     {
2628       QCString docFile = current->docFile;
2629       root->addSubEntry(current);
2630       current = new Entry;
2631       current->lang = SrcLangExt_Markdown;
2632       current->docFile = docFile;
2633       current->docLine = lineNr;
2634     }
2635   }
2636   if (needsEntry)
2637   {
2638     root->addSubEntry(current);
2639   }
2640
2641   // restore setting
2642   Doxygen::markdownSupport = markdownEnabled;
2643   //g_correctSectionLevel = FALSE;
2644 }
2645
2646 void MarkdownFileParser::parseCode(CodeOutputInterface &codeOutIntf,
2647                const char *scopeName,
2648                const QCString &input,
2649                SrcLangExt lang,
2650                bool isExampleBlock,
2651                const char *exampleName,
2652                FileDef *fileDef,
2653                int startLine,
2654                int endLine,
2655                bool inlineFragment,
2656                MemberDef *memberDef,
2657                bool showLineNumbers,
2658                Definition *searchCtx,
2659                bool collectXRefs
2660               )
2661 {
2662   ParserInterface *pIntf = Doxygen::parserManager->getParser("*.cpp");
2663   if (pIntf!=this)
2664   {
2665     pIntf->parseCode(
2666        codeOutIntf,scopeName,input,lang,isExampleBlock,exampleName,
2667        fileDef,startLine,endLine,inlineFragment,memberDef,showLineNumbers,
2668        searchCtx,collectXRefs);
2669   }
2670 }
2671
2672 void MarkdownFileParser::resetCodeParserState()
2673 {
2674   ParserInterface *pIntf = Doxygen::parserManager->getParser("*.cpp");
2675   if (pIntf!=this)
2676   {
2677     pIntf->resetCodeParserState();
2678   }
2679 }
2680
2681 void MarkdownFileParser::parsePrototype(const char *text)
2682 {
2683   ParserInterface *pIntf = Doxygen::parserManager->getParser("*.cpp");
2684   if (pIntf!=this)
2685   {
2686     pIntf->parsePrototype(text);
2687   }
2688 }
2689