src/markdown.cpp

   1 /******************************************************************************
   2  *
   3  * Copyright (C) 1997-2014 by Dimitri van Heesch.
   4  *
   5  * Permission to use, copy, modify, and distribute this software and its
   6  * documentation under the terms of the GNU General Public License is hereby
   7  * granted. No representations are made about the suitability of this software
   8  * for any purpose. It is provided "as is" without express or implied warranty.
   9  * See the GNU General Public License for more details.
  10  *
  11  * Documents produced by Doxygen are derivative works derived from the
  12  * input used in their production; they are not affected by this license.
  13  *
  14  */
  15
  16 /* Note: part of the code below is inspired by libupskirt written by
  17  * Natacha Porté. Original copyright message follows:
  18  *
  19  * Copyright (c) 2008, Natacha Porté
  20  *
  21  * Permission to use, copy, modify, and distribute this software for any
  22  * purpose with or without fee is hereby granted, provided that the above
  23  * copyright notice and this permission notice appear in all copies.
  24  *
  25  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  26  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  27  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  28  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  29  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  30  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  31  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  32  */
  33
  34 #include <stdio.h>
  35 #include <qglobal.h>
  36 #include <qregexp.h>
  37 #include <qfileinfo.h>
  38 #include <qdict.h>
  39
  40 #include "markdown.h"
  41 #include "growbuf.h"
  42 #include "debug.h"
  43 #include "util.h"
  44 #include "doxygen.h"
  45 #include "commentscan.h"
  46 #include "entry.h"
  47 #include "bufstr.h"
  48 #include "commentcnv.h"
  49 #include "config.h"
  50 #include "section.h"
  51 #include "message.h"
  52
  53 //-----------
  54
  55 // is character at position i in data part of an identifier?
  56 #define isIdChar(i) \
  57   ((data[i]>='a' && data[i]<='z') || \
  58    (data[i]>='A' && data[i]<='Z') || \
  59    (data[i]>='0' && data[i]<='9'))   \
  60
  61 // is character at position i in data allowed before an emphasis section
  62 #define isOpenEmphChar(i) \
  63   (data[i]=='\n' || data[i]==' ' || data[i]=='\'' || data[i]=='<' || \
  64    data[i]=='{'  || data[i]=='(' || data[i]=='['  || data[i]==',' || \
  65    data[i]==':'  || data[i]==';')
  66
  67 // is character at position i in data an escape that prevents ending an emphasis section
  68 // so for example *bla (*.txt) is cool*
  69 #define ignoreCloseEmphChar(i) \
  70   (data[i]=='('  || data[i]=='{' || data[i]=='[' || data[i]=='<' || \
  71    data[i]=='='  || data[i]=='+' || data[i]=='-' || data[i]=='\\' || \
  72    data[i]=='@')
  73
  74 //----------
  75
  76 struct LinkRef
  77 {
  78   LinkRef(const QCString &l,const QCString &t) : link(l), title(t) {}
  79   QCString link;
  80   QCString title;
  81 };
  82
// Signature of a handler for an inline construct. processInline() looks the
// handler up in g_actions by the character at the current position and calls
// it with the output buffer, a pointer to that character, its offset in the
// fragment being processed and the number of characters left. The handler
// returns the number of characters it consumed, or 0 if the construct did
// not match.
typedef int (*action_t)(GrowBuf &out,const char *data,int offset,int size);

// Alignment values as produced by markersToAlignment().
enum Alignment { AlignNone, AlignLeft, AlignCenter, AlignRight };
  86
  87
  88 //----------
  89
// link references; processLink() looks them up by lower-cased reference id
static QDict<LinkRef> g_linkRefs(257);
// inline handlers indexed by character value (0 = no handler), see processInline()
static action_t       g_actions[256];
// entry being processed; processLink() sets its 'stat' flag when it finds [TOC]
static Entry         *g_current;
// NOTE(review): presumably name of the file being processed - not used in this part of the file
static QCString       g_fileName;
// NOTE(review): presumably the current line number - not used in this part of the file
static int            g_lineNr;
  95
  96 // In case a markdown page starts with a level1 header, that header is used
  97 // as a title of the page, in effect making it a level0 header, so the
  98 // level of all other sections needs to be corrected as well.
  99 // This flag is TRUE if corrections are needed.
 100 //static bool           g_correctSectionLevel;
 101
 102
 103 //----------
 104
// Number of spaces of extra indentation from which on a line is no longer
// considered as a candidate block quote (see isBlockQuote(), which treats
// such a line as part of a code block).
const int codeBlockIndent = 4;

// forward declaration: the emphasis handlers below recursively process the
// text they enclose.
static void processInline(GrowBuf &out,const char *data,int size);
 108
 109 // escape characters that have a special meaning later on.
 110 static QCString escapeSpecialChars(const QCString &s)
 111 {
 112   if (s.isEmpty()) return "";
 113   GrowBuf growBuf;
 114   const char *p=s;
 115   char c;
 116   while ((c=*p++))
 117   {
 118     switch (c)
 119     {
 120       case '<':  growBuf.addStr("\\<");   break;
 121       case '>':  growBuf.addStr("\\>");   break;
 122       case '\\': growBuf.addStr("\\\\");  break;
 123       case '@':  growBuf.addStr("\\@");   break;
 124       default:   growBuf.addChar(c);      break;
 125     }
 126   }
 127   growBuf.addChar(0);
 128   return growBuf.get();
 129 }
 130
 131 static void convertStringFragment(QCString &result,const char *data,int size)
 132 {
 133   if (size<0) size=0;
 134   result.resize(size+1);
 135   memcpy(result.data(),data,size);
 136   result.at(size)='\0';
 137 }
 138
 139 /** helper function to convert presence of left and/or right alignment markers
 140  *  to a alignment value
 141  */
 142 static Alignment markersToAlignment(bool leftMarker,bool rightMarker)
 143 {
 144   //printf("markerToAlignment(%d,%d)\n",leftMarker,rightMarker);
 145   if (leftMarker && rightMarker)
 146   {
 147     return AlignCenter;
 148   }
 149   else if (leftMarker)
 150   {
 151     return AlignLeft;
 152   }
 153   else if (rightMarker)
 154   {
 155     return AlignRight;
 156   }
 157   else
 158   {
 159     return AlignNone;
 160   }
 161 }
 162
 163
 164 // Check if data contains a block command. If so returned the command
 165 // that ends the block. If not an empty string is returned.
 166 // Note When offset>0 character position -1 will be inspected.
 167 //
 168 // Checks for and skip the following block commands:
 169 // {@code .. { .. } .. }
 170 // \dot .. \enddot
 171 // \code .. \endcode
 172 // \msc .. \endmsc
 173 // \f$..\f$
 174 // \f[..\f]
 175 // \f{..\f}
 176 // \verbatim..\endverbatim
 177 // \latexonly..\endlatexonly
 178 // \htmlonly..\endhtmlonly
 179 // \xmlonly..\endxmlonly
 180 // \rtfonly..\endrtfonly
 181 // \manonly..\endmanonly
 182 static QCString isBlockCommand(const char *data,int offset,int size)
 183 {
 184   bool openBracket = offset>0 && data[-1]=='{';
 185   bool isEscaped = offset>0 && (data[-1]=='\\' || data[-1]=='@');
 186   if (isEscaped) return QCString();
 187
 188   int end=1;
 189   while (end<size && (data[end]>='a' && data[end]<='z')) end++;
 190   if (end==1) return QCString();
 191   QCString blockName;
 192   convertStringFragment(blockName,data+1,end-1);
 193   if (blockName=="code" && openBracket)
 194   {
 195     return "}";
 196   }
 197   else if (blockName=="dot"         ||
 198            blockName=="code"        ||
 199            blockName=="msc"         ||
 200            blockName=="verbatim"    ||
 201            blockName=="latexonly"   ||
 202            blockName=="htmlonly"    ||
 203            blockName=="xmlonly"     ||
 204            blockName=="rtfonly"     ||
 205            blockName=="manonly"     ||
 206            blockName=="docbookonly"
 207      )
 208   {
 209     return "end"+blockName;
 210   }
 211   else if (blockName=="f" && end<size)
 212   {
 213     if (data[end]=='$')
 214     {
 215       return "f$";
 216     }
 217     else if (data[end]=='[')
 218     {
 219       return "f]";
 220     }
 221     else if (data[end]=='}')
 222     {
 223       return "f}";
 224     }
 225   }
 226   return QCString();
 227 }
 228
/** looks for the next emph char, skipping other constructs, and
 *  stopping when either it is found, or we are at the end of a paragraph.
 *
 *  The search starts at position 1 of \a data and considers at most \a size
 *  characters. \a c is the emphasis character to look for and \a c_size the
 *  exact number of consecutive occurrences of \a c that make up the marker.
 *
 *  Returns the position of the marker, or 0 if no marker was found, a
 *  command other than a block command was encountered, or an empty line
 *  (paragraph boundary) was reached first.
 */
static int findEmphasisChar(const char *data, int size, char c, int c_size)
{
  int i = 1;

  while (i<size)
  {
    // advance to the next character that needs a closer look
    while (i<size && data[i]!=c    && data[i]!='`' &&
                     data[i]!='\\' && data[i]!='@' &&
                     data[i]!='\n') i++;
    //printf("findEmphasisChar: data=[%s] i=%d c=%c\n",data,i,data[i]);

    // not counting escaped chars or characters that are unlikely
    // to appear as the end of the emphasis char
    if (i>0 && ignoreCloseEmphChar(i-1))
    {
      i++;
      continue;
    }
    else
    {
      // get length of emphasis token
      int len = 0;
      while (i+len<size && data[i+len]==c)
      {
        len++;
      }

      if (len>0)
      {
        // a run of the wrong length, or one directly followed by a letter
        // or digit, is skipped as a whole
        if (len!=c_size || (i<size-len && isIdChar(i+len))) // to prevent touching some_underscore_identifier
        {
          i=i+len;
          continue;
        }
        return i; // found it
      }
    }

    // skipping a code span
    if (data[i]=='`')
    {
      // count the number of backticks that open the span
      int snb=0;
      while (i<size && data[i]=='`') snb++,i++;

      // find same pattern to end the span
      int enb=0;
      while (i<size && enb<snb)
      {
        if (data[i]=='`') enb++;
        if (snb==1 && data[i]=='\'') break; // ` ended by '
        i++;
      }
    }
    else if (data[i]=='@' || data[i]=='\\')
    { // skip over blocks that should not be processed
      QCString endBlockName = isBlockCommand(data+i,i,size-i);
      if (!endBlockName.isEmpty())
      {
        // block command: move forward to the unescaped command that ends the block
        i++;
        int l = endBlockName.length();
        while (i<size-l)
        {
          if ((data[i]=='\\' || data[i]=='@') && // command
              data[i-1]!='\\' && data[i-1]!='@') // not escaped
          {
            if (qstrncmp(&data[i+1],endBlockName,l)==0)
            {
              break;
            }
          }
          i++;
        }
      }
      else if (i<size-1 && isIdChar(i+1)) // @cmd, stop processing, see bug 690385
      {
        return 0;
      }
      else
      {
        // \ or @ not followed by a letter or digit: just step over it
        i++;
      }
    }
    else if (data[i]=='\n') // end * or _ at paragraph boundary
    {
      i++;
      while (i<size && data[i]==' ') i++;
      if (i>=size || data[i]=='\n') return 0; // empty line -> paragraph
    }
    else // should not get here!
    {
      i++;
    }

  }
  return 0;
}
 328
 329 /** process single emphasis */
 330 static int processEmphasis1(GrowBuf &out, const char *data, int size, char c)
 331 {
 332   int i = 0, len;
 333
 334   /* skipping one symbol if coming from emph3 */
 335   if (size>1 && data[0]==c && data[1]==c) { i=1; }
 336
 337   while (i<size)
 338   {
 339     len = findEmphasisChar(data+i, size-i, c, 1);
 340     if (len==0) return 0;
 341     i+=len;
 342     if (i>=size) return 0;
 343
 344     if (i+1<size && data[i+1]==c)
 345     {
 346       i++;
 347       continue;
 348     }
 349     if (data[i]==c && data[i-1]!=' ' && data[i-1]!='\n')
 350     {
 351       out.addStr("<em>");
 352       processInline(out,data,i);
 353       out.addStr("</em>");
 354       return i+1;
 355     }
 356   }
 357   return 0;
 358 }
 359
 360 /** process double emphasis */
 361 static int processEmphasis2(GrowBuf &out, const char *data, int size, char c)
 362 {
 363   int i = 0, len;
 364
 365   while (i<size)
 366   {
 367     len = findEmphasisChar(data+i, size-i, c, 2);
 368     if (len==0)
 369     {
 370       return 0;
 371     }
 372     i += len;
 373     if (i+1<size && data[i]==c && data[i+1]==c && i && data[i-1]!=' ' &&
 374         data[i-1]!='\n'
 375        )
 376     {
 377       out.addStr("<strong>");
 378       processInline(out,data,i);
 379       out.addStr("</strong>");
 380       return i + 2;
 381     }
 382     i++;
 383   }
 384   return 0;
 385 }
 386
 387 /** Parsing tripple emphasis.
 388  *  Finds the first closing tag, and delegates to the other emph
 389  */
 390 static int processEmphasis3(GrowBuf &out, const char *data, int size, char c)
 391 {
 392   int i = 0, len;
 393
 394   while (i<size)
 395   {
 396     len = findEmphasisChar(data+i, size-i, c, 3);
 397     if (len==0)
 398     {
 399       return 0;
 400     }
 401     i+=len;
 402
 403     /* skip whitespace preceded symbols */
 404     if (data[i]!=c || data[i-1]==' ' || data[i-1]=='\n')
 405     {
 406       continue;
 407     }
 408
 409     if (i+2<size && data[i+1]==c && data[i+2]==c)
 410     {
 411       out.addStr("<em><strong>");
 412       processInline(out,data,i);
 413       out.addStr("</strong></em>");
 414       return i+3;
 415     }
 416     else if (i+1<size && data[i+1]==c)
 417     {
 418       // double symbol found, handing over to emph1
 419       len = processEmphasis1(out, data-2, size+2, c);
 420       if (len==0)
 421       {
 422         return 0;
 423       }
 424       else
 425       {
 426         return len - 2;
 427       }
 428     }
 429     else
 430     {
 431       // single symbol found, handing over to emph2
 432       len = processEmphasis2(out, data-1, size+1, c);
 433       if (len==0)
 434       {
 435         return 0;
 436       }
 437       else
 438       {
 439         return len - 1;
 440       }
 441     }
 442   }
 443   return 0;
 444 }
 445
 446 /** Process ndash and mdashes */
 447 static int processNmdash(GrowBuf &out,const char *data,int off,int size)
 448 {
 449   // precondition: data[0]=='-'
 450   int i=1;
 451   int count=1;
 452   if (i<size && data[i]=='-') // found --
 453   {
 454     count++,i++;
 455   }
 456   if (i<size && data[i]=='-') // found ---
 457   {
 458     count++,i++;
 459   }
 460   if (i<size && data[i]=='-') // found ----
 461   {
 462     count++;
 463   }
 464   if (count==2 && (off<8 || qstrncmp(data-8,"operator",8)!=0)) // -- => ndash
 465   {
 466     out.addStr("&ndash;");
 467     return 2;
 468   }
 469   else if (count==3) // --- => ndash
 470   {
 471     out.addStr("&mdash;");
 472     return 3;
 473   }
 474   // not an ndash or mdash
 475   return 0;
 476 }
 477
 478 /** Process quoted section "...", can contain one embedded newline */
 479 static int processQuoted(GrowBuf &out,const char *data,int,int size)
 480 {
 481   int i=1;
 482   int nl=0;
 483   while (i<size && data[i]!='"' && nl<2)
 484   {
 485     if (data[i]=='\n') nl++;
 486     i++;
 487   }
 488   if (i<size && data[i]=='"' && nl<2)
 489   {
 490     out.addStr(data,i+1);
 491     return i+1;
 492   }
 493   // not a quoted section
 494   return 0;
 495 }
 496
 497 /** Process a HTML tag. Note that <pre>..</pre> are treated specially, in
 498  *  the sense that all code inside is written unprocessed
 499  */
 500 static int processHtmlTag(GrowBuf &out,const char *data,int offset,int size)
 501 {
 502   if (offset>0 && data[-1]=='\\') return 0; // escaped <
 503
 504   // find the end of the html tag
 505   int i=1;
 506   int l=0;
 507   // compute length of the tag name
 508   while (i<size && isIdChar(i)) i++,l++;
 509   QCString tagName;
 510   convertStringFragment(tagName,data+1,i-1);
 511   if (tagName.lower()=="pre") // found <pre> tag
 512   {
 513     bool insideStr=FALSE;
 514     while (i<size-6)
 515     {
 516       char c=data[i];
 517       if (!insideStr && c=='<') // potential start of html tag
 518       {
 519         if (data[i+1]=='/' &&
 520             tolower(data[i+2])=='p' && tolower(data[i+3])=='r' &&
 521             tolower(data[i+4])=='e' && tolower(data[i+5])=='>')
 522         { // found </pre> tag, copy from start to end of tag
 523           out.addStr(data,i+6);
 524           //printf("found <pre>..</pre> [%d..%d]\n",0,i+6);
 525           return i+6;
 526         }
 527       }
 528       else if (insideStr && c=='"')
 529       {
 530         if (data[i-1]!='\\') insideStr=FALSE;
 531       }
 532       else if (c=='"')
 533       {
 534         insideStr=TRUE;
 535       }
 536       i++;
 537     }
 538   }
 539   else // some other html tag
 540   {
 541     if (l>0 && i<size)
 542     {
 543       if (data[i]=='/' && i<size-1 && data[i+1]=='>') // <bla/>
 544       {
 545         //printf("Found htmlTag={%s}\n",QCString(data).left(i+2).data());
 546         out.addStr(data,i+2);
 547         return i+2;
 548       }
 549       else if (data[i]=='>') // <bla>
 550       {
 551         //printf("Found htmlTag={%s}\n",QCString(data).left(i+1).data());
 552         out.addStr(data,i+1);
 553         return i+1;
 554       }
 555       else if (data[i]==' ') // <bla attr=...
 556       {
 557         i++;
 558         bool insideAttr=FALSE;
 559         while (i<size)
 560         {
 561           if (!insideAttr && data[i]=='"')
 562           {
 563             insideAttr=TRUE;
 564           }
 565           else if (data[i]=='"' && data[i-1]!='\\')
 566           {
 567             insideAttr=FALSE;
 568           }
 569           else if (!insideAttr && data[i]=='>') // found end of tag
 570           {
 571             //printf("Found htmlTag={%s}\n",QCString(data).left(i+1).data());
 572             out.addStr(data,i+1);
 573             return i+1;
 574           }
 575           i++;
 576         }
 577       }
 578     }
 579   }
 580   //printf("Not a valid html tag\n");
 581   return 0;
 582 }
 583
 584 static int processEmphasis(GrowBuf &out,const char *data,int offset,int size)
 585 {
 586   if ((offset>0 && !isOpenEmphChar(-1)) || // invalid char before * or _
 587       (size>1 && data[0]!=data[1] && !isIdChar(1)) || // invalid char after * or _
 588       (size>2 && data[0]==data[1] && !isIdChar(2)))   // invalid char after ** or __
 589   {
 590     return 0;
 591   }
 592
 593   char c = data[0];
 594   int ret;
 595   if (size>2 && data[1]!=c) // _bla or *bla
 596   {
 597     // whitespace cannot follow an opening emphasis
 598     if (data[1]==' ' || data[1]=='\n' ||
 599         (ret = processEmphasis1(out, data+1, size-1, c)) == 0)
 600     {
 601       return 0;
 602     }
 603     return ret+1;
 604   }
 605   if (size>3 && data[1]==c && data[2]!=c) // __bla or **bla
 606   {
 607     if (data[2]==' ' || data[2]=='\n' ||
 608         (ret = processEmphasis2(out, data+2, size-2, c)) == 0)
 609     {
 610       return 0;
 611     }
 612     return ret+2;
 613   }
 614   if (size>4 && data[1]==c && data[2]==c && data[3]!=c) // ___bla or ***bla
 615   {
 616     if (data[3]==' ' || data[3]=='\n' ||
 617         (ret = processEmphasis3(out, data+3, size-3, c)) == 0)
 618     {
 619       return 0;
 620     }
 621     return ret+3;
 622   }
 623   return 0;
 624 }
 625
 626 static int processLink(GrowBuf &out,const char *data,int,int size)
 627 {
 628   QCString content;
 629   QCString link;
 630   QCString title;
 631   int contentStart,contentEnd,linkStart,titleStart,titleEnd;
 632   bool isImageLink = FALSE;
 633   bool isToc = FALSE;
 634   int i=1;
 635   if (data[0]=='!')
 636   {
 637     isImageLink = TRUE;
 638     if (size<2 || data[1]!='[')
 639     {
 640       return 0;
 641     }
 642     i++;
 643   }
 644   contentStart=i;
 645   int level=1;
 646   int nl=0;
 647   // find the matching ]
 648   while (i<size)
 649   {
 650     if (data[i-1]=='\\') // skip escaped characters
 651     {
 652     }
 653     else if (data[i]=='[')
 654     {
 655       level++;
 656     }
 657     else if (data[i]==']')
 658     {
 659       level--;
 660       if (level<=0) break;
 661     }
 662     else if (data[i]=='\n')
 663     {
 664       nl++;
 665       if (nl>1) return 0; // only allow one newline in the content
 666     }
 667     i++;
 668   }
 669   if (i>=size) return 0; // premature end of comment -> no link
 670   contentEnd=i;
 671   convertStringFragment(content,data+contentStart,contentEnd-contentStart);
 672   //printf("processLink: content={%s}\n",content.data());
 673   if (!isImageLink && content.isEmpty()) return 0; // no link text
 674   i++; // skip over ]
 675
 676   // skip whitespace
 677   while (i<size && data[i]==' ') i++;
 678   if (i<size && data[i]=='\n') // one newline allowed here
 679   {
 680     i++;
 681     // skip more whitespace
 682     while (i<size && data[i]==' ') i++;
 683   }
 684
 685   bool explicitTitle=FALSE;
 686   if (i<size && data[i]=='(') // inline link
 687   {
 688     i++;
 689     while (i<size && data[i]==' ') i++;
 690     if (i<size && data[i]=='<') i++;
 691     linkStart=i;
 692     nl=0;
 693     while (i<size && data[i]!='\'' && data[i]!='"' && data[i]!=')')
 694     {
 695       if (data[i]=='\n')
 696       {
 697         nl++;
 698         if (nl>1) return 0;
 699       }
 700       i++;
 701     }
 702     if (i>=size || data[i]=='\n') return 0;
 703     convertStringFragment(link,data+linkStart,i-linkStart);
 704     link = link.stripWhiteSpace();
 705     //printf("processLink: link={%s}\n",link.data());
 706     if (link.isEmpty()) return 0;
 707     if (link.at(link.length()-1)=='>') link=link.left(link.length()-1);
 708
 709     // optional title
 710     if (data[i]=='\'' || data[i]=='"')
 711     {
 712       char c = data[i];
 713       i++;
 714       titleStart=i;
 715       nl=0;
 716       while (i<size && data[i]!=')')
 717       {
 718         if (data[i]=='\n')
 719         {
 720           if (nl>1) return 0;
 721           nl++;
 722         }
 723         i++;
 724       }
 725       if (i>=size)
 726       {
 727         return 0;
 728       }
 729       titleEnd = i-1;
 730       // search back for closing marker
 731       while (titleEnd>titleStart && data[titleEnd]==' ') titleEnd--;
 732       if (data[titleEnd]==c) // found it
 733       {
 734         convertStringFragment(title,data+titleStart,titleEnd-titleStart);
 735         //printf("processLink: title={%s}\n",title.data());
 736       }
 737       else
 738       {
 739         return 0;
 740       }
 741     }
 742     i++;
 743   }
 744   else if (i<size && data[i]=='[') // reference link
 745   {
 746     i++;
 747     linkStart=i;
 748     nl=0;
 749     // find matching ]
 750     while (i<size && data[i]!=']')
 751     {
 752       if (data[i]=='\n')
 753       {
 754         nl++;
 755         if (nl>1) return 0;
 756       }
 757       i++;
 758     }
 759     if (i>=size) return 0;
 760     // extract link
 761     convertStringFragment(link,data+linkStart,i-linkStart);
 762     //printf("processLink: link={%s}\n",link.data());
 763     link = link.stripWhiteSpace();
 764     if (link.isEmpty()) // shortcut link
 765     {
 766       link=content;
 767     }
 768     // lookup reference
 769     LinkRef *lr = g_linkRefs.find(link.lower());
 770     if (lr) // found it
 771     {
 772       link  = lr->link;
 773       title = lr->title;
 774       //printf("processLink: ref: link={%s} title={%s}\n",link.data(),title.data());
 775     }
 776     else // reference not found!
 777     {
 778       //printf("processLink: ref {%s} do not exist\n",link.lower().data());
 779       return 0;
 780     }
 781     i++;
 782   }
 783   else if (i<size && data[i]!=':' && !content.isEmpty()) // minimal link ref notation [some id]
 784   {
 785     LinkRef *lr = g_linkRefs.find(content.lower());
 786     //printf("processLink: minimal link {%s} lr=%p",content.data(),lr);
 787     if (lr) // found it
 788     {
 789       link  = lr->link;
 790       title = lr->title;
 791       explicitTitle=TRUE;
 792       i=contentEnd;
 793     }
 794     else if (content=="TOC")
 795     {
 796       isToc=TRUE;
 797       i=contentEnd;
 798     }
 799     else
 800     {
 801       return 0;
 802     }
 803     i++;
 804   }
 805   else
 806   {
 807     return 0;
 808   }
 809   static QRegExp re("^[@\\]ref ");
 810   if (isToc) // special case for [TOC]
 811   {
 812     if (g_current) g_current->stat=TRUE;
 813   }
 814   else if (isImageLink)
 815   {
 816     bool ambig;
 817     FileDef *fd=0;
 818     if (link.find("@ref ")!=-1 || link.find("\\ref ")!=-1 ||
 819         (fd=findFileDef(Doxygen::imageNameDict,link,ambig)))
 820         // assume doxygen symbol link or local image link
 821     {
 822       out.addStr("@image html ");
 823       out.addStr(link.mid(fd ? 0 : 5));
 824       if (!explicitTitle && !content.isEmpty())
 825       {
 826         out.addStr(" \"");
 827         out.addStr(content);
 828         out.addStr("\"");
 829       }
 830       else if ((content.isEmpty() || explicitTitle) && !title.isEmpty())
 831       {
 832         out.addStr(" \"");
 833         out.addStr(title);
 834         out.addStr("\"");
 835       }
 836     }
 837     else
 838     {
 839       out.addStr("<img src=\"");
 840       out.addStr(link);
 841       out.addStr("\" alt=\"");
 842       out.addStr(content);
 843       out.addStr("\"");
 844       if (!title.isEmpty())
 845       {
 846         out.addStr(" title=\"");
 847         out.addStr(substitute(title.simplifyWhiteSpace(),"\"","&quot;"));
 848         out.addStr("\"");
 849       }
 850       out.addStr("/>");
 851     }
 852   }
 853   else
 854   {
 855     SrcLangExt lang = getLanguageFromFileName(link);
 856     int lp=-1;
 857     if ((lp=link.find("@ref "))!=-1 || (lp=link.find("\\ref "))!=-1 || lang==SrcLangExt_Markdown)
 858         // assume doxygen symbol link
 859     {
 860       if (lp==-1) // link to markdown page
 861       {
 862         out.addStr("@ref ");
 863       }
 864       out.addStr(link);
 865       out.addStr(" \"");
 866       if (explicitTitle && !title.isEmpty())
 867       {
 868         out.addStr(title);
 869       }
 870       else
 871       {
 872         out.addStr(content);
 873       }
 874       out.addStr("\"");
 875     }
 876     else if (link.find('/')!=-1 || link.find('.')!=-1 || link.find('#')!=-1)
 877     { // file/url link
 878       out.addStr("<a href=\"");
 879       out.addStr(link);
 880       out.addStr("\"");
 881       if (!title.isEmpty())
 882       {
 883         out.addStr(" title=\"");
 884         out.addStr(substitute(title.simplifyWhiteSpace(),"\"","&quot;"));
 885         out.addStr("\"");
 886       }
 887       out.addStr(">");
 888       out.addStr(content.simplifyWhiteSpace());
 889       out.addStr("</a>");
 890     }
 891     else // avoid link to e.g. F[x](y)
 892     {
 893       //printf("no link for '%s'\n",link.data());
 894       return 0;
 895     }
 896   }
 897   return i;
 898 }
 899
 900 /** '`' parsing a code span (assuming codespan != 0) */
 901 static int processCodeSpan(GrowBuf &out, const char *data, int /*offset*/, int size)
 902 {
 903   int end, nb = 0, i, f_begin, f_end;
 904
 905   /* counting the number of backticks in the delimiter */
 906   while (nb<size && data[nb]=='`')
 907   {
 908     nb++;
 909   }
 910
 911   /* finding the next delimiter */
 912   i = 0;
 913   int nl=0;
 914   for (end=nb; end<size && i<nb && nl<2; end++)
 915   {
 916     if (data[end]=='`')
 917     {
 918       i++;
 919     }
 920     else if (data[end]=='\n')
 921     {
 922       i=0;
 923       nl++;
 924     }
 925     else
 926     {
 927       i=0;
 928     }
 929   }
 930   if (i < nb && end >= size)
 931   {
 932     return 0;  // no matching delimiter
 933   }
 934   if (nl==2) // too many newlines inside the span
 935   {
 936     return 0;
 937   }
 938
 939   // trimming outside whitespaces
 940   f_begin = nb;
 941   while (f_begin < end && data[f_begin]==' ')
 942   {
 943     f_begin++;
 944   }
 945   f_end = end - nb;
 946   while (f_end > nb && data[f_end-1]==' ')
 947   {
 948     f_end--;
 949   }
 950
 951   if (nb==1) // check for closing ' followed by space within f_begin..f_end
 952   {
 953     i=f_begin;
 954     while (i<f_end-1)
 955     {
 956       if (data[i]=='\'' && !isIdChar(i+1)) // reject `some word' and not `it's cool`
 957       {
 958         return 0;
 959       }
 960       i++;
 961     }
 962   }
 963   //printf("found code span '%s'\n",QCString(data+f_begin).left(f_end-f_begin).data());
 964
 965   /* real code span */
 966   if (f_begin < f_end)
 967   {
 968     QCString codeFragment;
 969     convertStringFragment(codeFragment,data+f_begin,f_end-f_begin);
 970     out.addStr("<tt>");
 971     //out.addStr(convertToHtml(codeFragment,TRUE));
 972     out.addStr(escapeSpecialChars(codeFragment));
 973     out.addStr("</tt>");
 974   }
 975   return end;
 976 }
 977
 978
 979 static int processSpecialCommand(GrowBuf &out, const char *data, int offset, int size)
 980 {
 981   int i=1;
 982   QCString endBlockName = isBlockCommand(data,offset,size);
 983   if (!endBlockName.isEmpty())
 984   {
 985     int l = endBlockName.length();
 986     while (i<size-l)
 987     {
 988       if ((data[i]=='\\' || data[i]=='@') && // command
 989           data[i-1]!='\\' && data[i-1]!='@') // not escaped
 990       {
 991         if (qstrncmp(&data[i+1],endBlockName,l)==0)
 992         {
 993           //printf("found end at %d\n",i);
 994           out.addStr(data,i+1+l);
 995           return i+1+l;
 996         }
 997       }
 998       i++;
 999     }
1000   }
1001   if (size>1 && data[0]=='\\')
1002   {
1003     char c=data[1];
1004     if (c=='[' || c==']' || c=='*' || c=='+' || c=='-' ||
1005         c=='!' || c=='(' || c==')' || c=='.' || c=='`' || c=='_')
1006     {
1007       if (c=='-' && size>3 && data[2]=='-' && data[3]=='-') // \---
1008       {
1009         out.addStr(&data[1],3);
1010         return 4;
1011       }
1012       else if (c=='-' && size>2 && data[2]=='-') // \--
1013       {
1014         out.addStr(&data[1],2);
1015         return 3;
1016       }
1017       out.addStr(&data[1],1);
1018       return 2;
1019     }
1020   }
1021   return 0;
1022 }
1023
1024 static void processInline(GrowBuf &out,const char *data,int size)
1025 {
1026   int i=0, end=0;
1027   action_t action = 0;
1028   while (i<size)
1029   {
1030     while (end<size && ((action=g_actions[(uchar)data[end]])==0)) end++;
1031     out.addStr(data+i,end-i);
1032     if (end>=size) break;
1033     i=end;
1034     end = action(out,data+i,i,size-i);
1035     if (!end)
1036     {
1037       end=i+1;
1038     }
1039     else
1040     {
1041       i+=end;
1042       end=i;
1043     }
1044   }
1045 }
1046
/** Returns whether the line is a setext-style header underline.
 *
 *  A line is an underline if, after optional leading spaces, it consists
 *  of at least two '=' characters (level 1) or at least two '-' characters
 *  (level 2), optionally followed by spaces, and then ends (end of the data
 *  or a newline).
 *
 *  \a data points at the start of the line and \a size is the number of
 *  characters available. Returns 1 or 2 for a level 1 or level 2 underline
 *  and 0 otherwise.
 */
static int isHeaderline(const char *data, int size)
{
  int i=0, c=0;
  while (i<size && data[i]==' ') i++;
  // nothing but spaces: not an underline; this also avoids looking at
  // data[i] beyond the available characters
  if (i>=size) return 0;

  // test of level 1 header
  if (data[i]=='=')
  {
    while (i<size && data[i]=='=') i++,c++;
    while (i<size && data[i]==' ') i++;
    return (c>1 && (i>=size || data[i]=='\n')) ? 1 : 0;
  }
  // test of level 2 header
  if (data[i]=='-')
  {
    while (i<size && data[i]=='-') i++,c++;
    while (i<size && data[i]==' ') i++;
    return (c>1 && (i>=size || data[i]=='\n')) ? 2 : 0;
  }
  return 0;
}
1069
1070 /** returns TRUE if this line starts a block quote */
1071 static bool isBlockQuote(const char *data,int size,int indent)
1072 {
1073   int i = 0;
1074   while (i<size && data[i]==' ') i++;
1075   if (i<indent+codeBlockIndent) // could be a quotation
1076   {
1077     // count >'s and skip spaces
1078     int level=0;
1079     while (i<size && (data[i]=='>' || data[i]==' '))
1080     {
1081       if (data[i]=='>') level++;
1082       i++;
1083     }
1084     // last characters should be a space or newline,
1085     // so a line starting with >= does not match
1086     return level>0 && i<size && ((data[i-1]==' ') || data[i]=='\n');
1087   }
1088   else // too much indentation -> code block
1089   {
1090     return FALSE;
1091   }
1092   //return i<size && data[i]=='>' && i<indent+codeBlockIndent;
1093 }
1094
1095 /** returns end of the link ref if this is indeed a link reference. */
1096 static int isLinkRef(const char *data,int size,
1097             QCString &refid,QCString &link,QCString &title)
1098 {
1099   //printf("isLinkRef data={%s}\n",data);
1100   // format: start with [some text]:
1101   int i = 0;
1102   while (i<size && data[i]==' ') i++;
1103   if (i>=size || data[i]!='[') return 0;
1104   i++;
1105   int refIdStart=i;
1106   while (i<size && data[i]!='\n' && data[i]!=']') i++;
1107   if (i>=size || data[i]!=']') return 0;
1108   convertStringFragment(refid,data+refIdStart,i-refIdStart);
1109   if (refid.isEmpty()) return 0;
1110   //printf("  isLinkRef: found refid='%s'\n",refid.data());
1111   i++;
1112   if (i>=size || data[i]!=':') return 0;
1113   i++;
1114
1115   // format: whitespace* \n? whitespace* (<url> | url)
1116   while (i<size && data[i]==' ') i++;
1117   if (i<size && data[i]=='\n')
1118   {
1119     i++;
1120     while (i<size && data[i]==' ') i++;
1121   }
1122   if (i>=size) return 0;
1123
1124   if (i<size && data[i]=='<') i++;
1125   int linkStart=i;
1126   while (i<size && data[i]!=' ' && data[i]!='\n') i++;
1127   int linkEnd=i;
1128   if (i<size && data[i]=='>') i++;
1129   if (linkStart==linkEnd) return 0; // empty link
1130   convertStringFragment(link,data+linkStart,linkEnd-linkStart);
1131   //printf("  isLinkRef: found link='%s'\n",link.data());
1132   if (link=="@ref" || link=="\\ref")
1133   {
1134     int argStart=i;
1135     while (i<size && data[i]!='\n' && data[i]!='"') i++;
1136     QCString refArg;
1137     convertStringFragment(refArg,data+argStart,i-argStart);
1138     link+=refArg;
1139   }
1140
1141   title.resize(0);
1142
1143   // format: (whitespace* \n? whitespace* ( 'title' | "title" | (title) ))?
1144   int eol=0;
1145   while (i<size && data[i]==' ') i++;
1146   if (i<size && data[i]=='\n')
1147   {
1148     eol=i;
1149     i++;
1150     while (i<size && data[i]==' ') i++;
1151   }
1152   if (i>=size)
1153   {
1154     //printf("end of isLinkRef while looking for title! i=%d\n",i);
1155     return i; // end of buffer while looking for the optional title
1156   }
1157
1158   char c = data[i];
1159   if (c=='\'' || c=='"' || c=='(') // optional title present?
1160   {
1161     //printf("  start of title found! char='%c'\n",c);
1162     i++;
1163     if (c=='(') c=')'; // replace c by end character
1164     int titleStart=i;
1165     // search for end of the line
1166     while (i<size && data[i]!='\n') i++;
1167     eol = i;
1168
1169     // search back to matching character
1170     int end=i-1;
1171     while (end>titleStart && data[end]!=c) end--;
1172     if (end>titleStart)
1173     {
1174       convertStringFragment(title,data+titleStart,end-titleStart);
1175     }
1176     //printf("  title found: '%s'\n",title.data());
1177   }
1178   while (i<size && data[i]==' ') i++;
1179   //printf("end of isLinkRef: i=%d size=%d data[i]='%c' eol=%d\n",
1180   //    i,size,data[i],eol);
1181   if      (i>=size)       return i;    // end of buffer while ref id was found
1182   else if (eol)           return eol;  // end of line while ref id was found
1183   return 0;                            // invalid link ref
1184 }
1185
/** Tests whether a line is a horizontal ruler.
 *
 *  A ruler line contains at least three identical rule characters ('*', '-'
 *  or '_') and, apart from spaces and one optional trailing newline, nothing
 *  else.
 *
 *  @param[in] data pointer to the start of the line.
 *  @param[in] size number of characters of the line.
 *  @returns non-zero if the line is a horizontal ruler, 0 otherwise.
 */
static int isHRuler(const char *data,int size)
{
  // a newline at the very end is not part of the text to inspect
  const int len = (size>0 && data[size-1]=='\n') ? size-1 : size;

  int pos=0;
  while (pos<len && data[pos]==' ') pos++;
  if (pos>=len) return 0; // blank line

  // the first non-space character decides which rule character is used
  const char rule=data[pos];
  if (rule!='*' && rule!='-' && rule!='_') return 0;

  int count=0;
  for (;pos<len;pos++)
  {
    if (data[pos]==rule)
    {
      count++;
    }
    else if (data[pos]!=' ')
    {
      return 0; // something other than the rule character or a space
    }
  }
  return count>=3; // fewer than 3 rule characters do not make a ruler
}
1212
1213 static QCString extractTitleId(QCString &title)
1214 {
1215   //static QRegExp r1("^[a-z_A-Z][a-z_A-Z0-9\\-]*:");
1216   static QRegExp r2("\\{#[a-z_A-Z][a-z_A-Z0-9\\-]*\\}");
1217   int l=0;
1218   int i = r2.match(title,0,&l);
1219   if (i!=-1 && title.mid(i+l).stripWhiteSpace().isEmpty()) // found {#id} style id
1220   {
1221     QCString id = title.mid(i+2,l-3);
1222     title = title.left(i);
1223     //printf("found id='%s' title='%s'\n",id.data(),title.data());
1224     return id;
1225   }
1226   //printf("no id found in title '%s'\n",title.data());
1227   return "";
1228 }
1229
1230
1231 static int isAtxHeader(const char *data,int size,
1232                        QCString &header,QCString &id)
1233 {
1234   int i = 0, end;
1235   int level = 0, blanks=0;
1236
1237   // find start of header text and determine heading level
1238   while (i<size && data[i]==' ') i++;
1239   if (i>=size || data[i]!='#')
1240   {
1241     return 0;
1242   }
1243   while (i<size && level<6 && data[i]=='#') i++,level++;
1244   while (i<size && data[i]==' ') i++,blanks++;
1245   if (level==1 && blanks==0)
1246   {
1247     return 0; // special case to prevent #someid seen as a header (see bug 671395)
1248   }
1249
1250   // find end of header text
1251   end=i;
1252   while (end<size && data[end]!='\n') end++;
1253   while (end>i && (data[end-1]=='#' || data[end-1]==' ')) end--;
1254
1255   // store result
1256   convertStringFragment(header,data+i,end-i);
1257   id = extractTitleId(header);
1258   if (!id.isEmpty()) // strip #'s between title and id
1259   {
1260     i=header.length()-1;
1261     while (i>=0 && (header.at(i)=='#' || header.at(i)==' ')) i--;
1262     header=header.left(i+1);
1263   }
1264
1265   return level;
1266 }
1267
1268 static int isEmptyLine(const char *data,int size)
1269 {
1270   int i=0;
1271   while (i<size)
1272   {
1273     if (data[i]=='\n') return TRUE;
1274     if (data[i]!=' ') return FALSE;
1275     i++;
1276   }
1277   return TRUE;
1278 }
1279
// Evaluates to true when the characters starting at data[i] spell "<li>",
// accepting upper or lower case for the 'l' and the 'i'. The macro uses a
// variable called `data` from the place where it is expanded and does no
// bounds checking itself, so the caller has to make sure that index i+3 may
// be read.
1280 #define isLiTag(i) \
1281    (data[(i)]=='<' && \
1282    (data[(i)+1]=='l' || data[(i)+1]=='L') && \
1283    (data[(i)+2]=='i' || data[(i)+2]=='I') && \
1284    (data[(i)+3]=='>'))
1285
1286 // compute the indent from the start of the input, excluding list markers
1287 // such as -, -#, *, +, 1., and <li>
     //
     // Returns the number of characters at the start of `data` (looking at no
     // more than `size` characters) that are consumed by the scan below: spaces,
     // at most one list marker, and the spaces following that marker. `indent`
     // and `i` are always advanced by the same amount.
1288 static int computeIndentExcludingListMarkers(const char *data,int size)
1289 {
1290   int i=0;
1291   int indent=0;
1292   bool isDigit=FALSE;
1293   bool isLi=FALSE;
1294   bool listMarkerSkipped=FALSE;
       // isDigit and isLi are assigned as a side effect of evaluating the loop
       // condition. Because of short-circuit evaluation they are only refreshed
       // when all alternatives listed before them are false.
       // NOTE(review): a value set in one iteration therefore survives into a
       // later iteration that already matches on the plain space test; confirm
       // that this is intended.
1295   while (i<size &&
1296          (data[i]==' ' ||                                    // space
1297           (!listMarkerSkipped &&                             // first list marker
1298            (data[i]=='+' || data[i]=='-' || data[i]=='*' ||  // unordered list char
1299             (data[i]=='#' && i>0 && data[i-1]=='-') ||       // -# item
1300             (isDigit=(data[i]>='1' && data[i]<='9')) ||      // ordered list marker?
1301             (isLi=(i<size-3 && isLiTag(i)))                  // <li> tag
1302            )
1303           )
1304          )
1305         )
1306   {
1307     if (isDigit) // skip over ordered list marker '10. '
1308     {
           // look ahead over further digits until the '.' that should end
           // the marker
1309       int j=i+1;
1310       while (j<size && ((data[j]>='0' && data[j]<='9') || data[j]=='.'))
1311       {
1312         if (data[j]=='.') // should be end of the list marker
1313         {
1314           if (j<size-1 && data[j+1]==' ') // valid list marker
1315           {
1316             listMarkerSkipped=TRUE;
                 // jump to the character after the '.'; the common increment at
                 // the bottom of the loop then steps over that character
1317             indent+=j+1-i;
1318             i=j+1;
1319             break;
1320           }
1321           else // not a list marker
1322           {
1323             break;
1324           }
1325         }
1326         j++;
1327       }
1328     }
1329     else if (isLi)
1330     {
           // advance by 3 here; the 4th character of "<li>" is consumed by the
           // common increment at the bottom of the loop
1331       i+=3; // skip over <li>
1332       indent+=3;
1333       listMarkerSkipped=TRUE;
1334     }
1335     else if (data[i]=='-' && i<size-2 && data[i+1]=='#' && data[i+2]==' ')
1336     { // case "-# "
1337       listMarkerSkipped=TRUE; // only a single list marker is accepted
1338       i++; // skip over #
1339       indent++;
1340     }
1341     else if (data[i]!=' ' && i<size-1 && data[i+1]==' ')
1342     { // case "- " or "+ " or "* "
1343       listMarkerSkipped=TRUE; // only a single list marker is accepted
1344     }
         // a non-space character that did not turn out to be a (valid) list
         // marker is the first character of the text, so the indent ends here
1345     if (data[i]!=' ' && !listMarkerSkipped)
1346     { // end of indent
1347       break;
1348     }
1349     indent++,i++;
1350   }
1351   //printf("{%s}->%d\n",QCString(data).left(size).data(),indent);
1352   return indent;
1353 }
1354
1355 static bool isFencedCodeBlock(const char *data,int size,int refIndent,
1356                              QCString &lang,int &start,int &end,int &offset)
1357 {
1358   // rules: at least 3 ~~~, end of the block same amount of ~~~'s, otherwise
1359   // return FALSE
1360   int i=0;
1361   int indent=0;
1362   int startTildes=0;
1363   while (i<size && data[i]==' ') indent++,i++;
1364   if (indent>=refIndent+4) return FALSE; // part of code block
1365   while (i<size && data[i]=='~') startTildes++,i++;
1366   if (startTildes<3) return FALSE; // not enough tildes
1367   if (i<size && data[i]=='{') i++; // skip over optional {
1368   int startLang=i;
1369   while (i<size && (data[i]!='\n' && data[i]!='}' && data[i]!=' ')) i++;
1370   convertStringFragment(lang,data+startLang,i-startLang);
1371   while (i<size && data[i]!='\n') i++; // proceed to the end of the line
1372   start=i;
1373   while (i<size)
1374   {
1375     if (data[i]=='~')
1376     {
1377       end=i-1;
1378       int endTildes=0;
1379       while (i<size && data[i]=='~') endTildes++,i++;
1380       while (i<size && data[i]==' ') i++;
1381       if (i==size || data[i]=='\n')
1382       {
1383         offset=i;
1384         return endTildes==startTildes;
1385       }
1386     }
1387     i++;
1388   }
1389   return FALSE;
1390 }
1391
     // Determines whether the line at `data` starts an indented code block.
     //
     // data   - start of the line under test; the `offset` characters located
     //          before it (data[-offset] .. data[-1]) are inspected as well.
     // offset - number of characters that precede `data`.
     // size   - number of characters of the line under test.
     // indent - on input the reference indent that is used when fewer than two
     //          preceding lines can be located; when they can be located it is
     //          overwritten with the indent of line -2 (list markers excluded).
     //
     // Returns TRUE if the line is not empty and is indented at least
     // codeBlockIndent positions more than the reference indent; when a
     // preceding line -1 is located it additionally has to be empty.
1392 static bool isCodeBlock(const char *data,int offset,int size,int &indent)
1393 {
1394   //printf("<isCodeBlock(offset=%d,size=%d,indent=%d)\n",offset,size,indent);
1395   // determine the indent of this line
1396   int i=0;
1397   int indent0=0;
1398   while (i<size && data[i]==' ') indent0++,i++;
1399 
1400   if (indent0<codeBlockIndent)
1401   {
1402     //printf(">isCodeBlock: line is not indented enough %d<4\n",indent0);
1403     return FALSE;
1404   }
1405   if (indent0>=size || data[indent0]=='\n') // empty line does not start a code block
1406   {
1407     //printf("only spaces at the end of a comment block\n");
1408     return FALSE;
1409   }
1410 
       // walk backwards through the characters before `data`; i counts down from
       // offset to 1, so data[i-offset-1] runs from data[-1] to data[-offset]
1411   i=offset;
1412   int nl=0;
       // nl_pos[k] is the position (relative to `data`, so zero or negative) of
       // the character following the k-th newline found while walking back
1413   int nl_pos[3];
1414   // search back 3 lines and remember the start of lines -1 and -2
1415   while (i>0 && nl<3)
1416   {
1417     if (data[i-offset-1]=='\n') nl_pos[nl++]=i-offset;
1418     i--;
1419   }
1420 
1421   // if there are only 2 preceding lines, then line -2 starts at -offset
1422   if (i==0 && nl==2) nl_pos[nl++]=-offset;
1423   //printf("  nl=%d\n",nl);
1424 
1425   if (nl==3) // we have at least 2 preceding lines
1426   {
1427     //printf("  positions: nl_pos=[%d,%d,%d] line[-2]='%s' line[-1]='%s'\n",
1428     //    nl_pos[0],nl_pos[1],nl_pos[2],
1429     //    QCString(data+nl_pos[1]).left(nl_pos[0]-nl_pos[1]-1).data(),
1430     //    QCString(data+nl_pos[2]).left(nl_pos[1]-nl_pos[2]-1).data());
1431 
1432     // check that line -1 is empty
1433     if (!isEmptyLine(data+nl_pos[1],nl_pos[0]-nl_pos[1]-1))
1434     {
1435       return FALSE;
1436     }
1437 
1438     // determine the indent of line -2
1439     indent=computeIndentExcludingListMarkers(data+nl_pos[2],nl_pos[1]-nl_pos[2]);
1440 
1441     //printf(">isCodeBlock local_indent %d>=%d+4=%d\n",
1442     //    indent0,indent2,indent0>=indent2+4);
1443     // if the difference is >4 spaces -> code block
1444     return indent0>=indent+codeBlockIndent;
1445   }
1446   else // not enough lines to determine the relative indent, use global indent
1447   {
1448     // check that line -1 is empty
         // with a single newline found, everything before `data` is taken to be
         // line -1 (presumably that newline is data[-1]; verify against caller)
1449     if (nl==1 && !isEmptyLine(data-offset,offset-1))
1450     {
1451       return FALSE;
1452     }
1453     //printf(">isCodeBlock global indent %d>=%d+4=%d nl=%d\n",
1454     //    indent0,indent,indent0>=indent+4,nl);
1455     return indent0>=indent+codeBlockIndent;
1456   }
1457 }
1458
/** Locates the table content on a single line of \a data and counts the
 *  table columns on that line.
 *
 *  Leading spaces and one leading '|' as well as trailing spaces and one
 *  trailing, unescaped '|' are not part of the content. A '|' preceded by a
 *  backslash is escaped and does not separate columns.
 *
 *  @param[in]  data    pointer to the string buffer.
 *  @param[in]  size    the size of the buffer.
 *  @param[out] start   offset of the first character of the table content.
 *  @param[out] end     offset of the last character of the table content.
 *  @param[out] columns number of table columns found; 0 if the line contains
 *                      no column separators.
 *  @returns The offset of the start of the next line in the buffer.
 */
int findTableColumns(const char *data,int size,int &start,int &end,int &columns)
{
  int outerBars=0; // number of leading/trailing '|' characters stripped

  // the content starts after the indent and an optional leading '|'
  int pos=0;
  while (pos<size && data[pos]==' ') pos++;
  if (pos<size && data[pos]=='|')
  {
    pos++;
    outerBars++;
  }
  start=pos;

  // locate the end of the line
  while (pos<size && data[pos]!='\n') pos++;
  const int nextLine=pos+1;

  // the content ends before trailing spaces and an optional, unescaped '|'
  int last=pos-1;
  while (last>0 && data[last]==' ') last--;
  if (last>0 && data[last]=='|' && data[last-1]!='\\')
  {
    last--;
    outerBars++;
  }
  end=last;

  // every unescaped '|' inside the content adds a column
  columns=0;
  if (end>start)
  {
    for (int k=start;k<=end;k++)
    {
      if (data[k]=='|' && (k==0 || data[k-1]!='\\')) columns++;
      if (columns==1) columns=2; // the first separator splits the line in two columns
    }
  }

  // a row of the form "| ... |" without inner separators is a single column
  if (outerBars==2 && columns==0)
  {
    columns=1;
  }
  return nextLine;
}
1505
1506 /** Returns TRUE iff data points to the start of a table block */
1507 static bool isTableBlock(const char *data,int size)
1508 {
1509   int cc0,start,end;
1510
1511   // the first line should have at least two columns separated by '|'
1512   int i = findTableColumns(data,size,start,end,cc0);
1513   if (i>=size || cc0<1)
1514   {
1515     //printf("isTableBlock: no |'s in the header\n");
1516     return FALSE;
1517   }
1518
1519   int cc1;
1520   int ret = findTableColumns(data+i,size-i,start,end,cc1);
1521   int j=i+start;
1522   // separator line should consist of |, - and : and spaces only
1523   while (j<=end+i)
1524   {
1525     if (data[j]!=':' && data[j]!='-' && data[j]!='|' && data[j]!=' ')
1526     {
1527       //printf("isTableBlock: invalid character '%c'\n",data[j]);
1528       return FALSE; // invalid characters in table separator
1529     }
1530     j++;
1531   }
1532   if (cc1!=cc0) // number of columns should be same as previous line
1533   {
1534     return FALSE;
1535   }
1536
1537   i+=ret; // goto next line
1538   int cc2;
1539   ret = findTableColumns(data+i,size-i,start,end,cc2);
1540
1541   //printf("isTableBlock: %d\n",cc1==cc2);
1542   return cc1==cc2;
1543 }
1544
     // Converts the markdown table that starts at `data` into HTML table markup
     // appended to `out`. The first line is used as the header row, the second
     // line as the separator row from which the alignment of each column is
     // derived, and every following line that has the same number of columns as
     // the header is written as a body row.
     // Returns the offset of the first line that is not part of the table.
1545 static int writeTableBlock(GrowBuf &out,const char *data,int size)
1546 {
1547   int i=0,j,k;
1548   int columns,start,end,cc;
1549 
       // header line: remember where its content is, it is written further down
       // once the column alignments are known
1550   i = findTableColumns(data,size,start,end,columns);
1551 
1552   out.addStr("<table>");
1553 
1554   // write table header, in range [start..end]
1555   out.addStr("<tr>");
1556 
1557   int headerStart = start;
1558   int headerEnd = end;
1559 
1560   // read cell alignments
1561   int ret = findTableColumns(data+i,size-i,start,end,cc);
1562   k=0;
1563   Alignment *columnAlignment = new Alignment[columns];
1564 
       // for each cell of the separator line: leftMarker is set when the cell
       // starts with ':', rightMarker reflects whether the last '-' or ':' seen
       // in the cell was a ':'
1565   bool leftMarker=FALSE,rightMarker=FALSE;
1566   bool startFound=FALSE;
1567   j=start+i;
1568   while (j<=end+i)
1569   {
1570     if (!startFound)
1571     {
1572       if (data[j]==':') { leftMarker=TRUE; startFound=TRUE; }
1573       if (data[j]=='-') startFound=TRUE;
1574       //printf("  data[%d]=%c startFound=%d\n",j,data[j],startFound);
1575     }
1576     if      (data[j]=='-') rightMarker=FALSE;
1577     else if (data[j]==':') rightMarker=TRUE;
         // an unescaped '|' ends the current cell
1578     if (j<=end+i && (data[j]=='|' && (j==0 || data[j-1]!='\\')))
1579     {
1580       if (k<columns)
1581       {
1582         columnAlignment[k] = markersToAlignment(leftMarker,rightMarker);
1583         //printf("column[%d] alignment=%d\n",k,columnAlignment[k]);
1584         leftMarker=FALSE;
1585         rightMarker=FALSE;
1586         startFound=FALSE;
1587       }
1588       k++;
1589     }
1590     j++;
1591   }
       // the last cell has no '|' after it inside [start..end], so it is stored
       // here
1592   if (k<columns)
1593   {
1594     columnAlignment[k] = markersToAlignment(leftMarker,rightMarker);
1595     //printf("column[%d] alignment=%d\n",k,columnAlignment[k]);
1596   }
1597   // proceed to next line
1598   i+=ret;
1599 
       // write the header cells; m walks over the header content remembered above
       // NOTE(review): only one "\n</th>\n" is written after all header cells and
       // neither the header row nor the body rows below write closing </tr> or
       // </td> tags; presumably the consumer of this output tolerates that -
       // confirm.
1600   int m=headerStart;
1601   for (k=0;k<columns;k++)
1602   {
1603     out.addStr("<th");
1604     switch (columnAlignment[k])
1605     {
1606       case AlignLeft:   out.addStr(" align=\"left\""); break;
1607       case AlignRight:  out.addStr(" align=\"right\""); break;
1608       case AlignCenter: out.addStr(" align=\"center\""); break;
1609       case AlignNone:   break;
1610     }
1611     out.addStr(">");
         // copy the cell text up to the next unescaped '|'
1612     while (m<=headerEnd && (data[m]!='|' || (m>0 && data[m-1]=='\\')))
1613     {
1614       out.addChar(data[m++]);
1615     }
1616     m++;
1617   }
1618   out.addStr("\n</th>\n");
1619 
1620   // write table cells
1621   while (i<size)
1622   {
         // note: this `ret` shadows the one declared above
1623     int ret = findTableColumns(data+i,size-i,start,end,cc);
1624     //printf("findTableColumns cc=%d\n",cc);
1625     if (cc!=columns) break; // end of table
1626 
1627     out.addStr("<tr>");
1628     j=start+i;
1629     int columnStart=j;
1630     k=0;
1631     while (j<=end+i)
1632     {
           // at the first character of a cell: open the cell with its alignment
1633       if (j==columnStart)
1634       {
1635         out.addStr("<td");
1636         switch (columnAlignment[k])
1637         {
1638           case AlignLeft:   out.addStr(" align=\"left\""); break;
1639           case AlignRight:  out.addStr(" align=\"right\""); break;
1640           case AlignCenter: out.addStr(" align=\"center\""); break;
1641           case AlignNone:   break;
1642         }
1643         out.addStr(">");
1644       }
           // an unescaped '|' moves on to the next cell, anything else is cell text
1645       if (j<=end+i && (data[j]=='|' && (j==0 || data[j-1]!='\\')))
1646       {
1647         columnStart=j+1;
1648         k++;
1649       }
1650       else
1651       {
1652         out.addChar(data[j]);
1653       }
1654       j++;
1655     }
1656     out.addChar('\n');
1657 
1658     // proceed to next line
1659     i+=ret;
1660   }
1661 
1662   out.addStr("</table> ");
1663 
1664   delete[] columnAlignment;
1665   return i;
1666 }
1667
1668
1669 void writeOneLineHeaderOrRuler(GrowBuf &out,const char *data,int size)
1670 {
1671   int level;
1672   QCString header;
1673   QCString id;
1674   if (isHRuler(data,size))
1675   {
1676     out.addStr("<hr>\n");
1677   }
1678   else if ((level=isAtxHeader(data,size,header,id)))
1679   {
1680     //if (level==1) g_correctSectionLevel=FALSE;
1681     //if (g_correctSectionLevel) level--;
1682     QCString hTag;
1683     if (level<5 && !id.isEmpty())
1684     {
1685       SectionInfo::SectionType type = SectionInfo::Anchor;
1686       switch(level)
1687       {
1688         case 1:  out.addStr("@section ");
1689                  type=SectionInfo::Section;
1690                  break;
1691         case 2:  out.addStr("@subsection ");
1692                  type=SectionInfo::Subsection;
1693                  break;
1694         case 3:  out.addStr("@subsubsection ");
1695                  type=SectionInfo::Subsubsection;
1696                  break;
1697         default: out.addStr("@paragraph ");
1698                  type=SectionInfo::Paragraph;
1699                  break;
1700       }
1701       out.addStr(id);
1702       out.addStr(" ");
1703       out.addStr(header);
1704       out.addStr("\n");
1705       SectionInfo *si = Doxygen::sectionDict->find(header);
1706       if (si)
1707       {
1708         if (si->lineNr != -1)
1709         {
1710           warn(g_fileName,g_lineNr,"multiple use of section label '%s', (first occurrence: %s, line %d)",header.data(),si->fileName.data(),si->lineNr);
1711         }
1712         else
1713         {
1714           warn(g_fileName,g_lineNr,"multiple use of section label '%s', (first occurrence: %s)",header.data(),si->fileName.data());
1715         }
1716       }
1717       else
1718       {
1719         si = new SectionInfo(g_fileName,g_lineNr,id,header,type,level);
1720         if (g_current)
1721         {
1722           g_current->anchors->append(si);
1723         }
1724         Doxygen::sectionDict->append(header,si);
1725       }
1726     }
1727     else
1728     {
1729       if (!id.isEmpty())
1730       {
1731         out.addStr("\\anchor "+id+"\n");
1732       }
1733       hTag.sprintf("h%d",level);
1734       out.addStr("<"+hTag+">");
1735       out.addStr(header);
1736       out.addStr("</"+hTag+">\n");
1737     }
1738   }
1739   else // nothing interesting -> just output the line
1740   {
1741     out.addStr(data,size);
1742   }
1743 }
1744
1745 static int writeBlockQuote(GrowBuf &out,const char *data,int size)
1746 {
1747   int l;
1748   int i=0;
1749   int curLevel=0;
1750   int end=0;
1751   while (i<size)
1752   {
1753     // find end of this line
1754     end=i+1;
1755     while (end<=size && data[end-1]!='\n') end++;
1756     int j=i;
1757     int level=0;
1758     int indent=i;
1759     // compute the quoting level
1760     while (j<end && (data[j]==' ' || data[j]=='>'))
1761     {
1762       if (data[j]=='>') { level++; indent=j+1; }
1763       else if (j>0 && data[j-1]=='>') indent=j+1;
1764       j++;
1765     }
1766     if (j>0 && data[j-1]=='>' &&
1767         !(j==size || data[j]=='\n')) // disqualify last > if not followed by space
1768     {
1769       indent--;
1770       j--;
1771     }
1772     if (level>curLevel) // quote level increased => add start markers
1773     {
1774       for (l=curLevel;l<level;l++)
1775       {
1776         out.addStr("<blockquote>\n");
1777       }
1778     }
1779     else if (level<curLevel) // quote level descreased => add end markers
1780     {
1781       for (l=level;l<curLevel;l++)
1782       {
1783         out.addStr("</blockquote>\n");
1784       }
1785     }
1786     curLevel=level;
1787     if (level==0) break; // end of quote block
1788     // copy line without quotation marks
1789     out.addStr(data+indent,end-indent);
1790     // proceed with next line
1791     i=end;
1792   }
1793   // end of comment within blockquote => add end markers
1794   for (l=0;l<curLevel;l++)
1795   {
1796     out.addStr("</blockquote>\n");
1797   }
1798   return i;
1799 }
1800
1801 static int writeCodeBlock(GrowBuf &out,const char *data,int size,int refIndent)
1802 {
1803   int i=0,end;
1804   //printf("writeCodeBlock: data={%s}\n",QCString(data).left(size).data());
1805   out.addStr("@verbatim\n");
1806   int emptyLines=0;
1807   while (i<size)
1808   {
1809     // find end of this line
1810     end=i+1;
1811     while (end<=size && data[end-1]!='\n') end++;
1812     int j=i;
1813     int indent=0;
1814     while (j<end && data[j]==' ') j++,indent++;
1815     //printf("j=%d end=%d indent=%d refIndent=%d tabSize=%d data={%s}\n",
1816     //    j,end,indent,refIndent,Config_getInt("TAB_SIZE"),QCString(data+i).left(end-i-1).data());
1817     if (j==end-1) // empty line
1818     {
1819       emptyLines++;
1820       i=end;
1821     }
1822     else if (indent>=refIndent+codeBlockIndent) // enough indent to contine the code block
1823     {
1824       while (emptyLines>0) // write skipped empty lines
1825       {
1826         // add empty line
1827         out.addStr("\n");
1828         emptyLines--;
1829       }
1830       // add code line minus the indent
1831       out.addStr(data+i+refIndent+codeBlockIndent,end-i-refIndent-codeBlockIndent);
1832       i=end;
1833     }
1834     else // end of code block
1835     {
1836       break;
1837     }
1838   }
1839   out.addStr("@endverbatim\n");
1840   while (emptyLines>0) // write skipped empty lines
1841   {
1842     // add empty line
1843     out.addStr("\n");
1844     emptyLines--;
1845   }
1846   //printf("i=%d\n",i);
1847   return i;
1848 }
1849
1850 // start searching for the end of the line start at offset \a i
1851 // keeping track of possible blocks that need to be skipped.
     //
     // out  - buffer that receives text which is passed on unprocessed.
     // data - the text being scanned, size - its length.
     // pi   - offset of the previous line that has not been written yet, or -1
     //        if there is none; reset to -1 when a block is passed on.
     // i    - offset of the start of the current line; moved past any block
     //        that is passed on.
     // end  - set to the offset following the newline that ends the line.
1852 static void findEndOfLine(GrowBuf &out,const char *data,int size,
1853                           int &pi,int&i,int &end)
1854 {
1855   // find end of the line
       // nb is the number of backticks that opened the inline code span the scan
       // is currently in (0 when not inside such a span)
1856   int nb=0;
1857   end=i+1;
1858   while (end<=size && data[end-1]!='\n')
1859   {
1860     // while looking for the end of the line we might encounter a block
1861     // that needs to be passed unprocessed.
1862     if ((data[end-1]=='\\' || data[end-1]=='@') &&          // command
1863         (end<=1 || (data[end-2]!='\\' && data[end-2]!='@')) // not escaped
1864        )
1865     {
           // presumably isBlockCommand returns the name of the command that ends
           // the block, or an empty string if this is not a block command -
           // verify against its definition
1866       QCString endBlockName = isBlockCommand(data+end-1,end-1,size-(end-1));
1867       end++;
1868       if (!endBlockName.isEmpty())
1869       {
1870         int l = endBlockName.length();
1871         for (;end<size-l-1;end++) // search for end of block marker
1872         {
1873           if ((data[end]=='\\' || data[end]=='@') &&
1874               data[end-1]!='\\' && data[end-1]!='@'
1875              )
1876           {
1877             if (qstrncmp(&data[end+1],endBlockName,l)==0)
1878             {
1879               if (pi!=-1) // output previous line if available
1880               {
1881                 //printf("feol out={%s}\n",QCString(data+pi).left(i-pi).data());
1882                 out.addStr(data+pi,i-pi);
1883               }
1884               // found end marker, skip over this block
1885               //printf("feol.block out={%s}\n",QCString(data+i).left(end+l+1-i).data());
1886               out.addStr(data+i,end+l+1-i);
1887               pi=-1;
1888               i=end+l+1; // continue after block
1889               end=i+1;
1890               break;
1891             }
1892           }
1893         }
1894       }
1895     }
         // an unescaped '<' outside an inline code span may start a <pre> tag
1896     else if (nb==0 && data[end-1]=='<' && end<size-6 &&
1897              (end<=1 || (data[end-2]!='\\' && data[end-2]!='@'))
1898             )
1899     {
1900       if (tolower(data[end])=='p' && tolower(data[end+1])=='r' &&
1901           tolower(data[end+2])=='e' && data[end+3]=='>') // <pre> tag
1902       {
1903         if (pi!=-1) // output previous line if available
1904         {
1905           out.addStr(data+pi,i-pi);
1906         }
1907         // output part until <pre>
1908         out.addStr(data+i,end-1-i);
1909         // output part until </pre>
             // the value returned by processHtmlTag is used as the number of
             // characters it handled, counted from the '<'
1910         i = end-1 + processHtmlTag(out,data+end-1,end-1,size-end+1);
1911         pi=-1;
1912         end = i+1;
1913         break;
1914       }
1915       else
1916       {
1917         end++;
1918       }
1919     }
         // a run of backticks outside a code span opens one; remember its length
1920     else if (nb==0 && data[end-1]=='`')
1921     {
1922       while (end<=size && data[end-1]=='`') end++,nb++;
1923     }
         // inside a code span only a run of the same length closes it
1924     else if (nb>0 && data[end-1]=='`')
1925     {
1926       int enb=0;
1927       while (end<=size && data[end-1]=='`') end++,enb++;
1928       if (enb==nb) nb=0;
1929     }
1930     else
1931     {
1932       end++;
1933     }
1934   }
1935   //printf("findEndOfLine pi=%d i=%d end=%d {%s}\n",pi,i,end,QCString(data+i).left(end-i).data());
1936 }
1937
1938 static void writeFencedCodeBlock(GrowBuf &out,const char *data,const char *lng,
1939                 int blockStart,int blockEnd)
1940 {
1941   QCString lang = lng;
1942   if (!lang.isEmpty() && lang.at(0)=='.') lang=lang.mid(1);
1943   out.addStr("@code");
1944   if (!lang.isEmpty())
1945   {
1946     out.addStr("{"+lang+"}");
1947   }
1948   out.addStr(data+blockStart,blockEnd-blockStart);
1949   out.addStr("\n");
1950   out.addStr("@endcode");
1951 }
1952
     // Copies `s` to a new string in which fenced code blocks and block quotes
     // have been replaced by the output of writeFencedCodeBlock and
     // writeBlockQuote; all other lines are copied as they are. refIndent is the
     // reference indent handed to the detection helpers.
     //
     // Lines are handled with a delay of one line: pi is the offset of the
     // previous line that has not been written yet (-1 if there is none), i is
     // the offset of the current line and end is the offset of the next line.
1953 static QCString processQuotations(const QCString &s,int refIndent)
1954 {
1955   GrowBuf out;
1956   const char *data = s.data();
1957   int size = s.length();
1958   int i=0,end=0,pi=-1;
1959   int blockStart,blockEnd,blockOffset;
1960   QCString lang;
1961   while (i<size)
1962   {
         // may itself write the previous line and a block to out, in which case it
         // resets pi to -1 and moves i past that block
1963     findEndOfLine(out,data,size,pi,i,end);
1964     // line is now found at [i..end)
1965 
1966     if (pi!=-1)
1967     {
           // blockStart, blockEnd and blockOffset are relative to data+pi
1968       if (isFencedCodeBlock(data+pi,size-pi,refIndent,lang,blockStart,blockEnd,blockOffset))
1969       {
1970         writeFencedCodeBlock(out,data+pi,lang,blockStart,blockEnd);
             // continue directly after the fenced block, with no pending line
1971         i=pi+blockOffset;
1972         pi=-1;
1973         end=i+1;
1974         continue;
1975       }
1976       else if (isBlockQuote(data+pi,i-pi,refIndent))
1977       {
             // writeBlockQuote returns how far it got, relative to data+pi
1978         i = pi+writeBlockQuote(out,data+pi,size-pi);
1979         pi=-1;
1980         end=i+1;
1981         continue;
1982       }
1983       else
1984       {
1985         //printf("quote out={%s}\n",QCString(data+pi).left(i-pi).data());
             // ordinary line: copy the previous line unmodified
1986         out.addStr(data+pi,i-pi);
1987       }
1988     }
         // the current line becomes the pending line of the next iteration
1989     pi=i;
1990     i=end;
1991   }
1992   if (pi!=-1 && pi<size) // deal with the last line
1993   {
1994     if (isBlockQuote(data+pi,size-pi,refIndent))
1995     {
1996       writeBlockQuote(out,data+pi,size-pi);
1997     }
1998     else
1999     {
2000       out.addStr(data+pi,size-pi);
2001     }
2002   }
       // zero terminate the buffer before it is returned as a string
2003   out.addChar(0);
2004 
2005   //printf("Process quotations\n---- input ----\n%s\n---- output ----\n%s\n------------\n",
2006   //    s.data(),out.get());
2007 
2008   return out.get();
2009 }
2010
2011 static QCString processBlocks(const QCString &s,int indent)
2012 {
2013   GrowBuf out;
2014   const char *data = s.data();
2015   int size = s.length();
2016   int i=0,end=0,pi=-1,ref,level;
2017   QCString id,link,title;
2018   int blockIndent = indent;
2019
2020   // get indent for the first line
2021   end = i+1;
2022   int sp=0;
2023   while (end<=size && data[end-1]!='\n')
2024   {
2025     if (data[end-1]==' ') sp++;
2026     end++;
2027   }
2028
2029 #if 0 // commented out, since starting with a comment block is probably a usage error
2030       // see also http://stackoverflow.com/q/20478611/784672
2031
2032   // special case when the documentation starts with a code block
2033   // since the first line is skipped when looking for a code block later on.
2034   if (end>codeBlockIndent && isCodeBlock(data,0,end,blockIndent))
2035   {
2036     i=writeCodeBlock(out,data,size,blockIndent);
2037     end=i+1;
2038     pi=-1;
2039   }
2040 #endif
2041
2042   // process each line
2043   while (i<size)
2044   {
2045     findEndOfLine(out,data,size,pi,i,end);
2046     // line is now found at [i..end)
2047
2048     //printf("findEndOfLine: pi=%d i=%d end=%d\n",pi,i,end);
2049
2050     if (pi!=-1)
2051     {
2052       int blockStart,blockEnd,blockOffset;
2053       QCString lang;
2054       blockIndent = indent;
2055       //printf("isHeaderLine(%s)=%d\n",QCString(data+i).left(size-i).data(),level);
2056       if ((level=isHeaderline(data+i,size-i))>0)
2057       {
2058         //if (level==1) g_correctSectionLevel=FALSE;
2059         //if (g_correctSectionLevel) level--;
2060         //printf("Found header at %d-%d\n",i,end);
2061         while (pi<size && data[pi]==' ') pi++;
2062         QCString header,id;
2063         convertStringFragment(header,data+pi,i-pi-1);
2064         id = extractTitleId(header);
2065         //printf("header='%s' is='%s'\n",header.data(),id.data());
2066         if (!header.isEmpty())
2067         {
2068           if (!id.isEmpty())
2069           {
2070             out.addStr(level==1?"@section ":"@subsection ");
2071             out.addStr(id);
2072             out.addStr(" ");
2073             out.addStr(header);
2074             out.addStr("\n\n");
2075             SectionInfo *si = Doxygen::sectionDict->find(header);
2076             if (si)
2077             {
2078               if (si->lineNr != -1)
2079               {
2080                 warn(g_fileName,g_lineNr,"multiple use of section label '%s', (first occurrence: %s, line %d)",header.data(),si->fileName.data(),si->lineNr);
2081               }
2082               else
2083               {
2084                 warn(g_fileName,g_lineNr,"multiple use of section label '%s', (first occurrence: %s)",header.data(),si->fileName.data());
2085               }
2086             }
2087             else
2088             {
2089               si = new SectionInfo(g_fileName,g_lineNr,id,header,
2090                       level==1 ? SectionInfo::Section : SectionInfo::Subsection,level);
2091               if (g_current)
2092               {
2093                 g_current->anchors->append(si);
2094               }
2095               Doxygen::sectionDict->append(header,si);
2096             }
2097           }
2098           else
2099           {
2100             out.addStr(level==1?"<h1>":"<h2>");
2101             out.addStr(header);
2102             out.addStr(level==1?"\n</h1>\n":"\n</h2>\n");
2103           }
2104         }
2105         else
2106         {
2107           out.addStr("<hr>\n");
2108         }
2109         pi=-1;
2110         i=end;
2111         end=i+1;
2112         continue;
2113       }
2114       else if ((ref=isLinkRef(data+pi,size-pi,id,link,title)))
2115       {
2116         //printf("found link ref: id='%s' link='%s' title='%s'\n",
2117         //       id.data(),link.data(),title.data());
2118         g_linkRefs.insert(id.lower(),new LinkRef(link,title));
2119         i=ref+pi;
2120         pi=-1;
2121         end=i+1;
2122       }
2123       else if (isFencedCodeBlock(data+pi,size-pi,indent,lang,blockStart,blockEnd,blockOffset))
2124       {
2125         //printf("Found FencedCodeBlock lang='%s' start=%d end=%d code={%s}\n",
2126         //       lang.data(),blockStart,blockEnd,QCString(data+pi+blockStart).left(blockEnd-blockStart).data());
2127         writeFencedCodeBlock(out,data+pi,lang,blockStart,blockEnd);
2128         i=pi+blockOffset;
2129         pi=-1;
2130         end=i+1;
2131         continue;
2132       }
2133       else if (isCodeBlock(data+i,i,end-i,blockIndent))
2134       {
2135         // skip previous line (it is empty anyway)
2136         i+=writeCodeBlock(out,data+i,size-i,blockIndent);
2137         pi=-1;
2138         end=i+1;
2139         continue;
2140       }
2141       else if (isTableBlock(data+pi,size-pi))
2142       {
2143         i=pi+writeTableBlock(out,data+pi,size-pi);
2144         pi=-1;
2145         end=i+1;
2146         continue;
2147       }
2148       else
2149       {
2150         writeOneLineHeaderOrRuler(out,data+pi,i-pi);
2151       }
2152     }
2153     pi=i;
2154     i=end;
2155   }
2156   //printf("last line %d size=%d\n",i,size);
2157   if (pi!=-1 && pi<size) // deal with the last line
2158   {
2159     if (isLinkRef(data+pi,size-pi,id,link,title))
2160     {
2161       //printf("found link ref: id='%s' link='%s' title='%s'\n",
2162       //    id.data(),link.data(),title.data());
2163       g_linkRefs.insert(id.lower(),new LinkRef(link,title));
2164     }
2165     else
2166     {
2167       writeOneLineHeaderOrRuler(out,data+pi,size-pi);
2168     }
2169   }
2170
2171   out.addChar(0);
2172   return out.get();
2173 }
2174
2175 static QCString extractPageTitle(QCString &docs,QCString &id)
2176 {
2177   int ln=0;
2178   // first first non-empty line
2179   QCString title;
2180   const char *data = docs.data();
2181   int i=0;
2182   int size=docs.size();
2183   while (i<size && (data[i]==' ' || data[i]=='\n'))
2184   {
2185     if (data[i]=='\n') ln++;
2186     i++;
2187   }
2188   if (i>=size) return "";
2189   int end1=i+1;
2190   while (end1<size && data[end1-1]!='\n') end1++;
2191   //printf("i=%d end1=%d size=%d line='%s'\n",i,end1,size,docs.mid(i,end1-i).data());
2192   // first line from i..end1
2193   if (end1<size)
2194   {
2195     ln++;
2196     // second line form end1..end2
2197     int end2=end1+1;
2198     while (end2<size && data[end2-1]!='\n') end2++;
2199     if (isHeaderline(data+end1,size-end1))
2200     {
2201       convertStringFragment(title,data+i,end1-i-1);
2202       QCString lns;
2203       lns.fill('\n',ln);
2204       docs=lns+docs.mid(end2);
2205       id = extractTitleId(title);
2206       //printf("extractPageTitle(title='%s' docs='%s' id='%s')\n",title.data(),docs.data(),id.data());
2207       return title;
2208     }
2209   }
2210   if (i<end1 && isAtxHeader(data+i,end1-i,title,id)>0)
2211   {
2212     docs=docs.mid(end1);
2213   }
2214   //printf("extractPageTitle(title='%s' docs='%s' id='%s')\n",title.data(),docs.data(),id.data());
2215   return title;
2216 }
2217
2218 static QCString detab(const QCString &s,int &refIndent)
2219 {
2220   static int tabSize = Config_getInt("TAB_SIZE");
2221   GrowBuf out;
2222   int size = s.length();
2223   const char *data = s.data();
2224   int i=0;
2225   int col=0;
2226   const int maxIndent=1000000; // value representing infinity
2227   int minIndent=maxIndent;
2228   while (i<size)
2229   {
2230     char c = data[i++];
2231     switch(c)
2232     {
2233       case '\t': // expand tab
2234         {
2235           int stop = tabSize - (col%tabSize);
2236           //printf("expand at %d stop=%d\n",col,stop);
2237           col+=stop;
2238           while (stop--) out.addChar(' ');
2239         }
2240         break;
2241       case '\n': // reset colomn counter
2242         out.addChar(c);
2243         col=0;
2244         break;
2245       case ' ': // increment column counter
2246         out.addChar(c);
2247         col++;
2248         break;
2249       default: // non-whitespace => update minIndent
2250         out.addChar(c);
2251         if (c<0 && i<size) // multibyte sequence
2252         {
2253           out.addChar(data[i++]); // >= 2 bytes
2254           if (((uchar)c&0xE0)==0xE0 && i<size)
2255           {
2256             out.addChar(data[i++]); // 3 bytes
2257           }
2258           if (((uchar)c&0xF0)==0xF0 && i<size)
2259           {
2260             out.addChar(data[i++]); // 4 byres
2261           }
2262         }
2263         if (col<minIndent) minIndent=col;
2264         col++;
2265     }
2266   }
2267   if (minIndent!=maxIndent) refIndent=minIndent; else refIndent=0;
2268   out.addChar(0);
2269   //printf("detab refIndent=%d\n",refIndent);
2270   return out.get();
2271 }
2272
2273 //---------------------------------------------------------------------------
2274
2275 QCString processMarkdown(const QCString &fileName,const int lineNr,Entry *e,const QCString &input)
2276 {
2277   static bool init=FALSE;
2278   if (!init)
2279   {
2280     // setup callback table for special characters
2281     g_actions[(unsigned int)'_']=processEmphasis;
2282     g_actions[(unsigned int)'*']=processEmphasis;
2283     g_actions[(unsigned int)'`']=processCodeSpan;
2284     g_actions[(unsigned int)'\\']=processSpecialCommand;
2285     g_actions[(unsigned int)'@']=processSpecialCommand;
2286     g_actions[(unsigned int)'[']=processLink;
2287     g_actions[(unsigned int)'!']=processLink;
2288     g_actions[(unsigned int)'<']=processHtmlTag;
2289     g_actions[(unsigned int)'-']=processNmdash;
2290     g_actions[(unsigned int)'"']=processQuoted;
2291     init=TRUE;
2292   }
2293
2294   g_linkRefs.setAutoDelete(TRUE);
2295   g_linkRefs.clear();
2296   g_current = e;
2297   g_fileName = fileName;
2298   g_lineNr   = lineNr;
2299   static GrowBuf out;
2300   if (input.isEmpty()) return input;
2301   out.clear();
2302   int refIndent;
2303   // for replace tabs by spaces
2304   QCString s = detab(input,refIndent);
2305   //printf("======== DeTab =========\n---- output -----\n%s\n---------\n",s.data());
2306   // then process quotation blocks (as these may contain other blocks)
2307   s = processQuotations(s,refIndent);
2308   //printf("======== Quotations =========\n---- output -----\n%s\n---------\n",s.data());
2309   // then process block items (headers, rules, and code blocks, references)
2310   s = processBlocks(s,refIndent);
2311   //printf("======== Blocks =========\n---- output -----\n%s\n---------\n",s.data());
2312   // finally process the inline markup (links, emphasis and code spans)
2313   processInline(out,s,s.length());
2314   out.addChar(0);
2315   Debug::print(Debug::Markdown,0,"======== Markdown =========\n---- input ------- \n%s\n---- output -----\n%s\n---------\n",input.data(),out.get());
2316   return out.get();
2317 }
2318
2319 //---------------------------------------------------------------------------
2320
2321 QCString markdownFileNameToId(const QCString &fileName)
2322 {
2323   QCString baseFn  = stripFromPath(QFileInfo(fileName).absFilePath().utf8());
2324   int i = baseFn.findRev('.');
2325   if (i!=-1) baseFn = baseFn.left(i);
2326   QCString baseName = substitute(substitute(baseFn," ","_"),"/","_");
2327   return "md_"+baseName;
2328 }
2329
2330 void MarkdownFileParser::parseInput(const char *fileName,
2331                 const char *fileBuf,
2332                 Entry *root,
2333                 bool /*sameTranslationUnit*/,
2334                 QStrList & /*filesInSameTranslationUnit*/)
2335 {
2336   Entry *current = new Entry;
2337   current->lang = SrcLangExt_Markdown;
2338   current->fileName = fileName;
2339   current->docFile  = fileName;
2340   current->docLine  = 1;
2341   QCString docs = fileBuf;
2342   QCString id;
2343   QCString title=extractPageTitle(docs,id).stripWhiteSpace();
2344   QCString titleFn = QFileInfo(fileName).baseName().utf8();
2345   QCString fn      = QFileInfo(fileName).fileName().utf8();
2346   static QCString mdfileAsMainPage = Config_getString("USE_MDFILE_AS_MAINPAGE");
2347   if (id.isEmpty()) id = markdownFileNameToId(fileName);
2348   if (title.isEmpty()) title = titleFn;
2349   if (!mdfileAsMainPage.isEmpty() &&
2350       (fn==mdfileAsMainPage || // name reference
2351        QFileInfo(fileName).absFilePath()==
2352        QFileInfo(mdfileAsMainPage).absFilePath()) // file reference with path
2353      )
2354   {
2355     docs.prepend("@mainpage\n");
2356   }
2357   else if (id=="mainpage" || id=="index")
2358   {
2359     docs.prepend("@mainpage "+title+"\n");
2360   }
2361   else
2362   {
2363     docs.prepend("@page "+id+" "+title+"\n");
2364   }
2365   int lineNr=1;
2366   int position=0;
2367
2368   // even without markdown support enabled, we still
2369   // parse markdown files as such
2370   bool markdownEnabled = Doxygen::markdownSupport;
2371   Doxygen::markdownSupport = TRUE;
2372
2373   bool needsEntry;
2374   Protection prot;
2375   while (parseCommentBlock(
2376         this,
2377         current,
2378         docs,
2379         fileName,
2380         lineNr,
2381         FALSE,     // isBrief
2382         FALSE,     // javadoc autobrief
2383         FALSE,     // inBodyDocs
2384         prot,      // protection
2385         position,
2386         needsEntry))
2387   {
2388     if (needsEntry)
2389     {
2390       QCString docFile = current->docFile;
2391       root->addSubEntry(current);
2392       current = new Entry;
2393       current->lang = SrcLangExt_Markdown;
2394       current->docFile = docFile;
2395       current->docLine = lineNr;
2396     }
2397   }
2398   if (needsEntry)
2399   {
2400     root->addSubEntry(current);
2401   }
2402
2403   // restore setting
2404   Doxygen::markdownSupport = markdownEnabled;
2405   //g_correctSectionLevel = FALSE;
2406 }
2407
2408 void MarkdownFileParser::parseCode(CodeOutputInterface &codeOutIntf,
2409                const char *scopeName,
2410                const QCString &input,
2411                SrcLangExt lang,
2412                bool isExampleBlock,
2413                const char *exampleName,
2414                FileDef *fileDef,
2415                int startLine,
2416                int endLine,
2417                bool inlineFragment,
2418                MemberDef *memberDef,
2419                bool showLineNumbers,
2420                Definition *searchCtx,
2421                bool collectXRefs
2422               )
2423 {
2424   ParserInterface *pIntf = Doxygen::parserManager->getParser("*.cpp");
2425   if (pIntf!=this)
2426   {
2427     pIntf->parseCode(
2428        codeOutIntf,scopeName,input,lang,isExampleBlock,exampleName,
2429        fileDef,startLine,endLine,inlineFragment,memberDef,showLineNumbers,
2430        searchCtx,collectXRefs);
2431   }
2432 }
2433
2434 void MarkdownFileParser::resetCodeParserState()
2435 {
2436   ParserInterface *pIntf = Doxygen::parserManager->getParser("*.cpp");
2437   if (pIntf!=this)
2438   {
2439     pIntf->resetCodeParserState();
2440   }
2441 }
2442
2443 void MarkdownFileParser::parsePrototype(const char *text)
2444 {
2445   ParserInterface *pIntf = Doxygen::parserManager->getParser("*.cpp");
2446   if (pIntf!=this)
2447   {
2448     pIntf->parsePrototype(text);
2449   }
2450 }
2451