Merge pull request #24002 from sandreenko/fixDesktopFailure
[platform/upstream/coreclr.git] / src / ilasm / grammar_after.cpp
1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
4 /********************************************************************************/
5 /* Code goes here */
6
7 /********************************************************************************/
8 extern int yyparse();
9
// One row of the keyword/instruction lookup table defined below in this file.
struct Keywords
{
    const char*    name;      // spelling of the keyword or instruction
    unsigned short token;     // value findKeyword() hands back as the lexer token
    unsigned short tokenVal;  // this holds the instruction enumeration for those keywords that are instrs
    size_t         stname;    // filled with lengthof(name)-1 by the table macros
};
16
17 #define NO_VALUE        ((unsigned short)-1)              // The token has no value
18
19 static Keywords keywords[] = {
20 // Attention! Because of aliases, the instructions MUST go first!
21 // Redefine all the instructions (defined in assembler.h <- asmenum.h <- opcode.def)
22 #undef InlineNone
23 #undef InlineVar        
24 #undef ShortInlineVar
25 #undef InlineI          
26 #undef ShortInlineI     
27 #undef InlineI8         
28 #undef InlineR          
29 #undef ShortInlineR     
30 #undef InlineBrTarget
31 #undef ShortInlineBrTarget
32 #undef InlineMethod
33 #undef InlineField 
34 #undef InlineType 
35 #undef InlineString
36 #undef InlineSig        
37 #undef InlineTok        
38 #undef InlineSwitch
39 #undef InlineVarTok     
40
41
42 #define InlineNone              INSTR_NONE
43 #define InlineVar               INSTR_VAR
44 #define ShortInlineVar          INSTR_VAR
45 #define InlineI                 INSTR_I
46 #define ShortInlineI            INSTR_I
47 #define InlineI8                INSTR_I8
48 #define InlineR                 INSTR_R
49 #define ShortInlineR            INSTR_R
50 #define InlineBrTarget          INSTR_BRTARGET
51 #define ShortInlineBrTarget             INSTR_BRTARGET
52 #define InlineMethod            INSTR_METHOD
53 #define InlineField             INSTR_FIELD
54 #define InlineType              INSTR_TYPE
55 #define InlineString            INSTR_STRING
56 #define InlineSig               INSTR_SIG
57 #define InlineTok               INSTR_TOK
58 #define InlineSwitch            INSTR_SWITCH
59
60 #define InlineVarTok            0
61 #define NEW_INLINE_NAMES
62                 // The volatile instruction collides with the volatile keyword, so 
63                 // we treat it as a keyword everywhere and modify the grammar accordingly (Yuck!) 
64 #define OPDEF(c,s,pop,push,args,type,l,s1,s2,ctrl) { s, args, c, lengthof(s)-1 },
65 #define OPALIAS(alias_c, s, c) { s, NO_VALUE, c, lengthof(s)-1 },
66 #include "opcode.def"
67 #undef OPALIAS
68 #undef OPDEF
69
70                 /* keywords */
71 #define KYWD(name, sym, val)    { name, sym, val, lengthof(name)-1 },
72 #include "il_kywd.h"
73 #undef KYWD
74
75 };
76
77 /********************************************************************************/
78 /* File encoding-dependent functions */
79 /*--------------------------------------------------------------------------*/
80 char* nextcharA(__in __nullterminated char* pos)
81 {
82     return (*pos > 0) ? ++pos : (char *)_mbsinc((const unsigned char *)pos);
83 }
84
// Steps exactly one byte forward and returns the new position.
char* nextcharU(__in __nullterminated char* pos)
{
    return pos + 1;
}
89
// Steps two bytes forward (one 16-bit code unit) and returns the new position.
char* nextcharW(__in __nullterminated char* pos)
{
    return &pos[2];
}
94 /*--------------------------------------------------------------------------*/
// Reads the single byte at curPos and returns it converted to unsigned.
unsigned SymAU(__in __nullterminated char* curPos)
{
    const char ch = *curPos;
    return (unsigned)ch;
}
99
100 unsigned SymW(__in __nullterminated char* curPos)
101 {
102     return (unsigned)*((WCHAR*)curPos);
103 }
104 /*--------------------------------------------------------------------------*/
// Copies tokLen bytes starting at curTok into a freshly allocated,
// NUL-terminated buffer. The caller owns the result (allocated with new[]).
// Returns NULL when the allocation yields NULL.
char* NewStrFromTokenAU(__in_ecount(tokLen) char* curTok, size_t tokLen)
{
    char* copy = new char[tokLen+1];
    if(copy == NULL)
    {
        return NULL;
    }
    memcpy(copy, curTok, tokLen);
    copy[tokLen] = 0;
    return copy;
}
115 char* NewStrFromTokenW(__in_ecount(tokLen) char* curTok, size_t tokLen)
116 {
117     WCHAR* wcurTok = (WCHAR*)curTok;
118     char *nb = new char[(tokLen<<1) + 2];
119     if(nb != NULL)
120     {
121         tokLen = WszWideCharToMultiByte(CP_UTF8,0,(LPCWSTR)wcurTok,(int)(tokLen >> 1),nb,(int)(tokLen<<1) + 2,NULL,NULL);
122         nb[tokLen] = 0;
123     }
124     return nb;
125 }
126 /*--------------------------------------------------------------------------*/
// Copies tokLen bytes from curTok into the caller-supplied staticBuf and
// NUL-terminates it. Returns staticBuf, or NULL (buffer untouched) when the
// token plus terminator does not fit into bufSize bytes.
char* NewStaticStrFromTokenAU(__in_ecount(tokLen) char* curTok, size_t tokLen, __out_ecount(bufSize) char* staticBuf, size_t bufSize)
{
    if(tokLen < bufSize)
    {
        memcpy(staticBuf, curTok, tokLen);
        staticBuf[tokLen] = 0;
        return staticBuf;
    }
    return NULL;
}
134 char* NewStaticStrFromTokenW(__in_ecount(tokLen) char* curTok, size_t tokLen, __out_ecount(bufSize) char* staticBuf, size_t bufSize)
135 {
136     WCHAR* wcurTok = (WCHAR*)curTok;
137     if(tokLen >= bufSize/2) return NULL;
138     tokLen = WszWideCharToMultiByte(CP_UTF8,0,(LPCWSTR)wcurTok,(int)(tokLen >> 1),staticBuf,(int)bufSize,NULL,NULL);
139     staticBuf[tokLen] = 0;
140     return staticBuf;
141 }
142 /*--------------------------------------------------------------------------*/
// Parses a floating-point number from the first L bytes at begNum (at most
// 127 bytes are looked at). Stores a newly allocated double in *ppRes and
// returns how many bytes strtod consumed.
unsigned GetDoubleAU(__in __nullterminated char* begNum, unsigned L, double** ppRes)
{
    static char dbuff[128];
    const unsigned copyLen = (L > 127) ? 127 : L;
    memcpy(dbuff, begNum, copyLen);
    dbuff[copyLen] = 0;

    char* stop = NULL;
    const double value = strtod(dbuff, &stop);
    *ppRes = new double(value);
    return (unsigned)(stop - dbuff);
}
153
// Wide-character counterpart of GetDoubleAU: copies the first L bytes at
// begNum (at most 254) into a local buffer, appends two zero bytes and parses
// the buffer with wcstod. Stores a newly allocated double in *ppRes and
// returns the number of bytes consumed.
unsigned GetDoubleW(__in __nullterminated char* begNum, unsigned L, double** ppRes)
{
    static char dbuff[256];
    const unsigned copyLen = (L > 254) ? 254 : L;
    memcpy(dbuff, begNum, copyLen);
    dbuff[copyLen] = 0;
    dbuff[copyLen+1] = 0;

    char* stop = NULL;
    const double value = wcstod((const wchar_t*)dbuff, (wchar_t**)&stop);
    *ppRes = new double(value);
    return (unsigned)(stop - dbuff);
}
165 /*--------------------------------------------------------------------------*/
166 char* yygetline(int Line)
167 {
168     static char buff[0x4000];
169     char *pLine=NULL, *pNextLine=NULL;
170     char *pBegin=NULL, *pEnd = NULL;
171     unsigned uCount = parser->getAll(&pBegin);
172     pEnd = pBegin + uCount;
173     buff[0] = 0;
174     for(uCount=0, pLine=pBegin; pLine < pEnd; pLine = nextchar(pLine))
175     {
176         if(Sym(pLine) == '\n') uCount++;
177         if(uCount == (unsigned int)(Line-1)) break;
178     }
179     pLine = nextchar(pLine);
180     if(pLine < pEnd)
181     {
182         for(pNextLine = pLine; pNextLine < pEnd; pNextLine = nextchar(pNextLine))
183         {
184             if(Sym(pNextLine) == '\n') break;
185         }
186         if(Sym == SymW) // Unicode file
187         {
188             if(*((WCHAR*)pNextLine - 1) == '\r') pNextLine -= 2;
189             uCount = (unsigned)(pNextLine - pLine);
190             uCount &= 0x1FFF; // limit: 8K wchars
191             WCHAR* wzBuff = (WCHAR*)buff;
192             memcpy(buff,pLine,uCount);
193             wzBuff[uCount >> 1] = 0;
194         }
195         else
196         {
197             if(*(pNextLine-1)=='\r') pNextLine--;
198             uCount = (unsigned)(pNextLine - pLine);
199             uCount &= 0x3FFF; // limit: 16K chars
200             memcpy(buff,pLine,uCount);
201             buff[uCount]=0;
202         }
203     }    
204     return buff;
205 }
206
207 void yyerror(__in __nullterminated const char* str) {
208     char tokBuff[64];
209     WCHAR *wzfile = (WCHAR*)(PENV->in->namew());
210     int iline = PENV->curLine;
211     
212     size_t len = PENV->curPos - PENV->curTok;
213     if (len > 62) len = 62;
214     memcpy(tokBuff, PENV->curTok, len);
215     tokBuff[len] = 0;
216     tokBuff[len+1] = 0;
217     if(PENV->bExternSource)
218     {
219         wzfile = PASM->m_wzSourceFileName;
220         iline = PENV->nExtLine;
221     }
222     if(Sym == SymW) // Unicode file
223         fprintf(stderr, "%S(%d) : error : %s at token '%S' in: %S\n", 
224                 wzfile, iline, str, (WCHAR*)tokBuff, (WCHAR*)yygetline(PENV->curLine));
225     else
226         fprintf(stderr, "%S(%d) : error : %s at token '%s' in: %s\n", 
227                 wzfile, iline, str, tokBuff, yygetline(PENV->curLine));
228     parser->success = false;
229 }
230
231 /********************************************************************************/
232 /* looks up the typedef 'name' of length 'nameLen' (name does not need to be 
233    null terminated)   Returns 0 on failure */
234 TypeDefDescr* findTypedef(__in_ecount(NameLen) char* name, size_t NameLen)
235 {
236     TypeDefDescr* pRet = NULL;
237     static char Name[4096];
238     if(PASM->NumTypeDefs())
239     {
240         if(NewStaticStrFromToken(name,NameLen,Name,4096))
241             pRet = PASM->FindTypeDef(Name);
242     }
243     return pRet;
244 }
245
246 int TYPEDEF(TypeDefDescr* pTDD)
247 {
248     switch(TypeFromToken(pTDD->m_tkTypeSpec))
249     {
250         case mdtTypeDef:
251         case mdtTypeRef:
252             return TYPEDEF_T;
253         case mdtMethodDef:
254         case 0x99000000:
255             return TYPEDEF_M;
256         case mdtFieldDef:
257         case 0x98000000:
258             return TYPEDEF_F;
259         case mdtMemberRef:
260             return TYPEDEF_MR;
261         case mdtTypeSpec:
262             return TYPEDEF_TS;
263         case mdtCustomAttribute:
264             return TYPEDEF_CA;
265     }
266     return ERROR_;
267             
268 }
269
270 /********************************************************************************/
271 void indexKeywords(Indx* indx)  // called in Assembler constructor (assem.cpp)
272 {
273     Keywords* low = keywords;
274     Keywords* high = keywords + (sizeof(keywords) / sizeof(Keywords));
275     Keywords* mid;
276     for(mid = low; mid < high; mid++)
277     {
278         indx->IndexString((char*)(mid->name),mid);
279     }
280 }
281
282 Instr* SetupInstr(unsigned short opcode)
283 {
284     Instr* pVal = NULL;
285     if((pVal = PASM->GetInstr()))
286     {
287         pVal->opcode = opcode;
288         if((pVal->pWriter = PASM->m_pSymDocument)!=NULL)
289         {
290             if(PENV->bExternSource)
291             {
292                 pVal->linenum = PENV->nExtLine;
293                 pVal->column = PENV->nExtCol;
294                 pVal->linenum_end = PENV->nExtLineEnd;
295                 pVal->column_end = PENV->nExtColEnd;
296                 pVal->pc = nCurrPC;
297             }
298             else
299             {
300                 pVal->linenum = PENV->curLine;
301                 pVal->column = 1;
302                 pVal->linenum_end = PENV->curLine;
303                 pVal->column_end = 0;
304                 pVal->pc = PASM->m_CurPC;
305             }
306         }
307     }
308     return pVal;
309 }
310 /* looks up the keyword 'name' of length 'nameLen' (name does not need to be 
311    null terminated)   Returns 0 on failure */
312 int findKeyword(const char* name, size_t nameLen, unsigned short* pOpcode) 
313 {
314     static char Name[128];
315     Keywords* mid;
316
317     if(NULL == NewStaticStrFromToken((char*)name,nameLen,Name,128)) return 0; // can't be a keyword
318     mid = (Keywords*)(PASM->indxKeywords.FindString(Name));
319     if(mid == NULL) return 0;
320     *pOpcode = mid->tokenVal;
321
322     return(mid->token);
323 }
324
325 /********************************************************************************/
326 /* convert str to a uint64 */
// Digit value of each 7-bit character: '0'-'9' map to 0-9, letters of either
// case map to 10-35, every other slot has all bits set.
unsigned digits[128];
// Fills the digits table; has to run before the table is consulted.
void Init_str2uint64()
{
    memset(digits,255,sizeof(digits));
    for(int d = 0; d < 10; d++)
    {
        digits['0' + d] = d;
    }
    for(int letter = 0; letter < 26; letter++)
    {
        digits['A' + letter] = 10 + letter;
        digits['a' + letter] = 10 + letter;
    }
}
336 static unsigned __int64 str2uint64(const char* str, const char** endStr, unsigned radix) 
337 {
338     unsigned __int64 ret = 0;
339     unsigned digit,ix;
340     _ASSERTE(radix <= 36);
341     for(;;str = nextchar((char*)str)) 
342     {
343         ix = Sym((char*)str);
344         if(ix <= 0x7F)
345         {
346             digit = digits[ix];
347             if(digit < radix)
348             {
349                 ret = ret * radix + digit;
350                 continue;
351             }
352         }
353         *endStr = str;
354         return(ret);
355     }
356 }
357 /********************************************************************************/
358 /* Append an UTF-8 string preceded by compressed length, no zero terminator, to a BinStr */
359 static void AppendStringWithLength(BinStr* pbs, __in __nullterminated char* sz)
360 {
361     if((pbs != NULL) && (sz != NULL))
362     {
363         unsigned L = (unsigned) strlen(sz);
364         BYTE* pb = NULL;
365         corEmitInt(pbs,L);
366         if((pb = pbs->getBuff(L)) != NULL)
367             memcpy(pb,sz,L);
368     }
369 }
370
371 /********************************************************************************/
372 /* fetch the next token, and return it   Also set the yylval.union if the
373    lexical token also has a value */
374    
375
376 BOOL _Alpha[128];
377 BOOL _Digit[128];
378 BOOL _AlNum[128];
379 BOOL _ValidSS[128];
380 BOOL _ValidCS[128];
381 void SetSymbolTables()
382 {
383     unsigned i;
384     memset(_Alpha,0,sizeof(_Alpha)); 
385     memset(_Digit,0,sizeof(_Digit)); 
386     memset(_AlNum,0,sizeof(_AlNum)); 
387     memset(_ValidSS,0,sizeof(_ValidSS)); 
388     memset(_ValidCS,0,sizeof(_ValidCS));
389     for(i = 'A'; i <= 'Z'; i++)
390     {
391         _Alpha[i] = TRUE;
392         _AlNum[i] = TRUE;
393         _ValidSS[i] = TRUE;
394         _ValidCS[i] = TRUE;
395     } 
396     for(i = 'a'; i <= 'z'; i++)
397     {
398         _Alpha[i] = TRUE;
399         _AlNum[i] = TRUE;
400         _ValidSS[i] = TRUE;
401         _ValidCS[i] = TRUE;
402     }
403     for(i = '0'; i <= '9'; i++)
404     {
405         _Digit[i] = TRUE;
406         _AlNum[i] = TRUE;
407         _ValidCS[i] = TRUE;
408     }
409     _ValidSS[(unsigned char)'_'] = TRUE;
410     _ValidSS[(unsigned char)'#'] = TRUE;
411     _ValidSS[(unsigned char)'$'] = TRUE;
412     _ValidSS[(unsigned char)'@'] = TRUE;
413      
414     _ValidCS[(unsigned char)'_'] = TRUE;
415     _ValidCS[(unsigned char)'?'] = TRUE;
416     _ValidCS[(unsigned char)'$'] = TRUE;
417     _ValidCS[(unsigned char)'@'] = TRUE;
418     _ValidCS[(unsigned char)'`'] = TRUE;
419 }
420 BOOL IsAlpha(unsigned x) { return (x < 128)&&_Alpha[x]; }
421 BOOL IsDigit(unsigned x) { return (x < 128)&&_Digit[x]; }     
422 BOOL IsAlNum(unsigned x) { return (x < 128)&&_AlNum[x]; }     
423 BOOL IsValidStartingSymbol(unsigned x) { return (x < 128)&&_ValidSS[x]; }     
424 BOOL IsValidContinuingSymbol(unsigned x) { return (x < 128)&&_ValidCS[x]; } 
425
426
427 char* nextBlank(__in __nullterminated char* curPos)
428 {
429     for(;;) 
430     {   
431         switch(Sym(curPos)) 
432         {
433             case '/' :
434                 if ((Sym(nextchar(curPos)) == '/')|| (Sym(nextchar(curPos)) == '*'))
435                     return curPos;
436                 else
437                 {
438                     curPos = nextchar(curPos);
439                     break;
440                 }
441             case 0: 
442             case '\n':
443             case '\r':
444             case ' ' :
445             case '\t':
446             case '\f':
447                 return curPos;
448
449             default:
450                 curPos = nextchar(curPos);
451         }
452     }
453 }
454
455 char* skipBlanks(__in __nullterminated char* curPos, unsigned* pstate)
456 {
457     const unsigned eolComment = 1;
458     const unsigned multiComment = 2;
459     unsigned nextSym, state = *pstate;
460     char* nextPos;
461     for(;;) 
462     {   // skip whitespace and comments
463         if (curPos >= PENV->endPos) 
464         {
465             *pstate = state;    
466             return NULL;
467         }
468         switch(Sym(curPos)) 
469         {
470             case 0: 
471                 return NULL;       // EOF
472             case '\n':
473                 state &= ~eolComment;
474                 PENV->curLine++;
475                 if(PENV->bExternSource)
476                 {
477                     if(PENV->bExternSourceAutoincrement) PENV->nExtLine++;
478                     PASM->m_ulCurLine = PENV->nExtLine;
479                     PASM->m_ulCurColumn = PENV->nExtCol;
480                 }
481                 else
482                 {
483                     PASM->m_ulCurLine = PENV->curLine;
484                     PASM->m_ulCurColumn = 1;
485                 }
486                 break;
487             case '\r':
488             case ' ' :
489             case '\t':
490             case '\f':
491                 break;
492
493             case '*' :
494                 if(state == 0) goto PAST_WHITESPACE;
495                 if(state & multiComment)
496                 {
497                     nextPos = nextchar(curPos);
498                     if (Sym(nextPos) == '/') 
499                     {
500                         curPos = nextPos;
501                         state &= ~multiComment;
502                     }
503                 }
504                 break;
505
506             case '/' :
507                 if(state == 0)
508                 {
509                     nextPos = nextchar(curPos);
510                     nextSym = Sym(nextPos);
511                     if (nextSym == '/')
512                     {
513                         curPos = nextPos;
514                         state |= eolComment;
515                     }
516                     else if (nextSym == '*') 
517                     {
518                         curPos = nextPos;
519                         state |= multiComment;
520                     }
521                     else goto PAST_WHITESPACE;
522                 }
523                 break;
524
525             default:
526                 if (state == 0)  goto PAST_WHITESPACE;
527         }
528         curPos = nextchar(curPos);
529     }
530 PAST_WHITESPACE:
531     *pstate = state;
532     return curPos;
533 }
534
535 char* FullFileName(__in __nullterminated WCHAR* wzFileName, unsigned uCodePage);
536
537 int ProcessEOF()
538 {
539     PARSING_ENVIRONMENT* prev_penv = parser->PEStack.POP();
540     if(prev_penv != NULL)
541     {
542         //delete [] (WCHAR*)(PENV->in->namew());
543         delete PENV->in;
544         delete PENV;
545         parser->penv = prev_penv;
546         SetFunctionPtrs();
547         char* szFileName = new char[strlen(PENV->szFileName)+1];
548         strcpy_s(szFileName,strlen(PENV->szFileName)+1,PENV->szFileName);
549         PASM->SetSourceFileName(szFileName); // deletes the argument!
550         return ';';
551     }
552     //PENV->in = NULL;
553     return 0;
554 }
555
556 #define NEXT_TOKEN  {state=0; curPos=PENV->curPos; goto NextToken;}
557
558 int parse_literal(unsigned curSym, __inout __nullterminated char* &curPos, BOOL translate_escapes)
559 {
560     unsigned quote = curSym;
561     curPos = nextchar(curPos);
562     char* fromPtr = curPos;
563     bool escape = false;
564
565     for(;;) 
566     {     // Find matching quote
567         curSym = (curPos >= PENV->endPos) ? 0 : Sym(curPos);
568         if(curSym == 0)
569         {
570             PENV->curPos = curPos; 
571             return(BAD_LITERAL_);
572         }
573         else if(curSym == '\\')
574             escape = !escape;
575         else
576         {
577             if(curSym == '\n')
578             {
579                 PENV->curLine++;
580                 if(PENV->bExternSource)
581                 {
582                     if(PENV->bExternSourceAutoincrement) PENV->nExtLine++;
583                     PASM->m_ulCurLine = PENV->nExtLine;
584                     PASM->m_ulCurColumn = PENV->nExtCol;
585                 }
586                 else
587                 {
588                     PASM->m_ulCurLine = PENV->curLine;
589                     PASM->m_ulCurColumn = 1;
590                 }
591                 if (!escape) { PENV->curPos = curPos; return(BAD_LITERAL_); }
592             }
593             else if ((curSym == quote) && (!escape)) break;
594             escape = false;
595         }        
596         curPos = nextchar(curPos);
597     }
598     // translate escaped characters
599     unsigned tokLen = (unsigned)(curPos - fromPtr);
600     char* newstr = NewStrFromToken(fromPtr, tokLen);
601     char* toPtr;
602     curPos = nextchar(curPos);  // skip closing quote
603     if(translate_escapes)
604     {
605         fromPtr = newstr;
606         //_ASSERTE(0);
607         tokLen = (unsigned)strlen(newstr);
608         toPtr = new char[tokLen+1];
609         if(toPtr==NULL) return BAD_LITERAL_;
610         yylval.string = toPtr;
611         char* endPtr = fromPtr+tokLen;
612         while(fromPtr < endPtr) 
613         {
614             if (*fromPtr == '\\') 
615             {
616                 fromPtr++;
617                 switch(*fromPtr) 
618                 {
619                     case 't':
620                             *toPtr++ = '\t';
621                             break;
622                     case 'n':
623                             *toPtr++ = '\n';
624                             break;
625                     case 'b':
626                             *toPtr++ = '\b';
627                             break;
628                     case 'f':
629                             *toPtr++ = '\f';
630                             break;
631                     case 'v':
632                             *toPtr++ = '\v';
633                             break;
634                     case '?':
635                             *toPtr++ = '\?';
636                             break;
637                     case 'r':
638                             *toPtr++ = '\r';
639                             break;
640                     case 'a':
641                             *toPtr++ = '\a';
642                             break;
643                     case '\n':
644                             do      fromPtr++;
645                             while(isspace(*fromPtr));
646                             --fromPtr;              // undo the increment below   
647                             break;
648                     case '0':
649                     case '1':
650                     case '2':
651                     case '3':
652                             if (IsDigit(fromPtr[1]) && IsDigit(fromPtr[2])) 
653                             {
654                                 *toPtr++ = ((fromPtr[0] - '0') * 8 + (fromPtr[1] - '0')) * 8 + (fromPtr[2] - '0');
655                                 fromPtr+= 2;                                                            
656                             }
657                             else if(*fromPtr == '0') *toPtr++ = 0;
658                             else *toPtr++ = *fromPtr;
659                             break;
660                     default:
661                             *toPtr++ = *fromPtr;
662                 }
663                 fromPtr++;
664             }
665             else
666             //  *toPtr++ = *fromPtr++;
667             {
668                 char* tmpPtr = fromPtr;
669                 fromPtr = (nextchar == nextcharW) ? nextcharU(fromPtr) : nextchar(fromPtr);
670                 while(tmpPtr < fromPtr) *toPtr++ = *tmpPtr++;
671             }
672     
673         } //end while(fromPtr < endPtr)
674         *toPtr = 0;                     // terminate string
675         delete [] newstr;
676     }
677     else
678     {
679         yylval.string = newstr;
680         toPtr = newstr + strlen(newstr);
681     }
682             
683     PENV->curPos = curPos;
684     if(quote == '"')
685     {
686         BinStr* pBS = new BinStr();
687         unsigned size = (unsigned)(toPtr - yylval.string);
688         memcpy(pBS->getBuff(size),yylval.string,size);
689         delete [] yylval.string;
690         yylval.binstr = pBS;
691         return QSTRING;
692     }
693     else
694     {
695         if(PASM->NumTypeDefs())
696         {
697             TypeDefDescr* pTDD = PASM->FindTypeDef(yylval.string);
698             if(pTDD != NULL)
699             {
700                 delete [] yylval.string;
701                 yylval.tdd = pTDD;
702                 return(TYPEDEF(pTDD));
703             }
704         }
705         return SQSTRING;
706     }
707 }
708
709 #ifdef _PREFAST_
710 #pragma warning(push)
711 #pragma warning(disable:21000) // Suppress PREFast warning about overly large function
712 #endif
713 int yylex() 
714 {
715     char* curPos = PENV->curPos;
716     unsigned state = 0;
717     const unsigned multiComment = 2;
718     unsigned curSym;
719     
720     char* newstr;
721     
722 NextToken:    
723     // Skip any leading whitespace and comments
724     curPos = skipBlanks(curPos, &state);
725     if(curPos == NULL)
726     {
727         if (state & multiComment) return (BAD_COMMENT_);
728         if(ProcessEOF() == 0) return 0;       // EOF
729         NEXT_TOKEN;
730     }
731     char* curTok = curPos;
732     PENV->curTok = curPos;
733     PENV->curPos = curPos;
734     int tok = ERROR_;
735     yylval.string = 0;
736
737     curSym = Sym(curPos);
738     if(bParsingByteArray) // only hexadecimals w/o 0x, ')' and white space allowed!
739     {
740         int i,s=0;
741         for(i=0; i<2; i++, curPos = nextchar(curPos), curSym = Sym(curPos))
742         {
743             if(('0' <= curSym)&&(curSym <= '9')) s = s*16+(curSym - '0');
744             else if(('A' <= curSym)&&(curSym <= 'F')) s = s*16+(curSym - 'A' + 10);
745             else if(('a' <= curSym)&&(curSym <= 'f')) s = s*16+(curSym - 'a' + 10);
746             else break; // don't increase curPos!
747         }
748         if(i)
749         {
750             tok = HEXBYTE;
751             yylval.int32 = s;
752         }
753         else
754         {
755             if(curSym == ')' || curSym == '}') 
756             {
757                 bParsingByteArray = FALSE;
758                 goto Just_A_Character;
759             }
760         }
761         PENV->curPos = curPos;
762         return(tok);
763     }
764     if(curSym == '?') // '?' may be part of an identifier, if it's not followed by punctuation
765     {
766         if(IsValidContinuingSymbol(Sym(nextchar(curPos)))) goto Its_An_Id;
767         goto Just_A_Character;
768     }
769
770     if (IsValidStartingSymbol(curSym)) 
771     { // is it an ID
772 Its_An_Id:
773         size_t offsetDot = (size_t)-1; // first appearance of '.'
774                 size_t offsetDotDigit = (size_t)-1; // first appearance of '.<digit>' (not DOTTEDNAME!)
775         do 
776         {
777             curPos = nextchar(curPos);
778             if (Sym(curPos) == '.') 
779             {
780                 if (offsetDot == (size_t)-1) offsetDot = curPos - curTok;
781                 curPos = nextchar(curPos);
782                 if((offsetDotDigit==(size_t)-1)&&(Sym(curPos) >= '0')&&(Sym(curPos) <= '9')) 
783                         offsetDotDigit = curPos - curTok - 1;
784             }
785         } while(IsValidContinuingSymbol(Sym(curPos)));
786         
787         size_t tokLen = curPos - curTok;
788         // check to see if it is a keyword
789         int token = findKeyword(curTok, tokLen, &yylval.opcode);
790         if (token != 0) 
791         {
792             //printf("yylex: TOK = %d, curPos=0x%8.8X\n",token,curPos);
793             PENV->curPos = curPos;
794             PENV->curTok = curTok;
795             if(!SkipToken)
796             {
797                 switch(token)
798                 {
799                     case P_INCLUDE:
800                         //if(include_first_pass)
801                         //{
802                         //    PENV->curPos = curTok;
803                         //    include_first_pass = FALSE;
804                         //    return ';';
805                         //}
806                         //include_first_pass = TRUE;
807                         curPos = skipBlanks(curPos,&state);
808                         if(curPos == NULL)
809                         {
810                             if (state & multiComment) return (BAD_COMMENT_);
811                             if(ProcessEOF() == 0) return 0;       // EOF
812                             NEXT_TOKEN;
813                         }
814                         if(Sym(curPos) != '"') return ERROR_;
815                         curPos = nextchar(curPos);
816                         curTok = curPos;
817                         PENV->curTok = curPos;
818                         while(Sym(curPos) != '"')
819                         {
820                             curPos = nextchar(curPos);
821                             if(curPos >= PENV->endPos) return ERROR_;
822                             PENV->curPos = curPos;
823                         }
824                         tokLen = PENV->curPos - curTok;
825                         curPos = nextchar(curPos);
826                         PENV->curPos = curPos;
827                         {
828                             WCHAR* wzFile=NULL;
829                             if(Sym == SymW)
830                             {
831                                 if((wzFile = new WCHAR[tokLen/2 + 1]) != NULL)
832                                 {
833                                     memcpy(wzFile,curTok,tokLen);
834                                     wzFile[tokLen/2] = 0;
835                                 }
836                             }
837                             else
838                             {
839                                 if((wzFile = new WCHAR[tokLen+1]) != NULL)
840                                 {
841                                     tokLen = WszMultiByteToWideChar(g_uCodePage,0,curTok,(int)tokLen,wzFile,(int)tokLen+1);
842                                     wzFile[tokLen] = 0;
843                                 }
844                             }
845                             if(wzFile != NULL)
846                             {
847                                 if((parser->wzIncludePath != NULL)
848                                  &&(wcschr(wzFile,'\\')==NULL)&&(wcschr(wzFile,':')==NULL))
849                                 {
850                                     PathString wzFullName;
851
852                                     WCHAR* pwz;
853                                     DWORD dw = WszSearchPath(parser->wzIncludePath,wzFile,NULL,
854                                                 TRUE, wzFullName,&pwz);
855                                     if(dw != 0)
856                                     {
857                                         delete [] wzFile;
858
859                                         wzFile = wzFullName.GetCopyOfUnicodeString();
860                                     }
861                                     
862                                 }
863                                 if(PASM->m_fReportProgress)
864                                     parser->msg("\nIncluding '%S'\n",wzFile);
865                                 MappedFileStream *pIn = new MappedFileStream(wzFile);
866                                 if((pIn != NULL)&&pIn->IsValid())
867                                 {
868                                     parser->PEStack.PUSH(PENV);
869                                     PASM->SetSourceFileName(FullFileName(wzFile,CP_UTF8)); // deletes the argument!
870                                     parser->CreateEnvironment(pIn);
871                                     NEXT_TOKEN;
872                                 }
873                                 else
874                                 {
875                                     delete [] wzFile;
876                                     PASM->report->error("#include failed\n");
877                                     return ERROR_;
878                                 }
879                             }
880                             else
881                             {
882                                 PASM->report->error("Out of memory\n");
883                                 return ERROR_;
884                             }
885                         }
886                         curPos = PENV->curPos;
887                         curTok = PENV->curTok;
888                         break;
889                     case P_IFDEF:
890                     case P_IFNDEF:
891                     case P_DEFINE:
892                     case P_UNDEF:
893                         curPos = skipBlanks(curPos,&state);
894                         if(curPos == NULL)
895                         {
896                             if (state & multiComment) return (BAD_COMMENT_);
897                             if(ProcessEOF() == 0) return 0;       // EOF
898                             NEXT_TOKEN;
899                         }
900                         curTok = curPos;
901                         PENV->curTok = curPos;
902                         PENV->curPos = curPos;
903                         if (!IsValidStartingSymbol(Sym(curPos))) return ERROR_;
904                         do 
905                         {
906                             curPos = nextchar(curPos);
907                         } while(IsValidContinuingSymbol(Sym(curPos)));
908                         tokLen = curPos - curTok;
909                             
910                         newstr = NewStrFromToken(curTok, tokLen);
911                         if((token==P_DEFINE)||(token==P_UNDEF))
912                         {
913                             if(token == P_DEFINE)
914                             {
915                                 curPos = skipBlanks(curPos,&state);
916                                 if ((curPos == NULL) && (ProcessEOF() == 0))
917                                 {
918                                     DefineVar(newstr, NULL);
919                                     return 0;
920                                 }
921                                 curSym = Sym(curPos);
922                                 if(curSym != '"')
923                                     DefineVar(newstr, NULL);
924                                 else
925                                 {
926                                     tok = parse_literal(curSym, curPos, FALSE);
927                                     if(tok == QSTRING)
928                                     {
929                                         // if not ANSI, then string is in UTF-8,
930                                         // insert prefix
931                                         if(nextchar != nextcharA)
932                                         {
933                                             yylval.binstr->insertInt8(0xEF);
934                                             yylval.binstr->insertInt8(0xBB);
935                                             yylval.binstr->insertInt8(0xBF);
936                                         }
937                                         yylval.binstr->appendInt8(' ');
938                                         DefineVar(newstr, yylval.binstr);
939                                     }
940                                     else
941                                         return tok;
942                                 }
943                             }
944                             else UndefVar(newstr);
945                         }
946                         else
947                         {
948                             SkipToken = IsVarDefined(newstr);
949                             if(token == P_IFDEF) SkipToken = !SkipToken;
950                             IfEndif++;
951                             if(SkipToken) IfEndifSkip=IfEndif;
952                         }
953                         break;
954                     case P_ELSE:
955                         SkipToken = TRUE;
956                         IfEndifSkip=IfEndif;
957                         break;
958                     case P_ENDIF:
959                         if(IfEndif == 0)
960                         {
961                             PASM->report->error("Unmatched #endif\n");
962                             return ERROR_;
963                         }
964                         IfEndif--;
965                         break;
966                     default:
967                         return(token);
968                 }
969                 goto NextToken;
970             }
971             if(SkipToken)
972             {
973                 switch(token)
974                 {
975                     case P_IFDEF:
976                     case P_IFNDEF:
977                         IfEndif++;
978                         break;
979                     case P_ELSE:
980                         if(IfEndif == IfEndifSkip) SkipToken = FALSE;
981                         break;
982                     case P_ENDIF:
983                         if(IfEndif == IfEndifSkip) SkipToken = FALSE;
984                         IfEndif--;
985                         break;
986                     default:
987                         break;
988                 }
989                 //if(yylval.instr) yylval.instr->opcode = -1;
990                 goto NextToken;
991             }
992             return(token);
993         } // end if token != 0
994         if(SkipToken) { curPos = nextBlank(curPos); goto NextToken; }
995         
996         VarName* pVarName = FindVarDef(NewStrFromToken(curTok, tokLen));
997         if(pVarName != NULL)
998         {
999             if(pVarName->pbody != NULL)
1000             {
1001                 BinStrStream *pIn = new BinStrStream(pVarName->pbody);
1002                 if((pIn != NULL)&&pIn->IsValid())
1003                 {
1004                     PENV->curPos = curPos;
1005                     parser->PEStack.PUSH(PENV);
1006                     parser->CreateEnvironment(pIn);
1007                     NEXT_TOKEN;
1008                 }
1009             }
1010         }
1011         
1012         TypeDefDescr* pTDD = findTypedef(curTok,tokLen);
1013         
1014         if(pTDD != NULL)
1015         {
1016             yylval.tdd = pTDD;
1017             PENV->curPos = curPos;
1018             PENV->curTok = curTok;
1019             return(TYPEDEF(pTDD));
1020         }
1021         if(Sym(curTok) == '#') 
1022         {
1023             PENV->curPos = curPos;
1024             PENV->curTok = curTok;
1025             return(ERROR_);
1026         }
1027         // Not a keyword, normal identifiers don't have '.' in them
1028         if (offsetDot < (size_t)-1) 
1029         {
1030             if(offsetDotDigit < (size_t)-1)
1031             {
1032                 curPos = curTok+offsetDotDigit;
1033                 tokLen = offsetDotDigit;
1034             }
1035             // protection against something like Foo.Bar..123 or Foo.Bar.
1036             unsigned D = (Sym == SymW) ? 2 : 1; // Unicode or ANSI/UTF8!
1037             while((Sym(curPos-D)=='.')&&(tokLen))
1038             {
1039                 curPos -= D;
1040                 tokLen -= D;
1041             }
1042         }
1043         if((yylval.string = NewStrFromToken(curTok,tokLen)))
1044         {
1045             tok = (offsetDot == (size_t)(-1))? ID : DOTTEDNAME;
1046             //printf("yylex: ID = '%s', curPos=0x%8.8X\n",yylval.string,curPos);
1047         }
1048         else return BAD_LITERAL_;
1049     }
1050     else if(SkipToken) { curPos = nextBlank(curPos); goto NextToken; }
1051     else if (IsDigit(curSym) 
1052         || (curSym == '.' && IsDigit(Sym(nextchar(curPos))))
1053         || (curSym == '-' && IsDigit(Sym(nextchar(curPos))))) 
1054     {
1055         const char* begNum = curPos;
1056         unsigned radix = 10;
1057
1058         neg = (curSym == '-');    // always make it unsigned 
1059         if (neg) curPos = nextchar(curPos);
1060
1061         if (Sym(curPos) == '0' && Sym(nextchar(curPos)) != '.') 
1062         {
1063             curPos = nextchar(curPos);
1064             radix = 8;
1065             if (Sym(curPos) == 'x' || Sym(curPos) == 'X') 
1066             {
1067                 curPos = nextchar(curPos);
1068                 radix = 16;
1069             }
1070         }
1071         begNum = curPos;
1072         {
1073             unsigned __int64 i64 = str2uint64(begNum, const_cast<const char**>(&curPos), radix);
1074             unsigned __int64 mask64 = neg ? UI64(0xFFFFFFFF80000000) : UI64(0xFFFFFFFF00000000);
1075             unsigned __int64 largestNegVal32 = UI64(0x0000000080000000);
1076             if ((i64 & mask64) && (i64 != largestNegVal32))
1077             {
1078                 yylval.int64 = new __int64(i64);
1079                 tok = INT64;                    
1080                 if (neg) *yylval.int64 = -*yylval.int64;
1081             }
1082             else
1083             {
1084                 yylval.int32 = (__int32)i64;
1085                 tok = INT32;
1086                 if(neg) yylval.int32 = -yylval.int32;
1087             }
1088         }
1089         if (radix == 10 && ((Sym(curPos) == '.' && Sym(nextchar(curPos)) != '.') || Sym(curPos) == 'E' || Sym(curPos) == 'e')) 
1090         {
1091             unsigned L = (unsigned)(PENV->endPos - begNum);
1092             curPos = (char*)begNum + GetDouble((char*)begNum,L,&yylval.float64);
1093             if (neg) *yylval.float64 = -*yylval.float64;
1094             tok = FLOAT64;
1095         }
1096     }
1097     else 
1098     {   //      punctuation
1099         if (curSym == '"' || curSym == '\'') 
1100         {
1101             return parse_literal(curSym, curPos, TRUE);
1102         } // end if (*curPos == '"' || *curPos == '\'')
1103         else if (curSym==':' && Sym(nextchar(curPos))==':') 
1104         {
1105             curPos = nextchar(nextchar(curPos));
1106             tok = DCOLON;
1107         }       
1108         else if(curSym == '.') 
1109         {
1110             if (Sym(nextchar(curPos))=='.' && Sym(nextchar(nextchar(curPos)))=='.') 
1111             {
1112                 curPos = nextchar(nextchar(nextchar(curPos)));
1113                 tok = ELIPSIS;
1114             }
1115             else
1116             {
1117                 do
1118                 {
1119                     curPos = nextchar(curPos);
1120                     if (curPos >= PENV->endPos) 
1121                     return ERROR_;
1122                     curSym = Sym(curPos);
1123                 }
1124                 while(IsAlNum(curSym) || curSym == '_' || curSym == '$'|| curSym == '@'|| curSym == '?');
1125                 size_t tokLen = curPos - curTok;
1126     
1127                 // check to see if it is a keyword
1128                 int token = findKeyword(curTok, tokLen, &yylval.opcode);
1129                 if(token)
1130                         {
1131                     //printf("yylex: TOK = %d, curPos=0x%8.8X\n",token,curPos);
1132                     PENV->curPos = curPos;
1133                     PENV->curTok = curTok; 
1134                     return(token);
1135                 }
1136                 tok = '.';
1137                 curPos = nextchar(curTok);
1138             }
1139         }
1140         else 
1141         {
1142 Just_A_Character:
1143             tok = curSym;
1144             curPos = nextchar(curPos);
1145         }
1146         //printf("yylex: PUNCT curPos=0x%8.8X\n",curPos);
1147     }
1148     dbprintf(("    Line %d token %d (%c) val = %s\n", PENV->curLine, tok, 
1149             (tok < 128 && isprint(tok)) ? tok : ' ', 
1150             (tok > 255 && tok != INT32 && tok != INT64 && tok!= FLOAT64) ? yylval.string : ""));
1151
1152     PENV->curPos = curPos;
1153     PENV->curTok = curTok; 
1154     return(tok);
1155 }
1156 #ifdef _PREFAST_
1157 #pragma warning(pop)
1158 #endif
1159
1160 /**************************************************************************/
1161 static char* newString(__in __nullterminated const char* str1)
1162 {
1163     char* ret = new char[strlen(str1)+1];
1164     if(ret) strcpy_s(ret, strlen(str1)+1, str1);
1165     return(ret);
1166 }
1167
1168 /**************************************************************************/
1169 /* concatenate strings and release them */
1170
1171 static char* newStringWDel(__in __nullterminated char* str1, char delimiter, __in __nullterminated char* str3) 
1172 {
1173     size_t len1 = strlen(str1);
1174     size_t len = len1+2;
1175     if (str3) len += strlen(str3);
1176     char* ret = new char[len];
1177     if(ret)
1178     {
1179         strcpy_s(ret, len, str1);
1180         delete [] str1;
1181         ret[len1] = delimiter;
1182         ret[len1+1] = 0;
1183         if (str3)
1184         {
1185             strcat_s(ret, len, str3);
1186             delete [] str3;
1187         }
1188     }
1189     return(ret);
1190 }
1191
1192 /**************************************************************************/
1193 static void corEmitInt(BinStr* buff, unsigned data) 
1194 {
1195     unsigned cnt = CorSigCompressData(data, buff->getBuff(5));
1196     buff->remove(5 - cnt);
1197 }
1198
1199
1200 /**************************************************************************/
1201 /* move 'ptr past the exactly one type description */
1202
1203 unsigned __int8* skipType(unsigned __int8* ptr, BOOL fFixupType) 
1204 {
1205     mdToken  tk;
1206 AGAIN:
1207     switch(*ptr++) {
1208         case ELEMENT_TYPE_VOID         :
1209         case ELEMENT_TYPE_BOOLEAN      :
1210         case ELEMENT_TYPE_CHAR         :
1211         case ELEMENT_TYPE_I1           :
1212         case ELEMENT_TYPE_U1           :
1213         case ELEMENT_TYPE_I2           :
1214         case ELEMENT_TYPE_U2           :
1215         case ELEMENT_TYPE_I4           :
1216         case ELEMENT_TYPE_U4           :
1217         case ELEMENT_TYPE_I8           :
1218         case ELEMENT_TYPE_U8           :
1219         case ELEMENT_TYPE_R4           :
1220         case ELEMENT_TYPE_R8           :
1221         case ELEMENT_TYPE_U            :
1222         case ELEMENT_TYPE_I            :
1223         case ELEMENT_TYPE_STRING       :
1224         case ELEMENT_TYPE_OBJECT       :
1225         case ELEMENT_TYPE_TYPEDBYREF   :
1226         case ELEMENT_TYPE_SENTINEL     :
1227                 /* do nothing */
1228                 break;
1229
1230         case ELEMENT_TYPE_VALUETYPE   :
1231         case ELEMENT_TYPE_CLASS        :
1232                 ptr += CorSigUncompressToken(ptr, &tk);
1233                 break;
1234
1235         case ELEMENT_TYPE_CMOD_REQD    :
1236         case ELEMENT_TYPE_CMOD_OPT     :
1237                 ptr += CorSigUncompressToken(ptr, &tk);
1238                 goto AGAIN;
1239         
1240         case ELEMENT_TYPE_ARRAY         :
1241                 {
1242                     ptr = skipType(ptr, fFixupType);                    // element Type
1243                     unsigned rank = CorSigUncompressData((PCCOR_SIGNATURE&) ptr);
1244                     if (rank != 0)
1245                     {
1246                         unsigned numSizes = CorSigUncompressData((PCCOR_SIGNATURE&) ptr);
1247                         while(numSizes > 0)
1248                                                 {
1249                             CorSigUncompressData((PCCOR_SIGNATURE&) ptr);
1250                                                         --numSizes;
1251                                                 }
1252                         unsigned numLowBounds = CorSigUncompressData((PCCOR_SIGNATURE&) ptr);
1253                         while(numLowBounds > 0)
1254                                                 {
1255                             CorSigUncompressData((PCCOR_SIGNATURE&) ptr);
1256                                                         --numLowBounds;
1257                                                 }
1258                     }
1259                 }
1260                 break;
1261
1262                 // Modifiers or dependent types
1263         case ELEMENT_TYPE_PINNED                :
1264         case ELEMENT_TYPE_PTR                   :
1265         case ELEMENT_TYPE_BYREF                 :
1266         case ELEMENT_TYPE_SZARRAY               :
1267                 // tail recursion optimization
1268                 // ptr = skipType(ptr, fFixupType);
1269                 // break
1270                 goto AGAIN;
1271
1272         case ELEMENT_TYPE_VAR:
1273         case ELEMENT_TYPE_MVAR:
1274                 CorSigUncompressData((PCCOR_SIGNATURE&) ptr);  // bound
1275                 break;
1276         
1277         case ELEMENT_TYPE_VARFIXUP:
1278         case ELEMENT_TYPE_MVARFIXUP:
1279                 if(fFixupType)
1280                 {
1281                     BYTE* pb = ptr-1; // ptr incremented in switch
1282                     unsigned __int8* ptr_save = ptr;
1283                     int n = CorSigUncompressData((PCCOR_SIGNATURE&) ptr);  // fixup #
1284                     int compressed_size_n = (int)(ptr - ptr_save);  // ptr was updated by CorSigUncompressData()
1285                     int m = -1;
1286                     if(PASM->m_TyParList)
1287                         m = PASM->m_TyParList->IndexOf(TyParFixupList.PEEK(n));
1288                     if(m == -1)
1289                     {
1290                         PASM->report->error("(fixupType) Invalid %stype parameter '%s'\n",
1291                             (*pb == ELEMENT_TYPE_MVARFIXUP)? "method ": "",
1292                             TyParFixupList.PEEK(n));
1293                         m = 0;
1294                     }
1295                     *pb = (*pb == ELEMENT_TYPE_MVARFIXUP)? ELEMENT_TYPE_MVAR : ELEMENT_TYPE_VAR;
1296                     int compressed_size_m = (int)CorSigCompressData(m,pb+1);
1297
1298                     // Note that CorSigCompressData() (and hence, CorSigUncompressData()) store a number
1299                     // 0 <= x <= 0x1FFFFFFF in 1, 2, or 4 bytes. Above, 'n' is the fixup number being read,
1300                     // and 'm' is the generic parameter number being written out (in the same place where 'n'
1301                     // came from). If 'n' takes more space to compress than 'm' (e.g., 0x80 <= n <= 0x3fff so
1302                     // it takes 2 bytes, and m < 0x80 so it takes one byte), then when we overwrite the fixup
1303                     // number with the generic parameter number, we'll leave extra bytes in the signature following
1304                     // the written generic parameter number. Thus, we do something of a hack to ensure that the
1305                     // compressed number is correctly readable even if 'm' compresses smaller than 'n' did: we
1306                     // recompress 'm' to use the same amount of space as 'n' used. This is possible because smaller
1307                     // numbers can still be compressed in a larger amount of space, even though it's not optimal (and
1308                     // CorSigCompressData() would never do it). If, however, the compressed sizes are the other
1309                     // way around (m takes more space to compress than n), then we've already corrupted the
1310                     // signature that we're reading by writing beyond what we should (is there some reason why
1311                     // this is not possible?).
1312                     // Note that 'ptr' has already been adjusted, above, to point to the next type after this one.
1313                     // There is no need to update it when recompressing the data.
1314
1315                     if (compressed_size_m > compressed_size_n)
1316                     {
1317                         // We've got a problem: we just corrupted the rest of the signature!
1318                         // (Can this ever happen in practice?)
1319                         PASM->report->error("(fixupType) Too many %stype parameters\n",
1320                             (*pb == ELEMENT_TYPE_MVARFIXUP)? "method ": "");
1321                     }
1322                     else if (compressed_size_m < compressed_size_n)
1323                     {
1324                         // We didn't write out as much data as we read. This will leave extra bytes in the
1325                         // signature that will be incorrectly recognized. Ideally, we would just shrink the
1326                         // signature. That's not easy to do here. Instead, pad the bytes to force it to use
1327                         // a larger encoding than needed. This assumes knowledge of the CorSigCompressData()
1328                         // encoding.
1329                         //
1330                         // The cases:
1331                         //      compressed_size_m   m bytes     compressed_size_n   result bytes
1332                         //      1                   m1          2                   0x80 m1
1333                         //      1                   m1          4                   0xC0 0x00 0x00 m1
1334                         //      2                   m1 m2       4                   0xC0 0x00 (m1 & 0x7f) m2
1335
1336                         _ASSERTE((compressed_size_m == 1) || (compressed_size_m == 2) || (compressed_size_m == 4));
1337                         _ASSERTE((compressed_size_n == 1) || (compressed_size_n == 2) || (compressed_size_n == 4));
1338
1339                         if ((compressed_size_m == 1) &&
1340                             (compressed_size_n == 2))
1341                         {
1342                             unsigned __int8 m1 = *(pb + 1);
1343                             _ASSERTE(m1 < 0x80);
1344                             *(pb + 1) = 0x80;
1345                             *(pb + 2) = m1;
1346                         }
1347                         else
1348                         if ((compressed_size_m == 1) &&
1349                             (compressed_size_n == 4))
1350                         {
1351                             unsigned __int8 m1 = *(pb + 1);
1352                             _ASSERTE(m1 < 0x80);
1353                             *(pb + 1) = 0xC0;
1354                             *(pb + 2) = 0x00;
1355                             *(pb + 3) = 0x00;
1356                             *(pb + 4) = m1;
1357                         }
1358                         else
1359                         if ((compressed_size_m == 2) &&
1360                             (compressed_size_n == 4))
1361                         {
1362                             unsigned __int8 m1 = *(pb + 1);
1363                             unsigned __int8 m2 = *(pb + 2);
1364                             _ASSERTE(m1 >= 0x80);
1365                             m1 &= 0x7f; // strip the bit indicating it's a 2-byte thing
1366                             *(pb + 1) = 0xC0;
1367                             *(pb + 2) = 0x00;
1368                             *(pb + 3) = m1;
1369                             *(pb + 4) = m2;
1370                         }
1371                     }
1372                 }
1373                 else
1374                     CorSigUncompressData((PCCOR_SIGNATURE&) ptr);  // bound
1375                 break;
1376
1377         case ELEMENT_TYPE_FNPTR: 
1378                 {
1379                     CorSigUncompressData((PCCOR_SIGNATURE&) ptr);    // calling convention
1380                     unsigned argCnt = CorSigUncompressData((PCCOR_SIGNATURE&) ptr);    // arg count
1381                     ptr = skipType(ptr, fFixupType);                             // return type
1382                     while(argCnt > 0)
1383                     {
1384                         ptr = skipType(ptr, fFixupType);
1385                         --argCnt;
1386                     }
1387                 }
1388                 break;
1389
1390         case ELEMENT_TYPE_GENERICINST: 
1391                {
1392                    ptr = skipType(ptr, fFixupType);                 // type constructor
1393                    unsigned argCnt = CorSigUncompressData((PCCOR_SIGNATURE&)ptr);               // arg count
1394                    while(argCnt > 0) {
1395                        ptr = skipType(ptr, fFixupType);
1396                        --argCnt;
1397                    }
1398                }
1399                break;                        
1400
1401         default:
1402         case ELEMENT_TYPE_END                   :
1403                 _ASSERTE(!"Unknown Type");
1404                 break;
1405     }
1406     return(ptr);
1407 }
1408
1409 /**************************************************************************/
1410 void FixupTyPars(PCOR_SIGNATURE pSig, ULONG cSig)
1411 {
1412     if(TyParFixupList.COUNT() > 0)
1413     {
1414         BYTE* ptr = (BYTE*)pSig;
1415         BYTE* ptrEnd = ptr + cSig;
1416         while(ptr < ptrEnd)
1417         {
1418             ptr = skipType(ptr, TRUE);
1419         } // end while
1420     } // end if(COUNT>0)
1421 }
1422 void FixupTyPars(BinStr* pbstype) 
1423 {
1424     FixupTyPars((PCOR_SIGNATURE)(pbstype->ptr()),(ULONG)(pbstype->length()));
1425 }
1426 /**************************************************************************/
1427 static unsigned corCountArgs(BinStr* args) 
1428 {
1429     unsigned __int8* ptr = args->ptr();
1430     unsigned __int8* end = &args->ptr()[args->length()];
1431     unsigned ret = 0;
1432     while(ptr < end) 
1433     {
1434         if (*ptr != ELEMENT_TYPE_SENTINEL) 
1435         {
1436             ptr = skipType(ptr, FALSE);
1437             ret++;
1438         }
1439         else ptr++;
1440     }
1441     return(ret);
1442 }
1443
1444 /********************************************************************************/
1445 AsmParse::AsmParse(ReadStream* aIn, Assembler *aAssem) 
1446 {
1447 #ifdef DEBUG_PARSING
1448     extern int yydebug;
1449     yydebug = 1;
1450 #endif
1451
1452     assem = aAssem;
1453     assem->SetErrorReporter((ErrorReporter *)this);
1454
1455     assem->m_ulCurLine = 1;
1456     assem->m_ulCurColumn = 1;
1457     
1458     wzIncludePath = NULL;
1459     penv = NULL;
1460
1461     hstdout = GetStdHandle(STD_OUTPUT_HANDLE);
1462     hstderr = GetStdHandle(STD_ERROR_HANDLE);
1463
1464     success = true; 
1465     _ASSERTE(parser == 0);          // Should only be one parser instance at a time
1466
1467    // Resolve aliases
1468    for (unsigned int i = 0; i < sizeof(keywords) / sizeof(Keywords); i++)
1469    {
1470        if (keywords[i].token == NO_VALUE)
1471            keywords[i].token = keywords[keywords[i].tokenVal].token;
1472    }
1473     SetSymbolTables();
1474     Init_str2uint64();
1475     parser = this;
1476     //yyparse();
1477 }
1478
1479 /********************************************************************************/
1480 AsmParse::~AsmParse() 
1481 {
1482     parser = 0;
1483     delete penv;
1484     while(m_ANSLast.POP());
1485 }
1486
1487 /**************************************************************************/
1488 DWORD AsmParse::IsItUnicode(CONST LPVOID pBuff, int cb, LPINT lpi)
1489 {
1490     return IsTextUnicode(pBuff,cb,lpi);
1491 }
1492
1493 /**************************************************************************/
1494 void AsmParse::CreateEnvironment(ReadStream* stream) 
1495
1496     penv = new PARSING_ENVIRONMENT;
1497     memset(penv,0,sizeof(PARSING_ENVIRONMENT));
1498     penv->in = stream; 
1499     penv->curLine = 1;
1500     strcpy_s(penv->szFileName, MAX_FILENAME_LENGTH*3+1,assem->m_szSourceFileName); 
1501     
1502     penv->curPos = fillBuff(NULL);
1503     penv->uCodePage = g_uCodePage;
1504     
1505     SetFunctionPtrs();
1506 };
1507
1508 /**************************************************************************/
1509 void AsmParse::ParseFile(ReadStream* stream) 
1510
1511     CreateEnvironment(stream);
1512     yyparse(); 
1513     penv->in = NULL; 
1514 };
1515
1516 /**************************************************************************/
1517 char* AsmParse::fillBuff(__in_opt __nullterminated char* pos) 
1518 {
1519     int iPutToBuffer;
1520     int iOptions = IS_TEXT_UNICODE_UNICODE_MASK;
1521     g_uCodePage = CP_ACP;
1522     iPutToBuffer = (int)penv->in->getAll(&(penv->curPos));
1523     
1524     penv->endPos = penv->curPos + iPutToBuffer;
1525     if(iPutToBuffer > 128) iPutToBuffer = 128;
1526     if(IsItUnicode(penv->curPos,iPutToBuffer,&iOptions))
1527     {
1528         g_uCodePage = CP_UTF8;
1529         if(iOptions & IS_TEXT_UNICODE_SIGNATURE)
1530         {
1531             penv->curPos += 2;
1532         }
1533         if(assem->m_fReportProgress) printf("Source file is UNICODE\n\n");
1534         penv->pfn_Sym = SymW;
1535         penv->pfn_nextchar = nextcharW;
1536         penv->pfn_NewStrFromToken = NewStrFromTokenW;
1537         penv->pfn_NewStaticStrFromToken = NewStaticStrFromTokenW;
1538         penv->pfn_GetDouble = GetDoubleW;
1539     }
1540     else
1541     {
1542         if(((penv->curPos[0]&0xFF)==0xEF)&&((penv->curPos[1]&0xFF)==0xBB)&&((penv->curPos[2]&0xFF)==0xBF))
1543         {
1544             g_uCodePage = CP_UTF8;
1545             penv->curPos += 3;
1546             if(assem->m_fReportProgress) printf("Source file is UTF-8\n\n");
1547             penv->pfn_nextchar = nextcharU;
1548         }
1549         else
1550         {
1551             if(assem->m_fReportProgress) printf("Source file is ANSI\n\n");
1552             penv->pfn_nextchar = nextcharA;
1553         }    
1554         penv->pfn_Sym = SymAU;
1555         penv->pfn_NewStrFromToken = NewStrFromTokenAU;
1556         penv->pfn_NewStaticStrFromToken = NewStaticStrFromTokenAU;
1557         penv->pfn_GetDouble = GetDoubleAU;
1558     }
1559     return(penv->curPos);
1560 }
1561
1562 /********************************************************************************/
1563 BinStr* AsmParse::MakeSig(unsigned callConv, BinStr* retType, BinStr* args, int ntyargs) 
1564 {
1565     _ASSERTE((ntyargs != 0) == ((callConv & IMAGE_CEE_CS_CALLCONV_GENERIC) != 0));
1566     BinStr* ret = new BinStr();
1567     if(ret)
1568     {
1569         //if (retType != 0) 
1570         ret->insertInt8(callConv);
1571         if (ntyargs != 0)
1572             corEmitInt(ret, ntyargs);
1573         corEmitInt(ret, corCountArgs(args));
1574
1575         if (retType != 0) 
1576         {
1577             ret->append(retType); 
1578             delete retType;
1579         }
1580         ret->append(args); 
1581     }
1582     else
1583         error("\nOut of memory!\n");
1584
1585     delete args;
1586     return(ret);
1587 }
1588
1589 /********************************************************************************/
1590 BinStr* AsmParse::MakeTypeArray(CorElementType kind, BinStr* elemType, BinStr* bounds) 
1591 {
1592     // 'bounds' is a binary buffer, that contains an array of 'struct Bounds' 
1593     struct Bounds {
1594         int lowerBound;
1595         unsigned numElements;
1596     };
1597
1598     _ASSERTE(bounds->length() % sizeof(Bounds) == 0);
1599     unsigned boundsLen = bounds->length() / sizeof(Bounds);
1600     _ASSERTE(boundsLen > 0);
1601     Bounds* boundsArr = (Bounds*) bounds->ptr();
1602
1603     BinStr* ret = new BinStr();
1604
1605     ret->appendInt8(kind);
1606     ret->append(elemType);
1607     corEmitInt(ret, boundsLen);                     // emit the rank
1608
1609     unsigned lowerBoundsDefined = 0;
1610     unsigned numElementsDefined = 0;
1611     unsigned i;
1612     for(i=0; i < boundsLen; i++) 
1613     {
1614         if(boundsArr[i].lowerBound < 0x7FFFFFFF) lowerBoundsDefined = i+1;
1615         else boundsArr[i].lowerBound = 0;
1616
1617         if(boundsArr[i].numElements < 0x7FFFFFFF) numElementsDefined = i+1;
1618         else boundsArr[i].numElements = 0;
1619     }
1620
1621     corEmitInt(ret, numElementsDefined);                    // emit number of bounds
1622
1623     for(i=0; i < numElementsDefined; i++) 
1624     {
1625         _ASSERTE (boundsArr[i].numElements >= 0);               // enforced at rule time
1626         corEmitInt(ret, boundsArr[i].numElements);
1627
1628     }
1629
1630     corEmitInt(ret, lowerBoundsDefined);    // emit number of lower bounds
1631     for(i=0; i < lowerBoundsDefined; i++)
1632         {
1633                 unsigned cnt = CorSigCompressSignedInt(boundsArr[i].lowerBound, ret->getBuff(5));
1634                 ret->remove(5 - cnt);
1635         }
1636     delete elemType;
1637     delete bounds;
1638     return(ret);
1639 }
1640
1641 /********************************************************************************/
1642 BinStr* AsmParse::MakeTypeClass(CorElementType kind, mdToken tk) 
1643 {
1644
1645     BinStr* ret = new BinStr();
1646     _ASSERTE(kind == ELEMENT_TYPE_CLASS || kind == ELEMENT_TYPE_VALUETYPE ||
1647                      kind == ELEMENT_TYPE_CMOD_REQD || kind == ELEMENT_TYPE_CMOD_OPT);
1648     ret->appendInt8(kind);
1649     unsigned cnt = CorSigCompressToken(tk, ret->getBuff(5));
1650     ret->remove(5 - cnt);
1651     return(ret);
1652 }
1653 /**************************************************************************/
1654 void PrintANSILine(FILE* pF, __in __nullterminated char* sz)
1655 {
1656         WCHAR *wz = &wzUniBuf[0];
1657         if(g_uCodePage != CP_ACP)
1658         {
1659                 memset(wz,0,dwUniBuf); // dwUniBuf/2 WCHARs = dwUniBuf bytes
1660                 WszMultiByteToWideChar(g_uCodePage,0,sz,-1,wz,(dwUniBuf >> 1)-1);
1661
1662                 memset(sz,0,dwUniBuf);
1663                 WszWideCharToMultiByte(g_uConsoleCP,0,wz,-1,sz,dwUniBuf-1,NULL,NULL);
1664         }
1665         fprintf(pF,"%s",sz);
1666 }
1667 /**************************************************************************/
1668 void AsmParse::error(const char* fmt, ...)
1669 {
1670     char *sz = (char*)(&wzUniBuf[(dwUniBuf >> 1)]);
1671     char *psz=&sz[0];
1672     FILE* pF = ((!assem->m_fReportProgress)&&(assem->OnErrGo)) ? stdout : stderr;
1673     success = false;
1674     va_list args;
1675     va_start(args, fmt);
1676
1677     if((penv) && (penv->in)) psz+=sprintf_s(psz, (dwUniBuf >> 1), "%S(%d) : ", penv->in->namew(), penv->curLine);
1678     psz+=sprintf_s(psz, (dwUniBuf >> 1), "error : ");
1679     _vsnprintf_s(psz, (dwUniBuf >> 1),(dwUniBuf >> 1)-strlen(sz)-1, fmt, args);
1680     PrintANSILine(pF,sz);
1681 }
1682
1683 /**************************************************************************/
1684 void AsmParse::warn(const char* fmt, ...)
1685 {
1686     char *sz = (char*)(&wzUniBuf[(dwUniBuf >> 1)]);
1687     char *psz=&sz[0];
1688     FILE* pF = ((!assem->m_fReportProgress)&&(assem->OnErrGo)) ? stdout : stderr;
1689     va_list args;
1690     va_start(args, fmt);
1691
1692     if((penv) && (penv->in)) psz+=sprintf_s(psz, (dwUniBuf >> 1), "%S(%d) : ", penv->in->namew(), penv->curLine);
1693     psz+=sprintf_s(psz, (dwUniBuf >> 1), "warning : ");
1694     _vsnprintf_s(psz, (dwUniBuf >> 1),(dwUniBuf >> 1)-strlen(sz)-1, fmt, args);
1695     PrintANSILine(pF,sz);
1696 }
1697 /**************************************************************************/
1698 void AsmParse::msg(const char* fmt, ...)
1699 {
1700     char *sz = (char*)(&wzUniBuf[(dwUniBuf >> 1)]);
1701     va_list args;
1702     va_start(args, fmt);
1703
1704     _vsnprintf_s(sz, (dwUniBuf >> 1),(dwUniBuf >> 1)-1, fmt, args);
1705     PrintANSILine(stdout,sz);
1706 }
1707
1708 #ifdef _MSC_VER
1709 #pragma warning(default : 4640)
1710 #endif