Merge pull request #3273 from krytarowski/netbsd-support-52
[platform/upstream/coreclr.git] / src / ilasm / grammar_after.cpp
1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
4 /********************************************************************************/
5 /* Code goes here */
6
7 /********************************************************************************/
8 extern int yyparse();
9
// One row of the keyword / instruction lookup table built below.
struct Keywords
{
    const char*     name;       // spelling of the keyword or instruction
    unsigned short  token;      // value handed back by findKeyword() as the token
    unsigned short  tokenVal;   // instruction enumeration, for the rows that are instructions
    size_t          stname;     // filled with lengthof(spelling)-1 by the table macros
};
16
17 #define NO_VALUE        ((unsigned short)-1)              // The token has no value
18
19 static Keywords keywords[] = {
20 // Attention! Because of aliases, the instructions MUST go first!
21 // Redefine all the instructions (defined in assembler.h <- asmenum.h <- opcode.def)
22 #undef InlineNone
23 #undef InlineVar        
24 #undef ShortInlineVar
25 #undef InlineI          
26 #undef ShortInlineI     
27 #undef InlineI8         
28 #undef InlineR          
29 #undef ShortInlineR     
30 #undef InlineBrTarget
31 #undef ShortInlineBrTarget
32 #undef InlineMethod
33 #undef InlineField 
34 #undef InlineType 
35 #undef InlineString
36 #undef InlineSig        
37 #undef InlineTok        
38 #undef InlineSwitch
39 #undef InlineVarTok     
40
41
42 #define InlineNone              INSTR_NONE
43 #define InlineVar               INSTR_VAR
44 #define ShortInlineVar          INSTR_VAR
45 #define InlineI                 INSTR_I
46 #define ShortInlineI            INSTR_I
47 #define InlineI8                INSTR_I8
48 #define InlineR                 INSTR_R
49 #define ShortInlineR            INSTR_R
50 #define InlineBrTarget          INSTR_BRTARGET
51 #define ShortInlineBrTarget             INSTR_BRTARGET
52 #define InlineMethod            INSTR_METHOD
53 #define InlineField             INSTR_FIELD
54 #define InlineType              INSTR_TYPE
55 #define InlineString            INSTR_STRING
56 #define InlineSig               INSTR_SIG
57 #define InlineTok               INSTR_TOK
58 #define InlineSwitch            INSTR_SWITCH
59
60 #define InlineVarTok            0
61 #define NEW_INLINE_NAMES
62                 // The volatile instruction collides with the volatile keyword, so 
63                 // we treat it as a keyword everywhere and modify the grammar accordingly (Yuck!) 
64 #define OPDEF(c,s,pop,push,args,type,l,s1,s2,ctrl) { s, args, c, lengthof(s)-1 },
65 #define OPALIAS(alias_c, s, c) { s, NO_VALUE, c, lengthof(s)-1 },
66 #include "opcode.def"
67 #undef OPALIAS
68 #undef OPDEF
69
70                 /* keywords */
71 #define KYWD(name, sym, val)    { name, sym, val, lengthof(name)-1 },
72 #include "il_kywd.h"
73 #undef KYWD
74
75 };
76
77 /********************************************************************************/
78 /* File encoding-dependent functions */
79 /*--------------------------------------------------------------------------*/
80 char* nextcharA(__in __nullterminated char* pos)
81 {
82     return (*pos > 0) ? ++pos : (char *)_mbsinc((const unsigned char *)pos);
83 }
84
85 char* nextcharU(__in __nullterminated char* pos)
86 {
87     return ++pos;
88 }
89
90 char* nextcharW(__in __nullterminated char* pos)
91 {
92     return (pos+2);
93 }
94 /*--------------------------------------------------------------------------*/
95 unsigned SymAU(__in __nullterminated char* curPos)
96 {
97     return (unsigned)*curPos;
98 }
99
100 unsigned SymW(__in __nullterminated char* curPos)
101 {
102     return (unsigned)*((WCHAR*)curPos);
103 }
104 /*--------------------------------------------------------------------------*/
105 char* NewStrFromTokenAU(__in_ecount(tokLen) char* curTok, size_t tokLen)
106 {
107     char *nb = new char[tokLen+1];
108     if(nb != NULL)
109     {
110         memcpy(nb, curTok, tokLen);
111         nb[tokLen] = 0;
112     }
113     return nb;
114 }
115 char* NewStrFromTokenW(__in_ecount(tokLen) char* curTok, size_t tokLen)
116 {
117     WCHAR* wcurTok = (WCHAR*)curTok;
118     char *nb = new char[(tokLen<<1) + 2];
119     if(nb != NULL)
120     {
121         tokLen = WszWideCharToMultiByte(CP_UTF8,0,(LPCWSTR)wcurTok,(int)(tokLen >> 1),nb,(int)(tokLen<<1) + 2,NULL,NULL);
122         nb[tokLen] = 0;
123     }
124     return nb;
125 }
126 /*--------------------------------------------------------------------------*/
127 char* NewStaticStrFromTokenAU(__in_ecount(tokLen) char* curTok, size_t tokLen, __out_ecount(bufSize) char* staticBuf, size_t bufSize)
128 {
129     if(tokLen >= bufSize) return NULL;
130     memcpy(staticBuf, curTok, tokLen);
131     staticBuf[tokLen] = 0;
132     return staticBuf;
133 }
134 char* NewStaticStrFromTokenW(__in_ecount(tokLen) char* curTok, size_t tokLen, __out_ecount(bufSize) char* staticBuf, size_t bufSize)
135 {
136     WCHAR* wcurTok = (WCHAR*)curTok;
137     if(tokLen >= bufSize/2) return NULL;
138     tokLen = WszWideCharToMultiByte(CP_UTF8,0,(LPCWSTR)wcurTok,(int)(tokLen >> 1),staticBuf,(int)bufSize,NULL,NULL);
139     staticBuf[tokLen] = 0;
140     return staticBuf;
141 }
142 /*--------------------------------------------------------------------------*/
143 unsigned GetDoubleAU(__in __nullterminated char* begNum, unsigned L, double** ppRes)
144 {
145     static char dbuff[128];
146     char* pdummy;
147     if(L > 127) L = 127;
148     memcpy(dbuff,begNum,L);
149     dbuff[L] = 0;
150     *ppRes = new double(strtod(dbuff, &pdummy));
151     return ((unsigned)(pdummy - dbuff));
152 }
153
154 unsigned GetDoubleW(__in __nullterminated char* begNum, unsigned L, double** ppRes)
155 {
156     static char dbuff[256];
157     char* pdummy;
158     if(L > 254) L = 254;
159     memcpy(dbuff,begNum,L);
160     dbuff[L] = 0;
161     dbuff[L+1] = 0;
162     *ppRes = new double(wcstod((const wchar_t*)dbuff, (wchar_t**)&pdummy));
163     return ((unsigned)(pdummy - dbuff));
164 }
165 /*--------------------------------------------------------------------------*/
166 char* yygetline(int Line)
167 {
168     static char buff[0x4000];
169     char *pLine=NULL, *pNextLine=NULL;
170     char *pBegin=NULL, *pEnd = NULL;
171     unsigned uCount = parser->getAll(&pBegin);
172     pEnd = pBegin + uCount;
173     buff[0] = 0;
174     for(uCount=0, pLine=pBegin; pLine < pEnd; pLine = nextchar(pLine))
175     {
176         if(Sym(pLine) == '\n') uCount++;
177         if(uCount == (unsigned int)(Line-1)) break;
178     }
179     pLine = nextchar(pLine);
180     if(pLine < pEnd)
181     {
182         for(pNextLine = pLine; pNextLine < pEnd; pNextLine = nextchar(pNextLine))
183         {
184             if(Sym(pNextLine) == '\n') break;
185         }
186         if(Sym == SymW) // Unicode file
187         {
188             if(*((WCHAR*)pNextLine - 1) == '\r') pNextLine -= 2;
189             uCount = (unsigned)(pNextLine - pLine);
190             uCount &= 0x1FFF; // limit: 8K wchars
191             WCHAR* wzBuff = (WCHAR*)buff;
192             memcpy(buff,pLine,uCount);
193             wzBuff[uCount >> 1] = 0;
194         }
195         else
196         {
197             if(*(pNextLine-1)=='\r') pNextLine--;
198             uCount = (unsigned)(pNextLine - pLine);
199             uCount &= 0x3FFF; // limit: 16K chars
200             memcpy(buff,pLine,uCount);
201             buff[uCount]=0;
202         }
203     }    
204     return buff;
205 }
206
207 void yyerror(__in __nullterminated const char* str) {
208     char tokBuff[64];
209     WCHAR *wzfile = (WCHAR*)(PENV->in->namew());
210     int iline = PENV->curLine;
211     
212     size_t len = PENV->curPos - PENV->curTok;
213     if (len > 62) len = 62;
214     memcpy(tokBuff, PENV->curTok, len);
215     tokBuff[len] = 0;
216     tokBuff[len+1] = 0;
217     if(PENV->bExternSource)
218     {
219         wzfile = PASM->m_wzSourceFileName;
220         iline = PENV->nExtLine;
221     }
222     if(Sym == SymW) // Unicode file
223         fprintf(stderr, "%S(%d) : error : %s at token '%S' in: %S\n", 
224                 wzfile, iline, str, (WCHAR*)tokBuff, (WCHAR*)yygetline(PENV->curLine));
225     else
226         fprintf(stderr, "%S(%d) : error : %s at token '%s' in: %s\n", 
227                 wzfile, iline, str, tokBuff, yygetline(PENV->curLine));
228     parser->success = false;
229 }
230
231 /********************************************************************************/
232 /* looks up the typedef 'name' of length 'nameLen' (name does not need to be 
233    null terminated)   Returns 0 on failure */
234 TypeDefDescr* findTypedef(__in_ecount(NameLen) char* name, size_t NameLen)
235 {
236     TypeDefDescr* pRet = NULL;
237     static char Name[4096];
238     if(PASM->NumTypeDefs())
239     {
240         if(NewStaticStrFromToken(name,NameLen,Name,4096))
241             pRet = PASM->FindTypeDef(Name);
242     }
243     return pRet;
244 }
245
246 int TYPEDEF(TypeDefDescr* pTDD)
247 {
248     switch(TypeFromToken(pTDD->m_tkTypeSpec))
249     {
250         case mdtTypeDef:
251         case mdtTypeRef:
252             return TYPEDEF_T;
253         case mdtMethodDef:
254         case 0x99000000:
255             return TYPEDEF_M;
256         case mdtFieldDef:
257         case 0x98000000:
258             return TYPEDEF_F;
259         case mdtMemberRef:
260             return TYPEDEF_MR;
261         case mdtTypeSpec:
262             return TYPEDEF_TS;
263         case mdtCustomAttribute:
264             return TYPEDEF_CA;
265     }
266     return ERROR_;
267             
268 }
269
270 /********************************************************************************/
271 void indexKeywords(Indx* indx)  // called in Assembler constructor (assem.cpp)
272 {
273     Keywords* low = keywords;
274     Keywords* high = keywords + (sizeof(keywords) / sizeof(Keywords));
275     Keywords* mid;
276     for(mid = low; mid < high; mid++)
277     {
278         indx->IndexString((char*)(mid->name),mid);
279     }
280 }
281
282 Instr* SetupInstr(unsigned short opcode)
283 {
284     Instr* pVal = NULL;
285     if((pVal = PASM->GetInstr()))
286     {
287         pVal->opcode = opcode;
288         if((pVal->pWriter = PASM->m_pSymDocument)!=NULL)
289         {
290             if(PENV->bExternSource)
291             {
292                 pVal->linenum = PENV->nExtLine;
293                 pVal->column = PENV->nExtCol;
294                 pVal->linenum_end = PENV->nExtLineEnd;
295                 pVal->column_end = PENV->nExtColEnd;
296                 pVal->pc = nCurrPC;
297             }
298             else
299             {
300                 pVal->linenum = PENV->curLine;
301                 pVal->column = 1;
302                 pVal->linenum_end = PENV->curLine;
303                 pVal->column_end = 0;
304                 pVal->pc = PASM->m_CurPC;
305             }
306         }
307     }
308     return pVal;
309 }
310 /* looks up the keyword 'name' of length 'nameLen' (name does not need to be 
311    null terminated)   Returns 0 on failure */
312 int findKeyword(const char* name, size_t nameLen, unsigned short* pOpcode) 
313 {
314     static char Name[128];
315     Keywords* mid;
316
317     if(NULL == NewStaticStrFromToken((char*)name,nameLen,Name,128)) return 0; // can't be a keyword
318     mid = (Keywords*)(PASM->indxKeywords.FindString(Name));
319     if(mid == NULL) return 0;
320     *pOpcode = mid->tokenVal;
321
322     return(mid->token);
323 }
324
325 /********************************************************************************/
326 /* convert str to a uint64 */
// Digit value of each 7-bit character code; every byte of an entry is 0xFF
// for characters that are not a digit or a letter.
unsigned digits[128];

/* Fills the digits table: '0'-'9' map to 0-9, letters of either case map to 10-35. */
void Init_str2uint64()
{
    memset(digits,255,sizeof(digits));
    for(int d = 0; d < 10; d++)
    {
        digits['0' + d] = d;
    }
    for(int k = 0; k < 26; k++)
    {
        digits['A' + k] = 10 + k;
        digits['a' + k] = 10 + k;
    }
}
336 static unsigned __int64 str2uint64(const char* str, const char** endStr, unsigned radix) 
337 {
338     unsigned __int64 ret = 0;
339     unsigned digit,ix;
340     _ASSERTE(radix <= 36);
341     for(;;str = nextchar((char*)str)) 
342     {
343         ix = Sym((char*)str);
344         if(ix <= 0x7F)
345         {
346             digit = digits[ix];
347             if(digit < radix)
348             {
349                 ret = ret * radix + digit;
350                 continue;
351             }
352         }
353         *endStr = str;
354         return(ret);
355     }
356 }
357 /********************************************************************************/
358 /* Append an UTF-8 string preceded by compressed length, no zero terminator, to a BinStr */
359 static void AppendStringWithLength(BinStr* pbs, __in __nullterminated char* sz)
360 {
361     if((pbs != NULL) && (sz != NULL))
362     {
363         unsigned L = (unsigned) strlen(sz);
364         BYTE* pb = NULL;
365         corEmitInt(pbs,L);
366         if((pb = pbs->getBuff(L)) != NULL)
367             memcpy(pb,sz,L);
368     }
369 }
370
371 /********************************************************************************/
372 /* fetch the next token, and return it   Also set the yylval.union if the
373    lexical token also has a value */
374    
375 #if (0)   
376
377 #define IsAlpha(x) ((('A' <= (x))&&((x) <= 'Z'))||(('a' <= (x))&&((x) <= 'z')))
378 #define IsDigit(x) (('0' <= (x))&&((x) <= '9'))
379 #define IsAlNum(x) (IsAlpha(x) || IsDigit(x))
380 #define IsValidStartingSymbol(x) (IsAlpha(x)||((x)=='#')||((x)=='_')||((x)=='@')||((x)=='$'))
381 #define IsValidContinuingSymbol(x) (IsAlNum(x)||((x)=='_')||((x)=='@')||((x)=='$')||((x)=='?'))
382 void SetSymbolTables() { ; }
383
384 #else
385
386 BOOL _Alpha[128];
387 BOOL _Digit[128];
388 BOOL _AlNum[128];
389 BOOL _ValidSS[128];
390 BOOL _ValidCS[128];
391 void SetSymbolTables()
392 {
393     unsigned i;
394     memset(_Alpha,0,sizeof(_Alpha)); 
395     memset(_Digit,0,sizeof(_Digit)); 
396     memset(_AlNum,0,sizeof(_AlNum)); 
397     memset(_ValidSS,0,sizeof(_ValidSS)); 
398     memset(_ValidCS,0,sizeof(_ValidCS));
399     for(i = 'A'; i <= 'Z'; i++)
400     {
401         _Alpha[i] = TRUE;
402         _AlNum[i] = TRUE;
403         _ValidSS[i] = TRUE;
404         _ValidCS[i] = TRUE;
405     } 
406     for(i = 'a'; i <= 'z'; i++)
407     {
408         _Alpha[i] = TRUE;
409         _AlNum[i] = TRUE;
410         _ValidSS[i] = TRUE;
411         _ValidCS[i] = TRUE;
412     }
413     for(i = '0'; i <= '9'; i++)
414     {
415         _Digit[i] = TRUE;
416         _AlNum[i] = TRUE;
417         _ValidCS[i] = TRUE;
418     }
419     _ValidSS[(unsigned char)'_'] = TRUE;
420     _ValidSS[(unsigned char)'#'] = TRUE;
421     _ValidSS[(unsigned char)'$'] = TRUE;
422     _ValidSS[(unsigned char)'@'] = TRUE;
423      
424     _ValidCS[(unsigned char)'_'] = TRUE;
425     _ValidCS[(unsigned char)'?'] = TRUE;
426     _ValidCS[(unsigned char)'$'] = TRUE;
427     _ValidCS[(unsigned char)'@'] = TRUE;
428     _ValidCS[(unsigned char)'`'] = TRUE;
429 }
430 BOOL IsAlpha(unsigned x) { return (x < 128)&&_Alpha[x]; }
431 BOOL IsDigit(unsigned x) { return (x < 128)&&_Digit[x]; }     
432 BOOL IsAlNum(unsigned x) { return (x < 128)&&_AlNum[x]; }     
433 BOOL IsValidStartingSymbol(unsigned x) { return (x < 128)&&_ValidSS[x]; }     
434 BOOL IsValidContinuingSymbol(unsigned x) { return (x < 128)&&_ValidCS[x]; } 
435
436 #endif
437
438 char* nextBlank(__in __nullterminated char* curPos)
439 {
440     for(;;) 
441     {   
442         switch(Sym(curPos)) 
443         {
444             case '/' :
445                 if ((Sym(nextchar(curPos)) == '/')|| (Sym(nextchar(curPos)) == '*'))
446                     return curPos;
447                 else
448                 {
449                     curPos = nextchar(curPos);
450                     break;
451                 }
452             case 0: 
453             case '\n':
454             case '\r':
455             case ' ' :
456             case '\t':
457             case '\f':
458                 return curPos;
459
460             default:
461                 curPos = nextchar(curPos);
462         }
463     }
464 }
465
466 char* skipBlanks(__in __nullterminated char* curPos, unsigned* pstate)
467 {
468     const unsigned eolComment = 1;
469     const unsigned multiComment = 2;
470     unsigned nextSym, state = *pstate;
471     char* nextPos;
472     for(;;) 
473     {   // skip whitespace and comments
474         if (curPos >= PENV->endPos) 
475         {
476             *pstate = state;    
477             return NULL;
478         }
479         switch(Sym(curPos)) 
480         {
481             case 0: 
482                 return NULL;       // EOF
483             case '\n':
484                 state &= ~eolComment;
485                 PENV->curLine++;
486                 if(PENV->bExternSource)
487                 {
488                     if(PENV->bExternSourceAutoincrement) PENV->nExtLine++;
489                     PASM->m_ulCurLine = PENV->nExtLine;
490                     PASM->m_ulCurColumn = PENV->nExtCol;
491                 }
492                 else
493                 {
494                     PASM->m_ulCurLine = PENV->curLine;
495                     PASM->m_ulCurColumn = 1;
496                 }
497                 break;
498             case '\r':
499             case ' ' :
500             case '\t':
501             case '\f':
502                 break;
503
504             case '*' :
505                 if(state == 0) goto PAST_WHITESPACE;
506                 if(state & multiComment)
507                 {
508                     nextPos = nextchar(curPos);
509                     if (Sym(nextPos) == '/') 
510                     {
511                         curPos = nextPos;
512                         state &= ~multiComment;
513                     }
514                 }
515                 break;
516
517             case '/' :
518                 if(state == 0)
519                 {
520                     nextPos = nextchar(curPos);
521                     nextSym = Sym(nextPos);
522                     if (nextSym == '/')
523                     {
524                         curPos = nextPos;
525                         state |= eolComment;
526                     }
527                     else if (nextSym == '*') 
528                     {
529                         curPos = nextPos;
530                         state |= multiComment;
531                     }
532                     else goto PAST_WHITESPACE;
533                 }
534                 break;
535
536             default:
537                 if (state == 0)  goto PAST_WHITESPACE;
538         }
539         curPos = nextchar(curPos);
540     }
541 PAST_WHITESPACE:
542     *pstate = state;
543     return curPos;
544 }
545
546 char* FullFileName(__in __nullterminated WCHAR* wzFileName, unsigned uCodePage);
547
548 int ProcessEOF()
549 {
550     PARSING_ENVIRONMENT* prev_penv = parser->PEStack.POP();
551     if(prev_penv != NULL)
552     {
553         //delete [] (WCHAR*)(PENV->in->namew());
554         delete PENV->in;
555         delete PENV;
556         parser->penv = prev_penv;
557         SetFunctionPtrs();
558         char* szFileName = new char[strlen(PENV->szFileName)+1];
559         strcpy_s(szFileName,strlen(PENV->szFileName)+1,PENV->szFileName);
560         PASM->SetSourceFileName(szFileName); // deletes the argument!
561         return ';';
562     }
563     //PENV->in = NULL;
564     return 0;
565 }
566
// Resets the comment state, reloads curPos from the current environment and
// jumps back to the NextToken label of the function that uses the macro.
#define NEXT_TOKEN                      \
    {                                   \
        state = 0;                      \
        curPos = PENV->curPos;          \
        goto NextToken;                 \
    }
568
569 int parse_literal(unsigned curSym, __inout __nullterminated char* &curPos, BOOL translate_escapes)
570 {
571     unsigned quote = curSym;
572     curPos = nextchar(curPos);
573     char* fromPtr = curPos;
574     bool escape = false;
575
576     for(;;) 
577     {     // Find matching quote
578         curSym = (curPos >= PENV->endPos) ? 0 : Sym(curPos);
579         if(curSym == 0)
580         {
581             PENV->curPos = curPos; 
582             return(BAD_LITERAL_);
583         }
584         else if(curSym == '\\')
585             escape = !escape;
586         else
587         {
588             if(curSym == '\n')
589             {
590                 PENV->curLine++;
591                 if(PENV->bExternSource)
592                 {
593                     if(PENV->bExternSourceAutoincrement) PENV->nExtLine++;
594                     PASM->m_ulCurLine = PENV->nExtLine;
595                     PASM->m_ulCurColumn = PENV->nExtCol;
596                 }
597                 else
598                 {
599                     PASM->m_ulCurLine = PENV->curLine;
600                     PASM->m_ulCurColumn = 1;
601                 }
602                 if (!escape) { PENV->curPos = curPos; return(BAD_LITERAL_); }
603             }
604             else if ((curSym == quote) && (!escape)) break;
605             escape = false;
606         }        
607         curPos = nextchar(curPos);
608     }
609     // translate escaped characters
610     unsigned tokLen = (unsigned)(curPos - fromPtr);
611     char* newstr = NewStrFromToken(fromPtr, tokLen);
612     char* toPtr;
613     curPos = nextchar(curPos);  // skip closing quote
614     if(translate_escapes)
615     {
616         fromPtr = newstr;
617         //_ASSERTE(0);
618         tokLen = (unsigned)strlen(newstr);
619         toPtr = new char[tokLen+1];
620         if(toPtr==NULL) return BAD_LITERAL_;
621         yylval.string = toPtr;
622         char* endPtr = fromPtr+tokLen;
623         while(fromPtr < endPtr) 
624         {
625             if (*fromPtr == '\\') 
626             {
627                 fromPtr++;
628                 switch(*fromPtr) 
629                 {
630                     case 't':
631                             *toPtr++ = '\t';
632                             break;
633                     case 'n':
634                             *toPtr++ = '\n';
635                             break;
636                     case 'b':
637                             *toPtr++ = '\b';
638                             break;
639                     case 'f':
640                             *toPtr++ = '\f';
641                             break;
642                     case 'v':
643                             *toPtr++ = '\v';
644                             break;
645                     case '?':
646                             *toPtr++ = '\?';
647                             break;
648                     case 'r':
649                             *toPtr++ = '\r';
650                             break;
651                     case 'a':
652                             *toPtr++ = '\a';
653                             break;
654                     case '\n':
655                             do      fromPtr++;
656                             while(isspace(*fromPtr));
657                             --fromPtr;              // undo the increment below   
658                             break;
659                     case '0':
660                     case '1':
661                     case '2':
662                     case '3':
663                             if (IsDigit(fromPtr[1]) && IsDigit(fromPtr[2])) 
664                             {
665                                 *toPtr++ = ((fromPtr[0] - '0') * 8 + (fromPtr[1] - '0')) * 8 + (fromPtr[2] - '0');
666                                 fromPtr+= 2;                                                            
667                             }
668                             else if(*fromPtr == '0') *toPtr++ = 0;
669                             else *toPtr++ = *fromPtr;
670                             break;
671                     default:
672                             *toPtr++ = *fromPtr;
673                 }
674                 fromPtr++;
675             }
676             else
677             //  *toPtr++ = *fromPtr++;
678             {
679                 char* tmpPtr = fromPtr;
680                 fromPtr = (nextchar == nextcharW) ? nextcharU(fromPtr) : nextchar(fromPtr);
681                 while(tmpPtr < fromPtr) *toPtr++ = *tmpPtr++;
682             }
683     
684         } //end while(fromPtr < endPtr)
685         *toPtr = 0;                     // terminate string
686         delete [] newstr;
687     }
688     else
689     {
690         yylval.string = newstr;
691         toPtr = newstr + strlen(newstr);
692     }
693             
694     PENV->curPos = curPos;
695     if(quote == '"')
696     {
697         BinStr* pBS = new BinStr();
698         unsigned size = (unsigned)(toPtr - yylval.string);
699         memcpy(pBS->getBuff(size),yylval.string,size);
700         delete [] yylval.string;
701         yylval.binstr = pBS;
702         return QSTRING;
703     }
704     else
705     {
706         if(PASM->NumTypeDefs())
707         {
708             TypeDefDescr* pTDD = PASM->FindTypeDef(yylval.string);
709             if(pTDD != NULL)
710             {
711                 delete [] yylval.string;
712                 yylval.tdd = pTDD;
713                 return(TYPEDEF(pTDD));
714             }
715         }
716         return SQSTRING;
717     }
718 }
719
720 #ifdef _PREFAST_
721 #pragma warning(push)
722 #pragma warning(disable:21000) // Suppress PREFast warning about overly large function
723 #endif
724 int yylex() 
725 {
726     char* curPos = PENV->curPos;
727     unsigned state = 0;
728     const unsigned multiComment = 2;
729     unsigned curSym;
730     
731     char* newstr;
732     
733 NextToken:    
734     // Skip any leading whitespace and comments
735     curPos = skipBlanks(curPos, &state);
736     if(curPos == NULL)
737     {
738         if (state & multiComment) return (BAD_COMMENT_);
739         if(ProcessEOF() == 0) return 0;       // EOF
740         NEXT_TOKEN;
741     }
742     char* curTok = curPos;
743     PENV->curTok = curPos;
744     PENV->curPos = curPos;
745     int tok = ERROR_;
746     yylval.string = 0;
747
748     curSym = Sym(curPos);
749     if(bParsingByteArray) // only hexadecimals w/o 0x, ')' and white space allowed!
750     {
751         int i,s=0;
752         for(i=0; i<2; i++, curPos = nextchar(curPos), curSym = Sym(curPos))
753         {
754             if(('0' <= curSym)&&(curSym <= '9')) s = s*16+(curSym - '0');
755             else if(('A' <= curSym)&&(curSym <= 'F')) s = s*16+(curSym - 'A' + 10);
756             else if(('a' <= curSym)&&(curSym <= 'f')) s = s*16+(curSym - 'a' + 10);
757             else break; // don't increase curPos!
758         }
759         if(i)
760         {
761             tok = HEXBYTE;
762             yylval.int32 = s;
763         }
764         else
765         {
766             if(curSym == ')' || curSym == '}') 
767             {
768                 bParsingByteArray = FALSE;
769                 goto Just_A_Character;
770             }
771         }
772         PENV->curPos = curPos;
773         return(tok);
774     }
775     if(curSym == '?') // '?' may be part of an identifier, if it's not followed by punctuation
776     {
777         if(IsValidContinuingSymbol(Sym(nextchar(curPos)))) goto Its_An_Id;
778         goto Just_A_Character;
779     }
780
781     if (IsValidStartingSymbol(curSym)) 
782     { // is it an ID
783 Its_An_Id:
784         size_t offsetDot = (size_t)-1; // first appearance of '.'
785                 size_t offsetDotDigit = (size_t)-1; // first appearance of '.<digit>' (not DOTTEDNAME!)
786         do 
787         {
788             curPos = nextchar(curPos);
789             if (Sym(curPos) == '.') 
790             {
791                 if (offsetDot == (size_t)-1) offsetDot = curPos - curTok;
792                 curPos = nextchar(curPos);
793                 if((offsetDotDigit==(size_t)-1)&&(Sym(curPos) >= '0')&&(Sym(curPos) <= '9')) 
794                         offsetDotDigit = curPos - curTok - 1;
795             }
796         } while(IsValidContinuingSymbol(Sym(curPos)));
797         
798         size_t tokLen = curPos - curTok;
799         // check to see if it is a keyword
800         int token = findKeyword(curTok, tokLen, &yylval.opcode);
801         if (token != 0) 
802         {
803             //printf("yylex: TOK = %d, curPos=0x%8.8X\n",token,curPos);
804             PENV->curPos = curPos;
805             PENV->curTok = curTok;
806             if(!SkipToken)
807             {
808                 switch(token)
809                 {
810                     case P_INCLUDE:
811                         //if(include_first_pass)
812                         //{
813                         //    PENV->curPos = curTok;
814                         //    include_first_pass = FALSE;
815                         //    return ';';
816                         //}
817                         //include_first_pass = TRUE;
818                         curPos = skipBlanks(curPos,&state);
819                         if(curPos == NULL)
820                         {
821                             if (state & multiComment) return (BAD_COMMENT_);
822                             if(ProcessEOF() == 0) return 0;       // EOF
823                             NEXT_TOKEN;
824                         }
825                         if(Sym(curPos) != '"') return ERROR_;
826                         curPos = nextchar(curPos);
827                         curTok = curPos;
828                         PENV->curTok = curPos;
829                         while(Sym(curPos) != '"')
830                         {
831                             curPos = nextchar(curPos);
832                             if(curPos >= PENV->endPos) return ERROR_;
833                             PENV->curPos = curPos;
834                         }
835                         tokLen = PENV->curPos - curTok;
836                         curPos = nextchar(curPos);
837                         PENV->curPos = curPos;
838                         {
839                             WCHAR* wzFile=NULL;
840                             if(Sym == SymW)
841                             {
842                                 if((wzFile = new WCHAR[tokLen/2 + 1]) != NULL)
843                                 {
844                                     memcpy(wzFile,curTok,tokLen);
845                                     wzFile[tokLen/2] = 0;
846                                 }
847                             }
848                             else
849                             {
850                                 if((wzFile = new WCHAR[tokLen+1]) != NULL)
851                                 {
852                                     tokLen = WszMultiByteToWideChar(g_uCodePage,0,curTok,(int)tokLen,wzFile,(int)tokLen+1);
853                                     wzFile[tokLen] = 0;
854                                 }
855                             }
856                             if(wzFile != NULL)
857                             {
858                                 if((parser->wzIncludePath != NULL)
859                                  &&(wcschr(wzFile,'\\')==NULL)&&(wcschr(wzFile,':')==NULL))
860                                 {
861                                     PathString wzFullName;
862
863                                     WCHAR* pwz;
864                                     DWORD dw = WszSearchPath(parser->wzIncludePath,wzFile,NULL,
865                                                 TRUE, wzFullName,&pwz);
866                                     if(dw != 0)
867                                     {
868                                         wzFullName.CloseBuffer((COUNT_T)(dw));
869                                         delete [] wzFile;
870
871                                         wzFile = wzFullName.GetCopyOfUnicodeString();
872                                     }
873                                     
874                                 }
875                                 if(PASM->m_fReportProgress)
876                                     parser->msg("\nIncluding '%S'\n",wzFile);
877                                 MappedFileStream *pIn = new MappedFileStream(wzFile);
878                                 if((pIn != NULL)&&pIn->IsValid())
879                                 {
880                                     parser->PEStack.PUSH(PENV);
881                                     PASM->SetSourceFileName(FullFileName(wzFile,CP_UTF8)); // deletes the argument!
882                                     parser->CreateEnvironment(pIn);
883                                     NEXT_TOKEN;
884                                 }
885                                 else
886                                 {
887                                     delete [] wzFile;
888                                     PASM->report->error("#include failed\n");
889                                     return ERROR_;
890                                 }
891                             }
892                             else
893                             {
894                                 PASM->report->error("Out of memory\n");
895                                 return ERROR_;
896                             }
897                         }
898                         curPos = PENV->curPos;
899                         curTok = PENV->curTok;
900                         break;
901                     case P_IFDEF:
902                     case P_IFNDEF:
903                     case P_DEFINE:
904                     case P_UNDEF:
905                         curPos = skipBlanks(curPos,&state);
906                         if(curPos == NULL)
907                         {
908                             if (state & multiComment) return (BAD_COMMENT_);
909                             if(ProcessEOF() == 0) return 0;       // EOF
910                             NEXT_TOKEN;
911                         }
912                         curTok = curPos;
913                         PENV->curTok = curPos;
914                         PENV->curPos = curPos;
915                         if (!IsValidStartingSymbol(Sym(curPos))) return ERROR_;
916                         do 
917                         {
918                             curPos = nextchar(curPos);
919                         } while(IsValidContinuingSymbol(Sym(curPos)));
920                         tokLen = curPos - curTok;
921                             
922                         newstr = NewStrFromToken(curTok, tokLen);
923                         if((token==P_DEFINE)||(token==P_UNDEF))
924                         {
925                             if(token == P_DEFINE)
926                             {
927                                 curPos = skipBlanks(curPos,&state);
928                                 if ((curPos == NULL) && (ProcessEOF() == 0))
929                                 {
930                                     DefineVar(newstr, NULL);
931                                     return 0;
932                                 }
933                                 curSym = Sym(curPos);
934                                 if(curSym != '"')
935                                     DefineVar(newstr, NULL);
936                                 else
937                                 {
938                                     tok = parse_literal(curSym, curPos, FALSE);
939                                     if(tok == QSTRING)
940                                     {
941                                         // if not ANSI, then string is in UTF-8,
942                                         // insert prefix
943                                         if(nextchar != nextcharA)
944                                         {
945                                             yylval.binstr->insertInt8(0xEF);
946                                             yylval.binstr->insertInt8(0xBB);
947                                             yylval.binstr->insertInt8(0xBF);
948                                         }
949                                         yylval.binstr->appendInt8(' ');
950                                         DefineVar(newstr, yylval.binstr);
951                                     }
952                                     else
953                                         return tok;
954                                 }
955                             }
956                             else UndefVar(newstr);
957                         }
958                         else
959                         {
960                             SkipToken = IsVarDefined(newstr);
961                             if(token == P_IFDEF) SkipToken = !SkipToken;
962                             IfEndif++;
963                             if(SkipToken) IfEndifSkip=IfEndif;
964                         }
965                         break;
966                     case P_ELSE:
967                         SkipToken = TRUE;
968                         IfEndifSkip=IfEndif;
969                         break;
970                     case P_ENDIF:
971                         if(IfEndif == 0)
972                         {
973                             PASM->report->error("Unmatched #endif\n");
974                             return ERROR_;
975                         }
976                         IfEndif--;
977                         break;
978                     default:
979                         return(token);
980                 }
981                 goto NextToken;
982             }
983             if(SkipToken)
984             {
985                 switch(token)
986                 {
987                     case P_IFDEF:
988                     case P_IFNDEF:
989                         IfEndif++;
990                         break;
991                     case P_ELSE:
992                         if(IfEndif == IfEndifSkip) SkipToken = FALSE;
993                         break;
994                     case P_ENDIF:
995                         if(IfEndif == IfEndifSkip) SkipToken = FALSE;
996                         IfEndif--;
997                         break;
998                     default:
999                         break;
1000                 }
1001                 //if(yylval.instr) yylval.instr->opcode = -1;
1002                 goto NextToken;
1003             }
1004             return(token);
1005         } // end if token != 0
1006         if(SkipToken) { curPos = nextBlank(curPos); goto NextToken; }
1007         
1008         VarName* pVarName = FindVarDef(NewStrFromToken(curTok, tokLen));
1009         if(pVarName != NULL)
1010         {
1011             if(pVarName->pbody != NULL)
1012             {
1013                 BinStrStream *pIn = new BinStrStream(pVarName->pbody);
1014                 if((pIn != NULL)&&pIn->IsValid())
1015                 {
1016                     PENV->curPos = curPos;
1017                     parser->PEStack.PUSH(PENV);
1018                     parser->CreateEnvironment(pIn);
1019                     NEXT_TOKEN;
1020                 }
1021             }
1022         }
1023         
1024         TypeDefDescr* pTDD = findTypedef(curTok,tokLen);
1025         
1026         if(pTDD != NULL)
1027         {
1028             yylval.tdd = pTDD;
1029             PENV->curPos = curPos;
1030             PENV->curTok = curTok;
1031             return(TYPEDEF(pTDD));
1032         }
1033         if(Sym(curTok) == '#') 
1034         {
1035             PENV->curPos = curPos;
1036             PENV->curTok = curTok;
1037             return(ERROR_);
1038         }
1039         // Not a keyword, normal identifiers don't have '.' in them
1040         if (offsetDot < (size_t)-1) 
1041         {
1042             if(offsetDotDigit < (size_t)-1)
1043             {
1044                 curPos = curTok+offsetDotDigit;
1045                 tokLen = offsetDotDigit;
1046             }
1047             // protection against something like Foo.Bar..123 or Foo.Bar.
1048             unsigned D = (Sym == SymW) ? 2 : 1; // Unicode or ANSI/UTF8!
1049             while((Sym(curPos-D)=='.')&&(tokLen))
1050             {
1051                 curPos -= D;
1052                 tokLen -= D;
1053             }
1054         }
1055         if((yylval.string = NewStrFromToken(curTok,tokLen)))
1056         {
1057             tok = (offsetDot == (size_t)(-1))? ID : DOTTEDNAME;
1058             //printf("yylex: ID = '%s', curPos=0x%8.8X\n",yylval.string,curPos);
1059         }
1060         else return BAD_LITERAL_;
1061     }
1062     else if(SkipToken) { curPos = nextBlank(curPos); goto NextToken; }
1063     else if (IsDigit(curSym) 
1064         || (curSym == '.' && IsDigit(Sym(nextchar(curPos))))
1065         || (curSym == '-' && IsDigit(Sym(nextchar(curPos))))) 
1066     {
1067         const char* begNum = curPos;
1068         unsigned radix = 10;
1069
1070         neg = (curSym == '-');    // always make it unsigned 
1071         if (neg) curPos = nextchar(curPos);
1072
1073         if (Sym(curPos) == '0' && Sym(nextchar(curPos)) != '.') 
1074         {
1075             curPos = nextchar(curPos);
1076             radix = 8;
1077             if (Sym(curPos) == 'x' || Sym(curPos) == 'X') 
1078             {
1079                 curPos = nextchar(curPos);
1080                 radix = 16;
1081             }
1082         }
1083         begNum = curPos;
1084         {
1085             unsigned __int64 i64 = str2uint64(begNum, const_cast<const char**>(&curPos), radix);
1086             unsigned __int64 mask64 = neg ? UI64(0xFFFFFFFF80000000) : UI64(0xFFFFFFFF00000000);
1087             unsigned __int64 largestNegVal32 = UI64(0x0000000080000000);
1088             if ((i64 & mask64) && (i64 != largestNegVal32))
1089             {
1090                 yylval.int64 = new __int64(i64);
1091                 tok = INT64;                    
1092                 if (neg) *yylval.int64 = -*yylval.int64;
1093             }
1094             else
1095             {
1096                 yylval.int32 = (__int32)i64;
1097                 tok = INT32;
1098                 if(neg) yylval.int32 = -yylval.int32;
1099             }
1100         }
1101         if (radix == 10 && ((Sym(curPos) == '.' && Sym(nextchar(curPos)) != '.') || Sym(curPos) == 'E' || Sym(curPos) == 'e')) 
1102         {
1103             unsigned L = (unsigned)(PENV->endPos - begNum);
1104             curPos = (char*)begNum + GetDouble((char*)begNum,L,&yylval.float64);
1105             if (neg) *yylval.float64 = -*yylval.float64;
1106             tok = FLOAT64;
1107         }
1108     }
1109     else 
1110     {   //      punctuation
1111         if (curSym == '"' || curSym == '\'') 
1112         {
1113             return parse_literal(curSym, curPos, TRUE);
1114         } // end if (*curPos == '"' || *curPos == '\'')
1115         else if (curSym==':' && Sym(nextchar(curPos))==':') 
1116         {
1117             curPos = nextchar(nextchar(curPos));
1118             tok = DCOLON;
1119         }       
1120         else if(curSym == '.') 
1121         {
1122             if (Sym(nextchar(curPos))=='.' && Sym(nextchar(nextchar(curPos)))=='.') 
1123             {
1124                 curPos = nextchar(nextchar(nextchar(curPos)));
1125                 tok = ELIPSIS;
1126             }
1127             else
1128             {
1129                 do
1130                 {
1131                     curPos = nextchar(curPos);
1132                     if (curPos >= PENV->endPos) 
1133                     return ERROR_;
1134                     curSym = Sym(curPos);
1135                 }
1136                 while(IsAlNum(curSym) || curSym == '_' || curSym == '$'|| curSym == '@'|| curSym == '?');
1137                 size_t tokLen = curPos - curTok;
1138     
1139                 // check to see if it is a keyword
1140                 int token = findKeyword(curTok, tokLen, &yylval.opcode);
1141                 if(token)
1142                         {
1143                     //printf("yylex: TOK = %d, curPos=0x%8.8X\n",token,curPos);
1144                     PENV->curPos = curPos;
1145                     PENV->curTok = curTok; 
1146                     return(token);
1147                 }
1148                 tok = '.';
1149                 curPos = nextchar(curTok);
1150             }
1151         }
1152         else 
1153         {
1154 Just_A_Character:
1155             tok = curSym;
1156             curPos = nextchar(curPos);
1157         }
1158         //printf("yylex: PUNCT curPos=0x%8.8X\n",curPos);
1159     }
1160     dbprintf(("    Line %d token %d (%c) val = %s\n", PENV->curLine, tok, 
1161             (tok < 128 && isprint(tok)) ? tok : ' ', 
1162             (tok > 255 && tok != INT32 && tok != INT64 && tok!= FLOAT64) ? yylval.string : ""));
1163
1164     PENV->curPos = curPos;
1165     PENV->curTok = curTok; 
1166     return(tok);
1167 }
1168 #ifdef _PREFAST_
1169 #pragma warning(pop)
1170 #endif
1171
1172 /**************************************************************************/
1173 static char* newString(__in __nullterminated const char* str1)
1174 {
1175     char* ret = new char[strlen(str1)+1];
1176     if(ret) strcpy_s(ret, strlen(str1)+1, str1);
1177     return(ret);
1178 }
1179
1180 /**************************************************************************/
1181 /* concatenate strings and release them */
1182
1183 static char* newStringWDel(__in __nullterminated char* str1, char delimiter, __in __nullterminated char* str3) 
1184 {
1185     size_t len1 = strlen(str1);
1186     size_t len = len1+2;
1187     if (str3) len += strlen(str3);
1188     char* ret = new char[len];
1189     if(ret)
1190     {
1191         strcpy_s(ret, len, str1);
1192         delete [] str1;
1193         ret[len1] = delimiter;
1194         ret[len1+1] = 0;
1195         if (str3)
1196         {
1197             strcat_s(ret, len, str3);
1198             delete [] str3;
1199         }
1200     }
1201     return(ret);
1202 }
1203
1204 /**************************************************************************/
1205 static void corEmitInt(BinStr* buff, unsigned data) 
1206 {
1207     unsigned cnt = CorSigCompressData(data, buff->getBuff(5));
1208     buff->remove(5 - cnt);
1209 }
1210
1211
1212 /**************************************************************************/
1213 /* move 'ptr past the exactly one type description */
1214
1215 unsigned __int8* skipType(unsigned __int8* ptr, BOOL fFixupType) 
1216 {
1217     mdToken  tk;
1218 AGAIN:
1219     switch(*ptr++) {
1220         case ELEMENT_TYPE_VOID         :
1221         case ELEMENT_TYPE_BOOLEAN      :
1222         case ELEMENT_TYPE_CHAR         :
1223         case ELEMENT_TYPE_I1           :
1224         case ELEMENT_TYPE_U1           :
1225         case ELEMENT_TYPE_I2           :
1226         case ELEMENT_TYPE_U2           :
1227         case ELEMENT_TYPE_I4           :
1228         case ELEMENT_TYPE_U4           :
1229         case ELEMENT_TYPE_I8           :
1230         case ELEMENT_TYPE_U8           :
1231         case ELEMENT_TYPE_R4           :
1232         case ELEMENT_TYPE_R8           :
1233         case ELEMENT_TYPE_U            :
1234         case ELEMENT_TYPE_I            :
1235         case ELEMENT_TYPE_STRING       :
1236         case ELEMENT_TYPE_OBJECT       :
1237         case ELEMENT_TYPE_TYPEDBYREF   :
1238         case ELEMENT_TYPE_SENTINEL     :
1239                 /* do nothing */
1240                 break;
1241
1242         case ELEMENT_TYPE_VALUETYPE   :
1243         case ELEMENT_TYPE_CLASS        :
1244                 ptr += CorSigUncompressToken(ptr, &tk);
1245                 break;
1246
1247         case ELEMENT_TYPE_CMOD_REQD    :
1248         case ELEMENT_TYPE_CMOD_OPT     :
1249                 ptr += CorSigUncompressToken(ptr, &tk);
1250                 goto AGAIN;
1251         
1252         case ELEMENT_TYPE_ARRAY         :
1253                 {
1254                     ptr = skipType(ptr, fFixupType);                    // element Type
1255                     unsigned rank = CorSigUncompressData((PCCOR_SIGNATURE&) ptr);
1256                     if (rank != 0)
1257                     {
1258                         unsigned numSizes = CorSigUncompressData((PCCOR_SIGNATURE&) ptr);
1259                         while(numSizes > 0)
1260                                                 {
1261                             CorSigUncompressData((PCCOR_SIGNATURE&) ptr);
1262                                                         --numSizes;
1263                                                 }
1264                         unsigned numLowBounds = CorSigUncompressData((PCCOR_SIGNATURE&) ptr);
1265                         while(numLowBounds > 0)
1266                                                 {
1267                             CorSigUncompressData((PCCOR_SIGNATURE&) ptr);
1268                                                         --numLowBounds;
1269                                                 }
1270                     }
1271                 }
1272                 break;
1273
1274                 // Modifiers or depedant types
1275         case ELEMENT_TYPE_PINNED                :
1276         case ELEMENT_TYPE_PTR                   :
1277         case ELEMENT_TYPE_BYREF                 :
1278         case ELEMENT_TYPE_SZARRAY               :
1279                 // tail recursion optimization
1280                 // ptr = skipType(ptr, fFixupType);
1281                 // break
1282                 goto AGAIN;
1283
1284         case ELEMENT_TYPE_VAR:
1285         case ELEMENT_TYPE_MVAR:
1286                 CorSigUncompressData((PCCOR_SIGNATURE&) ptr);  // bound
1287                 break;
1288         
1289         case ELEMENT_TYPE_VARFIXUP:
1290         case ELEMENT_TYPE_MVARFIXUP:
1291                 if(fFixupType)
1292                 {
1293                     BYTE* pb = ptr-1; // ptr incremented in switch
1294                     unsigned __int8* ptr_save = ptr;
1295                     int n = CorSigUncompressData((PCCOR_SIGNATURE&) ptr);  // fixup #
1296                     int compressed_size_n = (int)(ptr - ptr_save);  // ptr was updated by CorSigUncompressData()
1297                     int m = -1;
1298                     if(PASM->m_TyParList)
1299                         m = PASM->m_TyParList->IndexOf(TyParFixupList.PEEK(n));
1300                     if(m == -1)
1301                     {
1302                         PASM->report->error("(fixupType) Invalid %stype parameter '%s'\n",
1303                             (*pb == ELEMENT_TYPE_MVARFIXUP)? "method ": "",
1304                             TyParFixupList.PEEK(n));
1305                         m = 0;
1306                     }
1307                     *pb = (*pb == ELEMENT_TYPE_MVARFIXUP)? ELEMENT_TYPE_MVAR : ELEMENT_TYPE_VAR;
1308                     int compressed_size_m = (int)CorSigCompressData(m,pb+1);
1309
1310                     // Note that CorSigCompressData() (and hence, CorSigUncompressData()) store a number
1311                     // 0 <= x <= 0x1FFFFFFF in 1, 2, or 4 bytes. Above, 'n' is the fixup number being read,
1312                     // and 'm' is the generic parameter number being written out (in the same place where 'n'
1313                     // came from). If 'n' takes more space to compress than 'm' (e.g., 0x80 <= n <= 0x3fff so
1314                     // it takes 2 bytes, and m < 0x80 so it takes one byte), then when we overwrite the fixup
1315                     // number with the generic parameter number, we'll leave extra bytes in the signature following
1316                     // the written generic parameter number. Thus, we do something of a hack to ensure that the
1317                     // compressed number is correctly readable even if 'm' compresses smaller than 'n' did: we
1318                     // recompress 'm' to use the same amount of space as 'n' used. This is possible because smaller
1319                     // numbers can still be compressed in a larger amount of space, even though it's not optimal (and
1320                     // CorSigCompressData() would never do it). If, however, the compressed sizes are the other
1321                     // way around (m takes more space to compress than n), then we've already corrupted the
1322                     // signature that we're reading by writing beyond what we should (is there some reason why
1323                     // this is not possible?).
1324                     // Note that 'ptr' has already been adjusted, above, to point to the next type after this one.
1325                     // There is no need to update it when recompressing the data.
1326
1327                     if (compressed_size_m > compressed_size_n)
1328                     {
1329                         // We've got a problem: we just corrupted the rest of the signature!
1330                         // (Can this ever happen in practice?)
1331                         PASM->report->error("(fixupType) Too many %stype parameters\n",
1332                             (*pb == ELEMENT_TYPE_MVARFIXUP)? "method ": "");
1333                     }
1334                     else if (compressed_size_m < compressed_size_n)
1335                     {
1336                         // We didn't write out as much data as we read. This will leave extra bytes in the
1337                         // signature that will be incorrectly recognized. Ideally, we would just shrink the
1338                         // signature. That's not easy to do here. Instead, pad the bytes to force it to use
1339                         // a larger encoding than needed. This assumes knowledge of the CorSigCompressData()
1340                         // encoding.
1341                         //
1342                         // The cases:
1343                         //      compressed_size_m   m bytes     compressed_size_n   result bytes
1344                         //      1                   m1          2                   0x80 m1
1345                         //      1                   m1          4                   0xC0 0x00 0x00 m1
1346                         //      2                   m1 m2       4                   0xC0 0x00 (m1 & 0x7f) m2
1347
1348                         _ASSERTE((compressed_size_m == 1) || (compressed_size_m == 2) || (compressed_size_m == 4));
1349                         _ASSERTE((compressed_size_n == 1) || (compressed_size_n == 2) || (compressed_size_n == 4));
1350
1351                         if ((compressed_size_m == 1) &&
1352                             (compressed_size_n == 2))
1353                         {
1354                             unsigned __int8 m1 = *(pb + 1);
1355                             _ASSERTE(m1 < 0x80);
1356                             *(pb + 1) = 0x80;
1357                             *(pb + 2) = m1;
1358                         }
1359                         else
1360                         if ((compressed_size_m == 1) &&
1361                             (compressed_size_n == 4))
1362                         {
1363                             unsigned __int8 m1 = *(pb + 1);
1364                             _ASSERTE(m1 < 0x80);
1365                             *(pb + 1) = 0xC0;
1366                             *(pb + 2) = 0x00;
1367                             *(pb + 3) = 0x00;
1368                             *(pb + 4) = m1;
1369                         }
1370                         else
1371                         if ((compressed_size_m == 2) &&
1372                             (compressed_size_n == 4))
1373                         {
1374                             unsigned __int8 m1 = *(pb + 1);
1375                             unsigned __int8 m2 = *(pb + 2);
1376                             _ASSERTE(m1 >= 0x80);
1377                             m1 &= 0x7f; // strip the bit indicating it's a 2-byte thing
1378                             *(pb + 1) = 0xC0;
1379                             *(pb + 2) = 0x00;
1380                             *(pb + 3) = m1;
1381                             *(pb + 4) = m2;
1382                         }
1383                     }
1384                 }
1385                 else
1386                     CorSigUncompressData((PCCOR_SIGNATURE&) ptr);  // bound
1387                 break;
1388
1389         case ELEMENT_TYPE_FNPTR: 
1390                 {
1391                     CorSigUncompressData((PCCOR_SIGNATURE&) ptr);    // calling convention
1392                     unsigned argCnt = CorSigUncompressData((PCCOR_SIGNATURE&) ptr);    // arg count
1393                     ptr = skipType(ptr, fFixupType);                             // return type
1394                     while(argCnt > 0)
1395                     {
1396                         ptr = skipType(ptr, fFixupType);
1397                         --argCnt;
1398                     }
1399                 }
1400                 break;
1401
1402         case ELEMENT_TYPE_GENERICINST: 
1403                {
1404                    ptr = skipType(ptr, fFixupType);                 // type constructor
1405                    unsigned argCnt = CorSigUncompressData((PCCOR_SIGNATURE&)ptr);               // arg count
1406                    while(argCnt > 0) {
1407                        ptr = skipType(ptr, fFixupType);
1408                        --argCnt;
1409                    }
1410                }
1411                break;                        
1412
1413         default:
1414         case ELEMENT_TYPE_END                   :
1415                 _ASSERTE(!"Unknown Type");
1416                 break;
1417     }
1418     return(ptr);
1419 }
1420
1421 /**************************************************************************/
1422 void FixupTyPars(PCOR_SIGNATURE pSig, ULONG cSig)
1423 {
1424     if(TyParFixupList.COUNT() > 0)
1425     {
1426         BYTE* ptr = (BYTE*)pSig;
1427         BYTE* ptrEnd = ptr + cSig;
1428         while(ptr < ptrEnd)
1429         {
1430             ptr = skipType(ptr, TRUE);
1431         } // end while
1432     } // end if(COUNT>0)
1433 }
1434 void FixupTyPars(BinStr* pbstype) 
1435 {
1436     FixupTyPars((PCOR_SIGNATURE)(pbstype->ptr()),(ULONG)(pbstype->length()));
1437 }
1438 /**************************************************************************/
1439 static unsigned corCountArgs(BinStr* args) 
1440 {
1441     unsigned __int8* ptr = args->ptr();
1442     unsigned __int8* end = &args->ptr()[args->length()];
1443     unsigned ret = 0;
1444     while(ptr < end) 
1445     {
1446         if (*ptr != ELEMENT_TYPE_SENTINEL) 
1447         {
1448             ptr = skipType(ptr, FALSE);
1449             ret++;
1450         }
1451         else ptr++;
1452     }
1453     return(ret);
1454 }
1455
1456 /********************************************************************************/
1457 AsmParse::AsmParse(ReadStream* aIn, Assembler *aAssem) 
1458 {
1459 #ifdef DEBUG_PARSING
1460     extern int yydebug;
1461     yydebug = 1;
1462 #endif
1463
1464     assem = aAssem;
1465     assem->SetErrorReporter((ErrorReporter *)this);
1466
1467     assem->m_ulCurLine = 1;
1468     assem->m_ulCurColumn = 1;
1469     
1470     wzIncludePath = NULL;
1471     penv = NULL;
1472
1473     hstdout = GetStdHandle(STD_OUTPUT_HANDLE);
1474     hstderr = GetStdHandle(STD_ERROR_HANDLE);
1475
1476     success = true; 
1477     _ASSERTE(parser == 0);          // Should only be one parser instance at a time
1478
1479    // Resolve aliases
1480    for (unsigned int i = 0; i < sizeof(keywords) / sizeof(Keywords); i++)
1481    {
1482        if (keywords[i].token == NO_VALUE)
1483            keywords[i].token = keywords[keywords[i].tokenVal].token;
1484    }
1485     SetSymbolTables();
1486     Init_str2uint64();
1487     parser = this;
1488     //yyparse();
1489 }
1490
1491 /********************************************************************************/
1492 AsmParse::~AsmParse() 
1493 {
1494     parser = 0;
1495     delete penv;
1496     while(m_ANSLast.POP());
1497 }
1498
1499 /**************************************************************************/
1500 DWORD AsmParse::IsItUnicode(CONST LPVOID pBuff, int cb, LPINT lpi)
1501 {
1502     return IsTextUnicode(pBuff,cb,lpi);
1503 }
1504
1505 /**************************************************************************/
1506 void AsmParse::CreateEnvironment(ReadStream* stream) 
1507
1508     penv = new PARSING_ENVIRONMENT;
1509     memset(penv,0,sizeof(PARSING_ENVIRONMENT));
1510     penv->in = stream; 
1511     penv->curLine = 1;
1512     strcpy_s(penv->szFileName, MAX_FILENAME_LENGTH*3+1,assem->m_szSourceFileName); 
1513     
1514     penv->curPos = fillBuff(NULL);
1515     penv->uCodePage = g_uCodePage;
1516     
1517     SetFunctionPtrs();
1518 };
1519
1520 /**************************************************************************/
1521 void AsmParse::ParseFile(ReadStream* stream) 
1522
1523     CreateEnvironment(stream);
1524     yyparse(); 
1525     penv->in = NULL; 
1526 };
1527
1528 /**************************************************************************/
1529 char* AsmParse::fillBuff(__in_opt __nullterminated char* pos) 
1530 {
1531     int iPutToBuffer;
1532     int iOptions = IS_TEXT_UNICODE_UNICODE_MASK;
1533     g_uCodePage = CP_ACP;
1534     iPutToBuffer = (int)penv->in->getAll(&(penv->curPos));
1535     
1536     penv->endPos = penv->curPos + iPutToBuffer;
1537     if(iPutToBuffer > 128) iPutToBuffer = 128;
1538     if(IsItUnicode(penv->curPos,iPutToBuffer,&iOptions))
1539     {
1540         g_uCodePage = CP_UTF8;
1541         if(iOptions & IS_TEXT_UNICODE_SIGNATURE)
1542         {
1543             penv->curPos += 2;
1544         }
1545         if(assem->m_fReportProgress) printf("Source file is UNICODE\n\n");
1546         penv->pfn_Sym = SymW;
1547         penv->pfn_nextchar = nextcharW;
1548         penv->pfn_NewStrFromToken = NewStrFromTokenW;
1549         penv->pfn_NewStaticStrFromToken = NewStaticStrFromTokenW;
1550         penv->pfn_GetDouble = GetDoubleW;
1551     }
1552     else
1553     {
1554         if(((penv->curPos[0]&0xFF)==0xEF)&&((penv->curPos[1]&0xFF)==0xBB)&&((penv->curPos[2]&0xFF)==0xBF))
1555         {
1556             g_uCodePage = CP_UTF8;
1557             penv->curPos += 3;
1558             if(assem->m_fReportProgress) printf("Source file is UTF-8\n\n");
1559             penv->pfn_nextchar = nextcharU;
1560         }
1561         else
1562         {
1563             if(assem->m_fReportProgress) printf("Source file is ANSI\n\n");
1564             penv->pfn_nextchar = nextcharA;
1565         }    
1566         penv->pfn_Sym = SymAU;
1567         penv->pfn_NewStrFromToken = NewStrFromTokenAU;
1568         penv->pfn_NewStaticStrFromToken = NewStaticStrFromTokenAU;
1569         penv->pfn_GetDouble = GetDoubleAU;
1570     }
1571     return(penv->curPos);
1572 }
1573
1574 /********************************************************************************/
1575 BinStr* AsmParse::MakeSig(unsigned callConv, BinStr* retType, BinStr* args, int ntyargs) 
1576 {
1577     _ASSERTE((ntyargs != 0) == ((callConv & IMAGE_CEE_CS_CALLCONV_GENERIC) != 0));
1578     BinStr* ret = new BinStr();
1579     if(ret)
1580     {
1581         //if (retType != 0) 
1582         ret->insertInt8(callConv);
1583         if (ntyargs != 0)
1584             corEmitInt(ret, ntyargs);
1585         corEmitInt(ret, corCountArgs(args));
1586
1587         if (retType != 0) 
1588         {
1589             ret->append(retType); 
1590             delete retType;
1591         }
1592         ret->append(args); 
1593     }
1594     else
1595         error("\nOut of memory!\n");
1596
1597     delete args;
1598     return(ret);
1599 }
1600
1601 /********************************************************************************/
1602 BinStr* AsmParse::MakeTypeArray(CorElementType kind, BinStr* elemType, BinStr* bounds) 
1603 {
1604     // 'bounds' is a binary buffer, that contains an array of 'struct Bounds' 
1605     struct Bounds {
1606         int lowerBound;
1607         unsigned numElements;
1608     };
1609
1610     _ASSERTE(bounds->length() % sizeof(Bounds) == 0);
1611     unsigned boundsLen = bounds->length() / sizeof(Bounds);
1612     _ASSERTE(boundsLen > 0);
1613     Bounds* boundsArr = (Bounds*) bounds->ptr();
1614
1615     BinStr* ret = new BinStr();
1616
1617     ret->appendInt8(kind);
1618     ret->append(elemType);
1619     corEmitInt(ret, boundsLen);                     // emit the rank
1620
1621     unsigned lowerBoundsDefined = 0;
1622     unsigned numElementsDefined = 0;
1623     unsigned i;
1624     for(i=0; i < boundsLen; i++) 
1625     {
1626         if(boundsArr[i].lowerBound < 0x7FFFFFFF) lowerBoundsDefined = i+1;
1627         else boundsArr[i].lowerBound = 0;
1628
1629         if(boundsArr[i].numElements < 0x7FFFFFFF) numElementsDefined = i+1;
1630         else boundsArr[i].numElements = 0;
1631     }
1632
1633     corEmitInt(ret, numElementsDefined);                    // emit number of bounds
1634
1635     for(i=0; i < numElementsDefined; i++) 
1636     {
1637         _ASSERTE (boundsArr[i].numElements >= 0);               // enforced at rule time
1638         corEmitInt(ret, boundsArr[i].numElements);
1639
1640     }
1641
1642     corEmitInt(ret, lowerBoundsDefined);    // emit number of lower bounds
1643     for(i=0; i < lowerBoundsDefined; i++)
1644         {
1645                 unsigned cnt = CorSigCompressSignedInt(boundsArr[i].lowerBound, ret->getBuff(5));
1646                 ret->remove(5 - cnt);
1647         }
1648     delete elemType;
1649     delete bounds;
1650     return(ret);
1651 }
1652
1653 /********************************************************************************/
1654 BinStr* AsmParse::MakeTypeClass(CorElementType kind, mdToken tk) 
1655 {
1656
1657     BinStr* ret = new BinStr();
1658     _ASSERTE(kind == ELEMENT_TYPE_CLASS || kind == ELEMENT_TYPE_VALUETYPE ||
1659                      kind == ELEMENT_TYPE_CMOD_REQD || kind == ELEMENT_TYPE_CMOD_OPT);
1660     ret->appendInt8(kind);
1661     unsigned cnt = CorSigCompressToken(tk, ret->getBuff(5));
1662     ret->remove(5 - cnt);
1663     return(ret);
1664 }
1665 /**************************************************************************/
1666 void PrintANSILine(FILE* pF, __in __nullterminated char* sz)
1667 {
1668         WCHAR *wz = &wzUniBuf[0];
1669         if(g_uCodePage != CP_ACP)
1670         {
1671                 memset(wz,0,dwUniBuf); // dwUniBuf/2 WCHARs = dwUniBuf bytes
1672                 WszMultiByteToWideChar(g_uCodePage,0,sz,-1,wz,(dwUniBuf >> 1)-1);
1673
1674                 memset(sz,0,dwUniBuf);
1675                 WszWideCharToMultiByte(g_uConsoleCP,0,wz,-1,sz,dwUniBuf-1,NULL,NULL);
1676         }
1677         fprintf(pF,"%s",sz);
1678 }
1679 /**************************************************************************/
1680 void AsmParse::error(const char* fmt, ...)
1681 {
1682     char *sz = (char*)(&wzUniBuf[(dwUniBuf >> 1)]);
1683     char *psz=&sz[0];
1684     FILE* pF = ((!assem->m_fReportProgress)&&(assem->OnErrGo)) ? stdout : stderr;
1685     success = false;
1686     va_list args;
1687     va_start(args, fmt);
1688
1689     if((penv) && (penv->in)) psz+=sprintf_s(psz, (dwUniBuf >> 1), "%S(%d) : ", penv->in->namew(), penv->curLine);
1690     psz+=sprintf_s(psz, (dwUniBuf >> 1), "error : ");
1691     _vsnprintf_s(psz, (dwUniBuf >> 1),(dwUniBuf >> 1)-strlen(sz)-1, fmt, args);
1692     PrintANSILine(pF,sz);
1693 }
1694
1695 /**************************************************************************/
1696 void AsmParse::warn(const char* fmt, ...)
1697 {
1698     char *sz = (char*)(&wzUniBuf[(dwUniBuf >> 1)]);
1699     char *psz=&sz[0];
1700     FILE* pF = ((!assem->m_fReportProgress)&&(assem->OnErrGo)) ? stdout : stderr;
1701     va_list args;
1702     va_start(args, fmt);
1703
1704     if((penv) && (penv->in)) psz+=sprintf_s(psz, (dwUniBuf >> 1), "%S(%d) : ", penv->in->namew(), penv->curLine);
1705     psz+=sprintf_s(psz, (dwUniBuf >> 1), "warning : ");
1706     _vsnprintf_s(psz, (dwUniBuf >> 1),(dwUniBuf >> 1)-strlen(sz)-1, fmt, args);
1707     PrintANSILine(pF,sz);
1708 }
1709 /**************************************************************************/
1710 void AsmParse::msg(const char* fmt, ...)
1711 {
1712     char *sz = (char*)(&wzUniBuf[(dwUniBuf >> 1)]);
1713     va_list args;
1714     va_start(args, fmt);
1715
1716     _vsnprintf_s(sz, (dwUniBuf >> 1),(dwUniBuf >> 1)-1, fmt, args);
1717     PrintANSILine(stdout,sz);
1718 }
1719
1720 #ifdef _MSC_VER
1721 #pragma warning(default : 4640)
1722 #endif