1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
4 /********************************************************************************/
7 /********************************************************************************/
13 unsigned short tokenVal;// this holds the instruction enumeration for those keywords that are instrs
17 #define NO_VALUE ((unsigned short)-1) // The token has no value
// Table of every word the lexer recognizes. indexKeywords() registers each
// entry under its `name`, and findKeyword() reads `tokenVal` from the entry it
// finds. The three macros below each emit a four-value initializer, presumably
// {name, token, tokenVal, name length} -- TODO confirm against the Keywords
// declaration, which is not visible in this excerpt.
19 static Keywords keywords[] = {
20 // Attention! Because of aliases, the instructions MUST go first!
21 // Redefine all the instructions (defined in assembler.h <- asmenum.h <- opcode.def)
31 #undef ShortInlineBrTarget
// Operand-kind names are remapped to INSTR_* values; each Short* kind shares
// the value of its long counterpart.
42 #define InlineNone INSTR_NONE
43 #define InlineVar INSTR_VAR
44 #define ShortInlineVar INSTR_VAR
45 #define InlineI INSTR_I
46 #define ShortInlineI INSTR_I
47 #define InlineI8 INSTR_I8
48 #define InlineR INSTR_R
49 #define ShortInlineR INSTR_R
50 #define InlineBrTarget INSTR_BRTARGET
51 #define ShortInlineBrTarget INSTR_BRTARGET
52 #define InlineMethod INSTR_METHOD
53 #define InlineField INSTR_FIELD
54 #define InlineType INSTR_TYPE
55 #define InlineString INSTR_STRING
56 #define InlineSig INSTR_SIG
57 #define InlineTok INSTR_TOK
58 #define InlineSwitch INSTR_SWITCH
60 #define InlineVarTok 0
61 #define NEW_INLINE_NAMES
62 // The volatile instruction collides with the volatile keyword, so
63 // we treat it as a keyword everywhere and modify the grammar accordingly (Yuck!)
// Entry for one opcode: only s, args and c are used, the other OPDEF
// arguments are ignored here.
64 #define OPDEF(c,s,pop,push,args,type,l,s1,s2,ctrl) { s, args, c, lengthof(s)-1 },
// An alias stores NO_VALUE in its second slot. The AsmParse constructor later
// replaces a NO_VALUE `token` with the `token` of the entry indexed by
// `tokenVal` (presumably the third slot, c -- TODO confirm).
65 #define OPALIAS(alias_c, s, c) { s, NO_VALUE, c, lengthof(s)-1 },
// Generic entry: name, sym and val are stored as given.
71 #define KYWD(name, sym, val) { name, sym, val, lengthof(name)-1 },
77 /********************************************************************************/
78 /* File encoding-dependent functions */
79 /*--------------------------------------------------------------------------*/
// Advances past one character of an ANSI-encoded buffer: a single byte when
// *pos is positive, otherwise the step is delegated to _mbsinc().
80 char* nextcharA(__in __nullterminated char* pos)
82 return (*pos > 0) ? ++pos : (char *)_mbsinc((const unsigned char *)pos);
// Character-advance routine that fillBuff() installs when the source starts
// with the UTF-8 byte-order mark. The body is not visible in this excerpt.
85 char* nextcharU(__in __nullterminated char* pos)
// Character-advance routine that fillBuff() installs when the source is
// detected as UNICODE. The body is not visible in this excerpt.
90 char* nextcharW(__in __nullterminated char* pos)
94 /*--------------------------------------------------------------------------*/
// Returns the char stored at curPos converted to unsigned.
// NOTE(review): where plain char is signed, bytes >= 0x80 convert to very
// large unsigned values rather than 0x80..0xFF -- confirm callers expect that.
95 unsigned SymAU(__in __nullterminated char* curPos)
97 return (unsigned)*curPos;
// Returns the WCHAR stored at curPos (the buffer is reinterpreted as wide
// characters) converted to unsigned.
100 unsigned SymW(__in __nullterminated char* curPos)
102 return (unsigned)*((WCHAR*)curPos);
104 /*--------------------------------------------------------------------------*/
// Copies the tokLen bytes starting at curTok into a fresh new[] buffer that
// has room for one extra byte. Termination and the return statement are not
// visible in this excerpt.
105 char* NewStrFromTokenAU(__in_ecount(tokLen) char* curTok, size_t tokLen)
107 char *nb = new char[tokLen+1];
110 memcpy(nb, curTok, tokLen);
// Wide-character counterpart of NewStrFromTokenAU: curTok is read as WCHARs
// and converted to UTF-8 in a fresh new[] buffer of 2*tokLen + 2 bytes.
// tokLen arrives as a byte count (tokLen >> 1 wide characters are converted)
// and is then overwritten with the converter's return value.
115 char* NewStrFromTokenW(__in_ecount(tokLen) char* curTok, size_t tokLen)
117 WCHAR* wcurTok = (WCHAR*)curTok;
118 char *nb = new char[(tokLen<<1) + 2];
121 tokLen = WszWideCharToMultiByte(CP_UTF8,0,(LPCWSTR)wcurTok,(int)(tokLen >> 1),nb,(int)(tokLen<<1) + 2,NULL,NULL);
126 /*--------------------------------------------------------------------------*/
// Copies a token into the caller-supplied staticBuf and zero-terminates it.
// Returns NULL without touching the buffer when the token plus terminator
// would not fit (tokLen >= bufSize).
127 char* NewStaticStrFromTokenAU(__in_ecount(tokLen) char* curTok, size_t tokLen, __out_ecount(bufSize) char* staticBuf, size_t bufSize)
129 if(tokLen >= bufSize) return NULL;
130 memcpy(staticBuf, curTok, tokLen);
131 staticBuf[tokLen] = 0;
// Wide-character counterpart of NewStaticStrFromTokenAU: converts tokLen >> 1
// WCHARs from curTok to UTF-8 inside staticBuf and zero-terminates at the
// length the converter reports. Tokens with tokLen >= bufSize/2 are rejected
// with NULL.
134 char* NewStaticStrFromTokenW(__in_ecount(tokLen) char* curTok, size_t tokLen, __out_ecount(bufSize) char* staticBuf, size_t bufSize)
136 WCHAR* wcurTok = (WCHAR*)curTok;
137 if(tokLen >= bufSize/2) return NULL;
138 tokLen = WszWideCharToMultiByte(CP_UTF8,0,(LPCWSTR)wcurTok,(int)(tokLen >> 1),staticBuf,(int)bufSize,NULL,NULL);
139 staticBuf[tokLen] = 0;
142 /*--------------------------------------------------------------------------*/
// Parses a floating-point literal from a byte-oriented buffer. L bytes are
// copied into a function-static scratch buffer, strtod() parses them, the
// value is stored in a newly allocated double published through *ppRes, and
// the number of characters strtod() consumed is returned.
// NOTE(review): no bound on L is visible in this excerpt -- confirm L is
// limited to fewer than 128 bytes and the buffer is terminated before the
// strtod() call.
143 unsigned GetDoubleAU(__in __nullterminated char* begNum, unsigned L, double** ppRes)
145 static char dbuff[128];
148 memcpy(dbuff,begNum,L);
150 *ppRes = new double(strtod(dbuff, &pdummy));
151 return ((unsigned)(pdummy - dbuff));
// Wide-character counterpart of GetDoubleAU: L bytes are copied into a
// 256-byte function-static buffer that is then parsed as wchar_t text with
// wcstod(). The return value is the difference between the end pointer and
// the buffer start.
// NOTE(review): no bound on L is visible in this excerpt -- confirm it is
// limited before the memcpy.
154 unsigned GetDoubleW(__in __nullterminated char* begNum, unsigned L, double** ppRes)
156 static char dbuff[256];
159 memcpy(dbuff,begNum,L);
162 *ppRes = new double(wcstod((const wchar_t*)dbuff, (wchar_t**)&pdummy));
163 return ((unsigned)(pdummy - dbuff));
165 /*--------------------------------------------------------------------------*/
// Fetches the text of source line `Line` into a function-static buffer (used
// by yyerror() to quote the offending line). The whole input is obtained from
// parser->getAll(), newlines are counted until line Line-1 has been passed,
// and the following line is copied without its trailing '\r'.
166 char* yygetline(int Line)
168 static char buff[0x4000];
169 char *pLine=NULL, *pNextLine=NULL;
170 char *pBegin=NULL, *pEnd = NULL;
171 unsigned uCount = parser->getAll(&pBegin);
172 pEnd = pBegin + uCount;
// First pass: uCount is reused as a newline counter to find the line start.
174 for(uCount=0, pLine=pBegin; pLine < pEnd; pLine = nextchar(pLine))
176 if(Sym(pLine) == '\n') uCount++;
177 if(uCount == (unsigned int)(Line-1)) break;
179 pLine = nextchar(pLine);
// Second pass: locate the newline that ends the requested line.
182 for(pNextLine = pLine; pNextLine < pEnd; pNextLine = nextchar(pNextLine))
184 if(Sym(pNextLine) == '\n') break;
186 if(Sym == SymW) // Unicode file
188 if(*((WCHAR*)pNextLine - 1) == '\r') pNextLine -= 2;
189 uCount = (unsigned)(pNextLine - pLine);
// NOTE(review): uCount is a byte count here, so this mask allows roughly 4K
// WCHARs, not 8K. The '&' also wraps an over-long length instead of
// truncating it -- confirm that is intended.
190 uCount &= 0x1FFF; // limit: 8K wchars
191 WCHAR* wzBuff = (WCHAR*)buff;
192 memcpy(buff,pLine,uCount);
193 wzBuff[uCount >> 1] = 0;
197 if(*(pNextLine-1)=='\r') pNextLine--;
198 uCount = (unsigned)(pNextLine - pLine);
// NOTE(review): same wrap-around behaviour as the Unicode branch above.
199 uCount &= 0x3FFF; // limit: 16K chars
200 memcpy(buff,pLine,uCount);
// Parser error callback. Writes "<file>(<line>) : error : <str> at token
// '<token>' in: <source line>" to stderr and marks the parse as failed by
// clearing parser->success.
//   str - the message text to report.
207 void yyerror(__in __nullterminated const char* str) {
209 WCHAR *wzfile = (WCHAR*)(PENV->in->namew());
210 int iline = PENV->curLine;
// At most 62 bytes of the current token are copied for display.
212 size_t len = PENV->curPos - PENV->curTok;
213 if (len > 62) len = 62;
214 memcpy(tokBuff, PENV->curTok, len);
// With an external source in effect, report its file name and line instead.
217 if(PENV->bExternSource)
219 wzfile = PASM->m_wzSourceFileName;
220 iline = PENV->nExtLine;
222 if(Sym == SymW) // Unicode file
223 fprintf(stderr, "%S(%d) : error : %s at token '%S' in: %S\n",
224 wzfile, iline, str, (WCHAR*)tokBuff, (WCHAR*)yygetline(PENV->curLine));
226 fprintf(stderr, "%S(%d) : error : %s at token '%s' in: %s\n",
227 wzfile, iline, str, tokBuff, yygetline(PENV->curLine));
228 parser->success = false;
231 /********************************************************************************/
232 /* Looks up the typedef 'name' of length 'NameLen' (name does not need to be
233 null terminated). Returns 0 on failure. */
// The token is first copied into a 4096-byte static buffer by
// NewStaticStrFromToken(); if that copy fails, or no typedefs are registered,
// pRet keeps its initial NULL.
234 TypeDefDescr* findTypedef(__in_ecount(NameLen) char* name, size_t NameLen)
236 TypeDefDescr* pRet = NULL;
237 static char Name[4096];
238 if(PASM->NumTypeDefs())
240 if(NewStaticStrFromToken(name,NameLen,Name,4096))
241 pRet = PASM->FindTypeDef(Name);
// Picks a result according to the token type of pTDD->m_tkTypeSpec. Only one
// case label is visible in this excerpt; the returned values are not.
246 int TYPEDEF(TypeDefDescr* pTDD)
248 switch(TypeFromToken(pTDD->m_tkTypeSpec))
263 case mdtCustomAttribute:
270 /********************************************************************************/
// Registers every entry of the keywords[] table in `indx`, keyed by the
// entry's name and pointing back at the entry itself.
271 void indexKeywords(Indx* indx) // called in Assembler constructor (assem.cpp)
273 Keywords* low = keywords;
274 Keywords* high = keywords + (sizeof(keywords) / sizeof(Keywords));
276 for(mid = low; mid < high; mid++)
278 indx->IndexString((char*)(mid->name),mid);
// Obtains an Instr from PASM->GetInstr() and fills it in for `opcode`.
// Line/column data is recorded only when a symbol document
// (PASM->m_pSymDocument) is present.
282 Instr* SetupInstr(unsigned short opcode)
285 if((pVal = PASM->GetInstr()))
287 pVal->opcode = opcode;
288 if((pVal->pWriter = PASM->m_pSymDocument)!=NULL)
// External source in effect: take the position from the nExt* fields.
290 if(PENV->bExternSource)
292 pVal->linenum = PENV->nExtLine;
293 pVal->column = PENV->nExtCol;
294 pVal->linenum_end = PENV->nExtLineEnd;
295 pVal->column_end = PENV->nExtColEnd;
// Otherwise use the lexer's current line.
300 pVal->linenum = PENV->curLine;
302 pVal->linenum_end = PENV->curLine;
303 pVal->column_end = 0;
304 pVal->pc = PASM->m_CurPC;
310 /* Looks up the keyword 'name' of length 'nameLen' (name does not need to be
311 null terminated). Returns 0 on failure. */
// On a hit, *pOpcode receives the entry's tokenVal. Tokens that do not fit the
// 128-byte scratch buffer are rejected before the index is consulted.
312 int findKeyword(const char* name, size_t nameLen, unsigned short* pOpcode)
314 static char Name[128];
317 if(NULL == NewStaticStrFromToken((char*)name,nameLen,Name,128)) return 0; // can't be a keyword
318 mid = (Keywords*)(PASM->indxKeywords.FindString(Name));
319 if(mid == NULL) return 0;
320 *pOpcode = mid->tokenVal;
325 /********************************************************************************/
326 /* convert str to a uint64 */
// Digit-value lookup for 7-bit character codes, filled in by
// Init_str2uint64().
327 unsigned digits[128];
// Fills digits[]: every byte is first set to 255 (so unlisted characters hold
// an all-ones value), then '0'-'9' map to 0-9 and letters of either case map
// to 10-35.
328 void Init_str2uint64()
331 memset(digits,255,sizeof(digits));
332 for(i='0'; i <= '9'; i++) digits[i] = i - '0';
333 for(i='A'; i <= 'Z'; i++) digits[i] = i + 10 - 'A';
334 for(i='a'; i <= 'z'; i++) digits[i] = i + 10 - 'a';
// Accumulates an unsigned 64-bit value from the characters at str, one
// character per step (via the active Sym/nextchar routines), using
// ret = ret * radix + digit. radix must not exceed 36.
// NOTE(review): no overflow check is visible in this excerpt.
336 static unsigned __int64 str2uint64(const char* str, const char** endStr, unsigned radix)
338 unsigned __int64 ret = 0;
340 _ASSERTE(radix <= 36);
341 for(;;str = nextchar((char*)str))
343 ix = Sym((char*)str);
349 ret = ret * radix + digit;
357 /********************************************************************************/
358 /* Append a UTF-8 string preceded by compressed length, no zero terminator, to a BinStr */
// Does nothing when either pbs or sz is NULL. The string bytes go into space
// reserved with pbs->getBuff(L), where L is strlen(sz).
359 static void AppendStringWithLength(BinStr* pbs, __in __nullterminated char* sz)
361 if((pbs != NULL) && (sz != NULL))
363 unsigned L = (unsigned) strlen(sz);
366 if((pb = pbs->getBuff(L)) != NULL)
371 /********************************************************************************/
372 /* Fetch the next token and return it. Also set the yylval union if the
373 lexical token also has a value. */
// Macro flavour of the character classifiers. A table-driven flavour follows;
// the preprocessor condition choosing between them is not visible in this
// excerpt. Each macro evaluates its argument more than once.
377 #define IsAlpha(x) ((('A' <= (x))&&((x) <= 'Z'))||(('a' <= (x))&&((x) <= 'z')))
378 #define IsDigit(x) (('0' <= (x))&&((x) <= '9'))
379 #define IsAlNum(x) (IsAlpha(x) || IsDigit(x))
// Identifiers may start with a letter, '#', '_', '@' or '$'.
380 #define IsValidStartingSymbol(x) (IsAlpha(x)||((x)=='#')||((x)=='_')||((x)=='@')||((x)=='$'))
// NOTE(review): the table flavour also accepts '`' as a continuing symbol;
// this macro does not -- confirm which is intended.
381 #define IsValidContinuingSymbol(x) (IsAlNum(x)||((x)=='_')||((x)=='@')||((x)=='$')||((x)=='?'))
// Nothing to initialise for the macro flavour.
382 void SetSymbolTables() { ; }
// Table flavour: clears the five classification tables and then marks the
// characters belonging to each class. The bodies of the letter and digit
// loops are not visible in this excerpt.
391 void SetSymbolTables()
394 memset(_Alpha,0,sizeof(_Alpha));
395 memset(_Digit,0,sizeof(_Digit));
396 memset(_AlNum,0,sizeof(_AlNum));
397 memset(_ValidSS,0,sizeof(_ValidSS));
398 memset(_ValidCS,0,sizeof(_ValidCS));
399 for(i = 'A'; i <= 'Z'; i++)
406 for(i = 'a'; i <= 'z'; i++)
413 for(i = '0'; i <= '9'; i++)
// Punctuation allowed as the first character of an identifier.
419 _ValidSS[(unsigned char)'_'] = TRUE;
420 _ValidSS[(unsigned char)'#'] = TRUE;
421 _ValidSS[(unsigned char)'$'] = TRUE;
422 _ValidSS[(unsigned char)'@'] = TRUE;
// Punctuation allowed after the first character of an identifier.
424 _ValidCS[(unsigned char)'_'] = TRUE;
425 _ValidCS[(unsigned char)'?'] = TRUE;
426 _ValidCS[(unsigned char)'$'] = TRUE;
427 _ValidCS[(unsigned char)'@'] = TRUE;
428 _ValidCS[(unsigned char)'`'] = TRUE;
// Table-backed classifiers. Any code of 128 or above is rejected before the
// table is indexed, so only 7-bit characters can match.
430 BOOL IsAlpha(unsigned x) { return (x < 128)&&_Alpha[x]; }
431 BOOL IsDigit(unsigned x) { return (x < 128)&&_Digit[x]; }
432 BOOL IsAlNum(unsigned x) { return (x < 128)&&_AlNum[x]; }
433 BOOL IsValidStartingSymbol(unsigned x) { return (x < 128)&&_ValidSS[x]; }
434 BOOL IsValidContinuingSymbol(unsigned x) { return (x < 128)&&_ValidCS[x]; }
// Advances curPos one character at a time; the lexer calls it to step over a
// token it is skipping. The visible test looks at the character after curPos
// for '/' or '*', presumably to recognise the start of a comment -- TODO
// confirm, the surrounding conditions are not visible in this excerpt.
438 char* nextBlank(__in __nullterminated char* curPos)
445 if ((Sym(nextchar(curPos)) == '/')|| (Sym(nextchar(curPos)) == '*'))
449 curPos = nextchar(curPos);
461 curPos = nextchar(curPos);
// Moves curPos past whitespace and comments. *pstate carries the comment state
// in: bit 1 (eolComment) for an end-of-line comment and bit 2 (multiComment)
// for a block comment. The lexer inspects the multiComment bit afterwards to
// report an unterminated comment.
466 char* skipBlanks(__in __nullterminated char* curPos, unsigned* pstate)
468 const unsigned eolComment = 1;
469 const unsigned multiComment = 2;
470 unsigned nextSym, state = *pstate;
473 { // skip whitespace and comments
474 if (curPos >= PENV->endPos)
// An end-of-line comment ends here, and the assembler's current line/column
// are refreshed from either the external-source or the lexer position.
484 state &= ~eolComment;
486 if(PENV->bExternSource)
488 if(PENV->bExternSourceAutoincrement) PENV->nExtLine++;
489 PASM->m_ulCurLine = PENV->nExtLine;
490 PASM->m_ulCurColumn = PENV->nExtCol;
494 PASM->m_ulCurLine = PENV->curLine;
495 PASM->m_ulCurColumn = 1;
505 if(state == 0) goto PAST_WHITESPACE;
// Inside a block comment: a following '/' closes it.
506 if(state & multiComment)
508 nextPos = nextchar(curPos);
509 if (Sym(nextPos) == '/')
512 state &= ~multiComment;
// Possible comment opener: a following '*' starts a block comment.
520 nextPos = nextchar(curPos);
521 nextSym = Sym(nextPos);
527 else if (nextSym == '*')
530 state |= multiComment;
532 else goto PAST_WHITESPACE;
537 if (state == 0) goto PAST_WHITESPACE;
539 curPos = nextchar(curPos);
546 char* FullFileName(__in __nullterminated WCHAR* wzFileName, unsigned uCodePage);
550 PARSING_ENVIRONMENT* prev_penv = parser->PEStack.POP();
551 if(prev_penv != NULL)
553 //delete [] (WCHAR*)(PENV->in->namew());
556 parser->penv = prev_penv;
558 char* szFileName = new char[strlen(PENV->szFileName)+1];
559 strcpy_s(szFileName,strlen(PENV->szFileName)+1,PENV->szFileName);
560 PASM->SetSourceFileName(szFileName); // deletes the argument!
567 #define NEXT_TOKEN {state=0; curPos=PENV->curPos; goto NextToken;}
// Lexes a quoted literal whose opening quote character is curSym.
//   curSym            - the opening quote; the literal ends at the next
//                       unescaped occurrence of the same character.
//   curPos            - in: position of the opening quote; out: advanced as
//                       scanning proceeds.
//   translate_escapes - when set, backslash escapes are decoded into a second
//                       buffer; otherwise the raw token text is used.
// Returns BAD_LITERAL_ when input ends before the closing quote is found;
// other return values are only partly visible in this excerpt.
569 int parse_literal(unsigned curSym, __inout __nullterminated char* &curPos, BOOL translate_escapes)
571 unsigned quote = curSym;
572 curPos = nextchar(curPos);
573 char* fromPtr = curPos;
577 { // Find matching quote
// Running off the end of the buffer is treated as reading a 0 symbol.
578 curSym = (curPos >= PENV->endPos) ? 0 : Sym(curPos);
581 PENV->curPos = curPos;
582 return(BAD_LITERAL_);
584 else if(curSym == '\\')
591 if(PENV->bExternSource)
593 if(PENV->bExternSourceAutoincrement) PENV->nExtLine++;
594 PASM->m_ulCurLine = PENV->nExtLine;
595 PASM->m_ulCurColumn = PENV->nExtCol;
599 PASM->m_ulCurLine = PENV->curLine;
600 PASM->m_ulCurColumn = 1;
602 if (!escape) { PENV->curPos = curPos; return(BAD_LITERAL_); }
604 else if ((curSym == quote) && (!escape)) break;
607 curPos = nextchar(curPos);
609 // translate escaped characters
610 unsigned tokLen = (unsigned)(curPos - fromPtr);
611 char* newstr = NewStrFromToken(fromPtr, tokLen);
613 curPos = nextchar(curPos); // skip closing quote
614 if(translate_escapes)
618 tokLen = (unsigned)strlen(newstr);
619 toPtr = new char[tokLen+1];
620 if(toPtr==NULL) return BAD_LITERAL_;
621 yylval.string = toPtr;
622 char* endPtr = fromPtr+tokLen;
623 while(fromPtr < endPtr)
625 if (*fromPtr == '\\')
// NOTE(review): *fromPtr is a plain char; passing a negative value to
// isspace() is undefined where char is signed -- confirm.
656 while(isspace(*fromPtr));
657 --fromPtr; // undo the increment below
// Three-digit escape decoded as octal.
// NOTE(review): IsDigit also accepts '8' and '9' -- confirm.
663 if (IsDigit(fromPtr[1]) && IsDigit(fromPtr[2]))
665 *toPtr++ = ((fromPtr[0] - '0') * 8 + (fromPtr[1] - '0')) * 8 + (fromPtr[2] - '0');
668 else if(*fromPtr == '0') *toPtr++ = 0;
669 else *toPtr++ = *fromPtr;
677 // *toPtr++ = *fromPtr++;
// Copy one whole character. When the source is UNICODE the token has already
// been converted to UTF-8, so it is stepped with nextcharU.
679 char* tmpPtr = fromPtr;
680 fromPtr = (nextchar == nextcharW) ? nextcharU(fromPtr) : nextchar(fromPtr);
681 while(tmpPtr < fromPtr) *toPtr++ = *tmpPtr++;
684 } //end while(fromPtr < endPtr)
685 *toPtr = 0; // terminate string
690 yylval.string = newstr;
691 toPtr = newstr + strlen(newstr);
694 PENV->curPos = curPos;
// The text is moved into a BinStr and the temporary string released.
697 BinStr* pBS = new BinStr();
698 unsigned size = (unsigned)(toPtr - yylval.string);
699 memcpy(pBS->getBuff(size),yylval.string,size);
700 delete [] yylval.string;
// The literal may name a typedef, in which case the typedef token is returned.
706 if(PASM->NumTypeDefs())
708 TypeDefDescr* pTDD = PASM->FindTypeDef(yylval.string);
711 delete [] yylval.string;
713 return(TYPEDEF(pTDD));
721 #pragma warning(push)
722 #pragma warning(disable:21000) // Suppress PREFast warning about overly large function
726 char* curPos = PENV->curPos;
728 const unsigned multiComment = 2;
734 // Skip any leading whitespace and comments
735 curPos = skipBlanks(curPos, &state);
738 if (state & multiComment) return (BAD_COMMENT_);
739 if(ProcessEOF() == 0) return 0; // EOF
742 char* curTok = curPos;
743 PENV->curTok = curPos;
744 PENV->curPos = curPos;
748 curSym = Sym(curPos);
749 if(bParsingByteArray) // only hexadecimals w/o 0x, ')' and white space allowed!
752 for(i=0; i<2; i++, curPos = nextchar(curPos), curSym = Sym(curPos))
754 if(('0' <= curSym)&&(curSym <= '9')) s = s*16+(curSym - '0');
755 else if(('A' <= curSym)&&(curSym <= 'F')) s = s*16+(curSym - 'A' + 10);
756 else if(('a' <= curSym)&&(curSym <= 'f')) s = s*16+(curSym - 'a' + 10);
757 else break; // don't increase curPos!
766 if(curSym == ')' || curSym == '}')
768 bParsingByteArray = FALSE;
769 goto Just_A_Character;
772 PENV->curPos = curPos;
775 if(curSym == '?') // '?' may be part of an identifier, if it's not followed by punctuation
777 if(IsValidContinuingSymbol(Sym(nextchar(curPos)))) goto Its_An_Id;
778 goto Just_A_Character;
781 if (IsValidStartingSymbol(curSym))
784 size_t offsetDot = (size_t)-1; // first appearance of '.'
785 size_t offsetDotDigit = (size_t)-1; // first appearance of '.<digit>' (not DOTTEDNAME!)
788 curPos = nextchar(curPos);
789 if (Sym(curPos) == '.')
791 if (offsetDot == (size_t)-1) offsetDot = curPos - curTok;
792 curPos = nextchar(curPos);
793 if((offsetDotDigit==(size_t)-1)&&(Sym(curPos) >= '0')&&(Sym(curPos) <= '9'))
794 offsetDotDigit = curPos - curTok - 1;
796 } while(IsValidContinuingSymbol(Sym(curPos)));
798 size_t tokLen = curPos - curTok;
799 // check to see if it is a keyword
800 int token = findKeyword(curTok, tokLen, &yylval.opcode);
803 //printf("yylex: TOK = %d, curPos=0x%8.8X\n",token,curPos);
804 PENV->curPos = curPos;
805 PENV->curTok = curTok;
811 //if(include_first_pass)
813 // PENV->curPos = curTok;
814 // include_first_pass = FALSE;
817 //include_first_pass = TRUE;
818 curPos = skipBlanks(curPos,&state);
821 if (state & multiComment) return (BAD_COMMENT_);
822 if(ProcessEOF() == 0) return 0; // EOF
825 if(Sym(curPos) != '"') return ERROR_;
826 curPos = nextchar(curPos);
828 PENV->curTok = curPos;
829 while(Sym(curPos) != '"')
831 curPos = nextchar(curPos);
832 if(curPos >= PENV->endPos) return ERROR_;
833 PENV->curPos = curPos;
835 tokLen = PENV->curPos - curTok;
836 curPos = nextchar(curPos);
837 PENV->curPos = curPos;
842 if((wzFile = new WCHAR[tokLen/2 + 1]) != NULL)
844 memcpy(wzFile,curTok,tokLen);
845 wzFile[tokLen/2] = 0;
850 if((wzFile = new WCHAR[tokLen+1]) != NULL)
852 tokLen = WszMultiByteToWideChar(g_uCodePage,0,curTok,(int)tokLen,wzFile,(int)tokLen+1);
858 if((parser->wzIncludePath != NULL)
859 &&(wcschr(wzFile,'\\')==NULL)&&(wcschr(wzFile,':')==NULL))
861 PathString wzFullName;
864 DWORD dw = WszSearchPath(parser->wzIncludePath,wzFile,NULL,
865 TRUE, wzFullName,&pwz);
868 wzFullName.CloseBuffer((COUNT_T)(dw));
871 wzFile = wzFullName.GetCopyOfUnicodeString();
875 if(PASM->m_fReportProgress)
876 parser->msg("\nIncluding '%S'\n",wzFile);
877 MappedFileStream *pIn = new MappedFileStream(wzFile);
878 if((pIn != NULL)&&pIn->IsValid())
880 parser->PEStack.PUSH(PENV);
881 PASM->SetSourceFileName(FullFileName(wzFile,CP_UTF8)); // deletes the argument!
882 parser->CreateEnvironment(pIn);
888 PASM->report->error("#include failed\n");
894 PASM->report->error("Out of memory\n");
898 curPos = PENV->curPos;
899 curTok = PENV->curTok;
905 curPos = skipBlanks(curPos,&state);
908 if (state & multiComment) return (BAD_COMMENT_);
909 if(ProcessEOF() == 0) return 0; // EOF
913 PENV->curTok = curPos;
914 PENV->curPos = curPos;
915 if (!IsValidStartingSymbol(Sym(curPos))) return ERROR_;
918 curPos = nextchar(curPos);
919 } while(IsValidContinuingSymbol(Sym(curPos)));
920 tokLen = curPos - curTok;
922 newstr = NewStrFromToken(curTok, tokLen);
923 if((token==P_DEFINE)||(token==P_UNDEF))
925 if(token == P_DEFINE)
927 curPos = skipBlanks(curPos,&state);
928 if ((curPos == NULL) && (ProcessEOF() == 0))
930 DefineVar(newstr, NULL);
933 curSym = Sym(curPos);
935 DefineVar(newstr, NULL);
938 tok = parse_literal(curSym, curPos, FALSE);
941 // if not ANSI, then string is in UTF-8,
943 if(nextchar != nextcharA)
945 yylval.binstr->insertInt8(0xEF);
946 yylval.binstr->insertInt8(0xBB);
947 yylval.binstr->insertInt8(0xBF);
949 yylval.binstr->appendInt8(' ');
950 DefineVar(newstr, yylval.binstr);
956 else UndefVar(newstr);
960 SkipToken = IsVarDefined(newstr);
961 if(token == P_IFDEF) SkipToken = !SkipToken;
963 if(SkipToken) IfEndifSkip=IfEndif;
973 PASM->report->error("Unmatched #endif\n");
992 if(IfEndif == IfEndifSkip) SkipToken = FALSE;
995 if(IfEndif == IfEndifSkip) SkipToken = FALSE;
1001 //if(yylval.instr) yylval.instr->opcode = -1;
1005 } // end if token != 0
1006 if(SkipToken) { curPos = nextBlank(curPos); goto NextToken; }
1008 VarName* pVarName = FindVarDef(NewStrFromToken(curTok, tokLen));
1009 if(pVarName != NULL)
1011 if(pVarName->pbody != NULL)
1013 BinStrStream *pIn = new BinStrStream(pVarName->pbody);
1014 if((pIn != NULL)&&pIn->IsValid())
1016 PENV->curPos = curPos;
1017 parser->PEStack.PUSH(PENV);
1018 parser->CreateEnvironment(pIn);
1024 TypeDefDescr* pTDD = findTypedef(curTok,tokLen);
1029 PENV->curPos = curPos;
1030 PENV->curTok = curTok;
1031 return(TYPEDEF(pTDD));
1033 if(Sym(curTok) == '#')
1035 PENV->curPos = curPos;
1036 PENV->curTok = curTok;
1039 // Not a keyword, normal identifiers don't have '.' in them
1040 if (offsetDot < (size_t)-1)
1042 if(offsetDotDigit < (size_t)-1)
1044 curPos = curTok+offsetDotDigit;
1045 tokLen = offsetDotDigit;
1047 // protection against something like Foo.Bar..123 or Foo.Bar.
1048 unsigned D = (Sym == SymW) ? 2 : 1; // Unicode or ANSI/UTF8!
1049 while((Sym(curPos-D)=='.')&&(tokLen))
1055 if((yylval.string = NewStrFromToken(curTok,tokLen)))
1057 tok = (offsetDot == (size_t)(-1))? ID : DOTTEDNAME;
1058 //printf("yylex: ID = '%s', curPos=0x%8.8X\n",yylval.string,curPos);
1060 else return BAD_LITERAL_;
1062 else if(SkipToken) { curPos = nextBlank(curPos); goto NextToken; }
1063 else if (IsDigit(curSym)
1064 || (curSym == '.' && IsDigit(Sym(nextchar(curPos))))
1065 || (curSym == '-' && IsDigit(Sym(nextchar(curPos)))))
1067 const char* begNum = curPos;
1068 unsigned radix = 10;
1070 neg = (curSym == '-'); // always make it unsigned
1071 if (neg) curPos = nextchar(curPos);
1073 if (Sym(curPos) == '0' && Sym(nextchar(curPos)) != '.')
1075 curPos = nextchar(curPos);
1077 if (Sym(curPos) == 'x' || Sym(curPos) == 'X')
1079 curPos = nextchar(curPos);
1085 unsigned __int64 i64 = str2uint64(begNum, const_cast<const char**>(&curPos), radix);
1086 unsigned __int64 mask64 = neg ? UI64(0xFFFFFFFF80000000) : UI64(0xFFFFFFFF00000000);
1087 unsigned __int64 largestNegVal32 = UI64(0x0000000080000000);
1088 if ((i64 & mask64) && (i64 != largestNegVal32))
1090 yylval.int64 = new __int64(i64);
1092 if (neg) *yylval.int64 = -*yylval.int64;
1096 yylval.int32 = (__int32)i64;
1098 if(neg) yylval.int32 = -yylval.int32;
1101 if (radix == 10 && ((Sym(curPos) == '.' && Sym(nextchar(curPos)) != '.') || Sym(curPos) == 'E' || Sym(curPos) == 'e'))
1103 unsigned L = (unsigned)(PENV->endPos - begNum);
1104 curPos = (char*)begNum + GetDouble((char*)begNum,L,&yylval.float64);
1105 if (neg) *yylval.float64 = -*yylval.float64;
1111 if (curSym == '"' || curSym == '\'')
1113 return parse_literal(curSym, curPos, TRUE);
1114 } // end if (*curPos == '"' || *curPos == '\'')
1115 else if (curSym==':' && Sym(nextchar(curPos))==':')
1117 curPos = nextchar(nextchar(curPos));
1120 else if(curSym == '.')
1122 if (Sym(nextchar(curPos))=='.' && Sym(nextchar(nextchar(curPos)))=='.')
1124 curPos = nextchar(nextchar(nextchar(curPos)));
1131 curPos = nextchar(curPos);
1132 if (curPos >= PENV->endPos)
1134 curSym = Sym(curPos);
1136 while(IsAlNum(curSym) || curSym == '_' || curSym == '$'|| curSym == '@'|| curSym == '?');
1137 size_t tokLen = curPos - curTok;
1139 // check to see if it is a keyword
1140 int token = findKeyword(curTok, tokLen, &yylval.opcode);
1143 //printf("yylex: TOK = %d, curPos=0x%8.8X\n",token,curPos);
1144 PENV->curPos = curPos;
1145 PENV->curTok = curTok;
1149 curPos = nextchar(curTok);
1156 curPos = nextchar(curPos);
1158 //printf("yylex: PUNCT curPos=0x%8.8X\n",curPos);
1160 dbprintf((" Line %d token %d (%c) val = %s\n", PENV->curLine, tok,
1161 (tok < 128 && isprint(tok)) ? tok : ' ',
1162 (tok > 255 && tok != INT32 && tok != INT64 && tok!= FLOAT64) ? yylval.string : ""));
1164 PENV->curPos = curPos;
1165 PENV->curTok = curTok;
1169 #pragma warning(pop)
1172 /**************************************************************************/
// Duplicates str1 into a buffer obtained with new char[]. The copy is skipped
// when the allocation yields NULL.
1173 static char* newString(__in __nullterminated const char* str1)
1175 char* ret = new char[strlen(str1)+1];
1176 if(ret) strcpy_s(ret, strlen(str1)+1, str1);
1180 /**************************************************************************/
1181 /* concatenate strings and release them */
// Builds "<str1><delimiter><str3>" in a new char[] buffer sized for both
// strings, the delimiter and the terminator. str3 may be NULL, in which case
// it contributes nothing to the length.
1183 static char* newStringWDel(__in __nullterminated char* str1, char delimiter, __in __nullterminated char* str3)
1185 size_t len1 = strlen(str1);
1186 size_t len = len1+2;
1187 if (str3) len += strlen(str3);
1188 char* ret = new char[len];
1191 strcpy_s(ret, len, str1);
1193 ret[len1] = delimiter;
1197 strcat_s(ret, len, str3);
1204 /**************************************************************************/
// Appends `data` to buff in compressed signature form. Five bytes are reserved
// up front, CorSigCompressData() reports how many it used, and the unused
// remainder is removed again.
1205 static void corEmitInt(BinStr* buff, unsigned data)
1207 unsigned cnt = CorSigCompressData(data, buff->getBuff(5));
1208 buff->remove(5 - cnt);
1212 /**************************************************************************/
1213 /* move 'ptr' past exactly one type description */
// Recurses into nested element types. For the *FIXUP element types the fixup
// number stored in the signature is rewritten in place as a real generic
// parameter index.
// NOTE(review): how fFixupType gates that rewrite is not visible in this
// excerpt; it is only seen being passed through to recursive calls.
1215 unsigned __int8* skipType(unsigned __int8* ptr, BOOL fFixupType)
// Element types with no payload.
1220 case ELEMENT_TYPE_VOID :
1221 case ELEMENT_TYPE_BOOLEAN :
1222 case ELEMENT_TYPE_CHAR :
1223 case ELEMENT_TYPE_I1 :
1224 case ELEMENT_TYPE_U1 :
1225 case ELEMENT_TYPE_I2 :
1226 case ELEMENT_TYPE_U2 :
1227 case ELEMENT_TYPE_I4 :
1228 case ELEMENT_TYPE_U4 :
1229 case ELEMENT_TYPE_I8 :
1230 case ELEMENT_TYPE_U8 :
1231 case ELEMENT_TYPE_R4 :
1232 case ELEMENT_TYPE_R8 :
1233 case ELEMENT_TYPE_U :
1234 case ELEMENT_TYPE_I :
1235 case ELEMENT_TYPE_STRING :
1236 case ELEMENT_TYPE_OBJECT :
1237 case ELEMENT_TYPE_TYPEDBYREF :
1238 case ELEMENT_TYPE_SENTINEL :
// Followed by one compressed token.
1242 case ELEMENT_TYPE_VALUETYPE :
1243 case ELEMENT_TYPE_CLASS :
1244 ptr += CorSigUncompressToken(ptr, &tk);
1247 case ELEMENT_TYPE_CMOD_REQD :
1248 case ELEMENT_TYPE_CMOD_OPT :
1249 ptr += CorSigUncompressToken(ptr, &tk);
// Element type, rank, then the size list and the lower-bound list.
1252 case ELEMENT_TYPE_ARRAY :
1254 ptr = skipType(ptr, fFixupType); // element Type
1255 unsigned rank = CorSigUncompressData((PCCOR_SIGNATURE&) ptr);
1258 unsigned numSizes = CorSigUncompressData((PCCOR_SIGNATURE&) ptr);
1261 CorSigUncompressData((PCCOR_SIGNATURE&) ptr);
1264 unsigned numLowBounds = CorSigUncompressData((PCCOR_SIGNATURE&) ptr);
1265 while(numLowBounds > 0)
1267 CorSigUncompressData((PCCOR_SIGNATURE&) ptr);
1274 // Modifiers or dependent types
1275 case ELEMENT_TYPE_PINNED :
1276 case ELEMENT_TYPE_PTR :
1277 case ELEMENT_TYPE_BYREF :
1278 case ELEMENT_TYPE_SZARRAY :
1279 // tail recursion optimization
1280 // ptr = skipType(ptr, fFixupType);
1284 case ELEMENT_TYPE_VAR:
1285 case ELEMENT_TYPE_MVAR:
1286 CorSigUncompressData((PCCOR_SIGNATURE&) ptr); // bound
1289 case ELEMENT_TYPE_VARFIXUP:
1290 case ELEMENT_TYPE_MVARFIXUP:
1293 BYTE* pb = ptr-1; // ptr incremented in switch
1294 unsigned __int8* ptr_save = ptr;
1295 int n = CorSigUncompressData((PCCOR_SIGNATURE&) ptr); // fixup #
1296 int compressed_size_n = (int)(ptr - ptr_save); // ptr was updated by CorSigUncompressData()
// Resolve the fixup's parameter name to its index in the current type
// parameter list.
1298 if(PASM->m_TyParList)
1299 m = PASM->m_TyParList->IndexOf(TyParFixupList.PEEK(n));
1302 PASM->report->error("(fixupType) Invalid %stype parameter '%s'\n",
1303 (*pb == ELEMENT_TYPE_MVARFIXUP)? "method ": "",
1304 TyParFixupList.PEEK(n));
// Replace the FIXUP element type with the real one and write the index over
// the fixup number.
1307 *pb = (*pb == ELEMENT_TYPE_MVARFIXUP)? ELEMENT_TYPE_MVAR : ELEMENT_TYPE_VAR;
1308 int compressed_size_m = (int)CorSigCompressData(m,pb+1);
1310 // Note that CorSigCompressData() (and hence, CorSigUncompressData()) store a number
1311 // 0 <= x <= 0x1FFFFFFF in 1, 2, or 4 bytes. Above, 'n' is the fixup number being read,
1312 // and 'm' is the generic parameter number being written out (in the same place where 'n'
1313 // came from). If 'n' takes more space to compress than 'm' (e.g., 0x80 <= n <= 0x3fff so
1314 // it takes 2 bytes, and m < 0x80 so it takes one byte), then when we overwrite the fixup
1315 // number with the generic parameter number, we'll leave extra bytes in the signature following
1316 // the written generic parameter number. Thus, we do something of a hack to ensure that the
1317 // compressed number is correctly readable even if 'm' compresses smaller than 'n' did: we
1318 // recompress 'm' to use the same amount of space as 'n' used. This is possible because smaller
1319 // numbers can still be compressed in a larger amount of space, even though it's not optimal (and
1320 // CorSigCompressData() would never do it). If, however, the compressed sizes are the other
1321 // way around (m takes more space to compress than n), then we've already corrupted the
1322 // signature that we're reading by writing beyond what we should (is there some reason why
1323 // this is not possible?).
1324 // Note that 'ptr' has already been adjusted, above, to point to the next type after this one.
1325 // There is no need to update it when recompressing the data.
1327 if (compressed_size_m > compressed_size_n)
1329 // We've got a problem: we just corrupted the rest of the signature!
1330 // (Can this ever happen in practice?)
1331 PASM->report->error("(fixupType) Too many %stype parameters\n",
1332 (*pb == ELEMENT_TYPE_MVARFIXUP)? "method ": "");
1334 else if (compressed_size_m < compressed_size_n)
1336 // We didn't write out as much data as we read. This will leave extra bytes in the
1337 // signature that will be incorrectly recognized. Ideally, we would just shrink the
1338 // signature. That's not easy to do here. Instead, pad the bytes to force it to use
1339 // a larger encoding than needed. This assumes knowledge of the CorSigCompressData()
1343 // compressed_size_m m bytes compressed_size_n result bytes
1345 // 1 m1 4 0xC0 0x00 0x00 m1
1346 // 2 m1 m2 4 0xC0 0x00 (m1 & 0x7f) m2
1348 _ASSERTE((compressed_size_m == 1) || (compressed_size_m == 2) || (compressed_size_m == 4));
1349 _ASSERTE((compressed_size_n == 1) || (compressed_size_n == 2) || (compressed_size_n == 4));
1351 if ((compressed_size_m == 1) &&
1352 (compressed_size_n == 2))
1354 unsigned __int8 m1 = *(pb + 1);
1355 _ASSERTE(m1 < 0x80);
1360 if ((compressed_size_m == 1) &&
1361 (compressed_size_n == 4))
1363 unsigned __int8 m1 = *(pb + 1);
1364 _ASSERTE(m1 < 0x80);
1371 if ((compressed_size_m == 2) &&
1372 (compressed_size_n == 4))
1374 unsigned __int8 m1 = *(pb + 1);
1375 unsigned __int8 m2 = *(pb + 2);
1376 _ASSERTE(m1 >= 0x80);
1377 m1 &= 0x7f; // strip the bit indicating it's a 2-byte thing
1386 CorSigUncompressData((PCCOR_SIGNATURE&) ptr); // bound
// Calling convention, argument count, return type, then the argument types.
1389 case ELEMENT_TYPE_FNPTR:
1391 CorSigUncompressData((PCCOR_SIGNATURE&) ptr); // calling convention
1392 unsigned argCnt = CorSigUncompressData((PCCOR_SIGNATURE&) ptr); // arg count
1393 ptr = skipType(ptr, fFixupType); // return type
1396 ptr = skipType(ptr, fFixupType);
// Generic type, argument count, then the type arguments.
1402 case ELEMENT_TYPE_GENERICINST:
1404 ptr = skipType(ptr, fFixupType); // type constructor
1405 unsigned argCnt = CorSigUncompressData((PCCOR_SIGNATURE&)ptr); // arg count
1407 ptr = skipType(ptr, fFixupType);
1414 case ELEMENT_TYPE_END :
1415 _ASSERTE(!"Unknown Type");
1421 /**************************************************************************/
// Walks the cSig-byte signature at pSig with skipType(..., TRUE) so that type
// parameter fixups inside it are resolved. Nothing is done when
// TyParFixupList is empty.
1422 void FixupTyPars(PCOR_SIGNATURE pSig, ULONG cSig)
1424 if(TyParFixupList.COUNT() > 0)
1426 BYTE* ptr = (BYTE*)pSig;
1427 BYTE* ptrEnd = ptr + cSig;
1430 ptr = skipType(ptr, TRUE);
1432 } // end if(COUNT>0)
// Convenience overload: applies FixupTyPars to the bytes held by a BinStr.
1434 void FixupTyPars(BinStr* pbstype)
1436 FixupTyPars((PCOR_SIGNATURE)(pbstype->ptr()),(ULONG)(pbstype->length()));
1438 /**************************************************************************/
// Walks the encoded argument types in `args` with skipType(..., FALSE) to
// count them. ELEMENT_TYPE_SENTINEL is tested separately, presumably so it is
// not counted as an argument -- TODO confirm, the counting statement is not
// visible in this excerpt.
1439 static unsigned corCountArgs(BinStr* args)
1441 unsigned __int8* ptr = args->ptr();
1442 unsigned __int8* end = &args->ptr()[args->length()];
1446 if (*ptr != ELEMENT_TYPE_SENTINEL)
1448 ptr = skipType(ptr, FALSE);
1456 /********************************************************************************/
// Sets up the parser: registers itself as the assembler's error reporter,
// resets the assembler's current line and column to 1, captures the
// stdout/stderr handles, and resolves alias entries in keywords[].
1457 AsmParse::AsmParse(ReadStream* aIn, Assembler *aAssem)
1459 #ifdef DEBUG_PARSING
1465 assem->SetErrorReporter((ErrorReporter *)this);
1467 assem->m_ulCurLine = 1;
1468 assem->m_ulCurColumn = 1;
1470 wzIncludePath = NULL;
1473 hstdout = GetStdHandle(STD_OUTPUT_HANDLE);
1474 hstderr = GetStdHandle(STD_ERROR_HANDLE);
1477 _ASSERTE(parser == 0); // Should only be one parser instance at a time
// Alias entries are created with token == NO_VALUE; give each one the token
// of the entry that its tokenVal indexes.
1480 for (unsigned int i = 0; i < sizeof(keywords) / sizeof(Keywords); i++)
1482 if (keywords[i].token == NO_VALUE)
1483 keywords[i].token = keywords[keywords[i].tokenVal].token;
1491 /********************************************************************************/
// Pops entries off m_ANSLast until POP() returns a null/zero value.
1492 AsmParse::~AsmParse()
1496 while(m_ANSLast.POP());
1499 /**************************************************************************/
// Thin wrapper that forwards its arguments unchanged to IsTextUnicode().
1500 DWORD AsmParse::IsItUnicode(CONST LPVOID pBuff, int cb, LPINT lpi)
1502 return IsTextUnicode(pBuff,cb,lpi);
1505 /**************************************************************************/
// Allocates a zero-filled PARSING_ENVIRONMENT, makes it the current one
// (penv), records the assembler's source file name in it, loads the input via
// fillBuff() and stores the code page that fillBuff() selected.
1506 void AsmParse::CreateEnvironment(ReadStream* stream)
1508 penv = new PARSING_ENVIRONMENT;
1509 memset(penv,0,sizeof(PARSING_ENVIRONMENT));
1512 strcpy_s(penv->szFileName, MAX_FILENAME_LENGTH*3+1,assem->m_szSourceFileName);
1514 penv->curPos = fillBuff(NULL);
1515 penv->uCodePage = g_uCodePage;
1520 /**************************************************************************/
// Entry point for parsing one input stream; starts by building the parsing
// environment for it via CreateEnvironment.
1521 void AsmParse::ParseFile(ReadStream* stream)
1523 CreateEnvironment(stream);
1528 /**************************************************************************/
// Loads the whole input via penv->in->getAll into penv->curPos, sets
// penv->endPos, sniffs the text encoding, and installs the matching
// character/token helper callbacks into penv.
// Returns penv->curPos.
// Side effects: writes g_uCodePage and several penv fields; prints a progress
// line when assem->m_fReportProgress is set.
1529 char* AsmParse::fillBuff(__in_opt __nullterminated char* pos)
1532 int iOptions = IS_TEXT_UNICODE_UNICODE_MASK;
// Default code page; the detection branches below may change it.
1533 g_uCodePage = CP_ACP;
1534 iPutToBuffer = (int)penv->in->getAll(&(penv->curPos));
1536 penv->endPos = penv->curPos + iPutToBuffer;
// Only the first 128 bytes (at most) are handed to the Unicode test.
1537 if(iPutToBuffer > 128) iPutToBuffer = 128;
1538 if(IsItUnicode(penv->curPos,iPutToBuffer,&iOptions))
1540 g_uCodePage = CP_UTF8;
// A byte-order signature was reported by the test.
1541 if(iOptions & IS_TEXT_UNICODE_SIGNATURE)
1545 if(assem->m_fReportProgress) printf("Source file is UNICODE\n\n");
// Wide-character (W) helper set.
1546 penv->pfn_Sym = SymW;
1547 penv->pfn_nextchar = nextcharW;
1548 penv->pfn_NewStrFromToken = NewStrFromTokenW;
1549 penv->pfn_NewStaticStrFromToken = NewStaticStrFromTokenW;
1550 penv->pfn_GetDouble = GetDoubleW;
// EF BB BF is the UTF-8 byte-order mark.
// NOTE(review): this reads curPos[0..2]; confirm the buffer always holds at
// least three readable bytes, including for very short inputs.
1554 if(((penv->curPos[0]&0xFF)==0xEF)&&((penv->curPos[1]&0xFF)==0xBB)&&((penv->curPos[2]&0xFF)==0xBF))
1556 g_uCodePage = CP_UTF8;
1558 if(assem->m_fReportProgress) printf("Source file is UTF-8\n\n");
1559 penv->pfn_nextchar = nextcharU;
1563 if(assem->m_fReportProgress) printf("Source file is ANSI\n\n");
1564 penv->pfn_nextchar = nextcharA;
// The AU helper set is used here; only pfn_nextchar differs between the UTF-8
// and ANSI cases above.
1566 penv->pfn_Sym = SymAU;
1567 penv->pfn_NewStrFromToken = NewStrFromTokenAU;
1568 penv->pfn_NewStaticStrFromToken = NewStaticStrFromTokenAU;
1569 penv->pfn_GetDouble = GetDoubleAU;
1571 return(penv->curPos);
1574 /********************************************************************************/
// Builds a signature blob in a newly allocated BinStr from a calling convention,
// a return type and an argument list.
//   callConv - calling-convention byte; its GENERIC flag must agree with ntyargs
//   retType  - encoded return type, appended to the result
//   args     - encoded argument types; their count is emitted via corCountArgs
//   ntyargs  - number of generic type arguments (non-zero iff callConv is generic)
// Reports "Out of memory!" through error() on the failure path.
1575 BinStr* AsmParse::MakeSig(unsigned callConv, BinStr* retType, BinStr* args, int ntyargs)
// The generic flag and a non-zero type-argument count must go together.
1577 _ASSERTE((ntyargs != 0) == ((callConv & IMAGE_CEE_CS_CALLCONV_GENERIC) != 0));
1578 BinStr* ret = new BinStr();
1582 ret->insertInt8(callConv);
1584 corEmitInt(ret, ntyargs);
1585 corEmitInt(ret, corCountArgs(args));
1589 ret->append(retType);
1595 error("\nOut of memory!\n");
1601 /********************************************************************************/
// Builds an encoded array type in a newly allocated BinStr: the kind byte, the
// element type, the rank, the count and values of the defined sizes, then the
// count and values of the defined lower bounds.
//   kind     - element-type byte written first
//   elemType - encoded element type, appended after the kind byte
//   bounds   - buffer holding one Bounds record per dimension
// Side effect: writes into the caller's bounds buffer -- entries that are not
// below 0x7FFFFFFF are reset to 0.
1602 BinStr* AsmParse::MakeTypeArray(CorElementType kind, BinStr* elemType, BinStr* bounds)
1604 // 'bounds' is a binary buffer, that contains an array of 'struct Bounds'
1607 unsigned numElements;
1610 _ASSERTE(bounds->length() % sizeof(Bounds) == 0);
// Number of Bounds records, i.e. the rank of the array.
1611 unsigned boundsLen = bounds->length() / sizeof(Bounds);
1612 _ASSERTE(boundsLen > 0);
1613 Bounds* boundsArr = (Bounds*) bounds->ptr();
1615 BinStr* ret = new BinStr();
1617 ret->appendInt8(kind);
1618 ret->append(elemType);
1619 corEmitInt(ret, boundsLen); // emit the rank
// Each counter ends up as (index of the last dimension with a defined value) + 1,
// so every dimension up to that one gets emitted.
1621 unsigned lowerBoundsDefined = 0;
1622 unsigned numElementsDefined = 0;
1624 for(i=0; i < boundsLen; i++)
// A value below 0x7FFFFFFF counts as defined; anything else is reset to 0.
1626 if(boundsArr[i].lowerBound < 0x7FFFFFFF) lowerBoundsDefined = i+1;
1627 else boundsArr[i].lowerBound = 0;
1629 if(boundsArr[i].numElements < 0x7FFFFFFF) numElementsDefined = i+1;
1630 else boundsArr[i].numElements = 0;
1633 corEmitInt(ret, numElementsDefined); // emit number of bounds
1635 for(i=0; i < numElementsDefined; i++)
1637 _ASSERTE (boundsArr[i].numElements >= 0); // enforced at rule time
1638 corEmitInt(ret, boundsArr[i].numElements);
1642 corEmitInt(ret, lowerBoundsDefined); // emit number of lower bounds
1643 for(i=0; i < lowerBoundsDefined; i++)
// Reserve 5 bytes, let the compressor report how many it used (cnt), then give
// back the unused tail.
1645 unsigned cnt = CorSigCompressSignedInt(boundsArr[i].lowerBound, ret->getBuff(5));
1646 ret->remove(5 - cnt);
1653 /********************************************************************************/
// Builds an encoded type reference in a newly allocated BinStr: the kind byte
// followed by the compressed token tk.
//   kind - must be ELEMENT_TYPE_CLASS, ELEMENT_TYPE_VALUETYPE,
//          ELEMENT_TYPE_CMOD_REQD or ELEMENT_TYPE_CMOD_OPT (asserted)
//   tk   - metadata token to compress into the blob
1654 BinStr* AsmParse::MakeTypeClass(CorElementType kind, mdToken tk)
1657 BinStr* ret = new BinStr();
1658 _ASSERTE(kind == ELEMENT_TYPE_CLASS || kind == ELEMENT_TYPE_VALUETYPE ||
1659 kind == ELEMENT_TYPE_CMOD_REQD || kind == ELEMENT_TYPE_CMOD_OPT);
1660 ret->appendInt8(kind);
// Reserve 5 bytes, compress the token into them, then trim the bytes not used.
1661 unsigned cnt = CorSigCompressToken(tk, ret->getBuff(5));
1662 ret->remove(5 - cnt);
1665 /**************************************************************************/
// Writes the string sz to pF. When g_uCodePage is not CP_ACP the text is first
// converted to wide characters (using g_uCodePage) and then back to multibyte
// (using g_uConsoleCP) before printing.
//   pF - destination stream
//   sz - NUL-terminated text; OVERWRITTEN in place on the conversion path
// NOTE(review): the conversion path clears dwUniBuf bytes at sz, so sz must
// point to a buffer at least that large -- confirm for every caller.
1666 void PrintANSILine(FILE* pF, __in __nullterminated char* sz)
// Wide scratch area: the start of the shared wzUniBuf buffer.
1668 WCHAR *wz = &wzUniBuf[0];
1669 if(g_uCodePage != CP_ACP)
1671 memset(wz,0,dwUniBuf); // dwUniBuf/2 WCHARs = dwUniBuf bytes
1672 WszMultiByteToWideChar(g_uCodePage,0,sz,-1,wz,(dwUniBuf >> 1)-1);
1674 memset(sz,0,dwUniBuf);
1675 WszWideCharToMultiByte(g_uConsoleCP,0,wz,-1,sz,dwUniBuf-1,NULL,NULL);
// "%s" keeps any '%' inside the message from being read as a format directive.
1677 fprintf(pF,"%s",sz);
1679 /**************************************************************************/
// Formats an error message and prints it through PrintANSILine.
// The text is "<file>(<line>) : " (only when a parsing environment with an input
// stream exists), then "error : ", then the printf-style message from fmt and
// the variadic arguments.
1680 void AsmParse::error(const char* fmt, ...)
// The message is built in the part of wzUniBuf that starts at WCHAR index dwUniBuf/2.
1682 char *sz = (char*)(&wzUniBuf[(dwUniBuf >> 1)]);
// stdout when progress reporting is off and OnErrGo is set; stderr otherwise.
1684 FILE* pF = ((!assem->m_fReportProgress)&&(assem->OnErrGo)) ? stdout : stderr;
1687 va_start(args, fmt);
// NOTE(review): psz advances after each write but the size argument stays at
// dwUniBuf >> 1, so the stated capacity is not reduced by the characters
// already written -- confirm the buffer is large enough to make this harmless.
// "%S" formats the wide-character name returned by namew().
1689 if((penv) && (penv->in)) psz+=sprintf_s(psz, (dwUniBuf >> 1), "%S(%d) : ", penv->in->namew(), penv->curLine);
1690 psz+=sprintf_s(psz, (dwUniBuf >> 1), "error : ");
// The max-count argument subtracts the prefix length already in sz (and one
// more for the terminator).
1691 _vsnprintf_s(psz, (dwUniBuf >> 1),(dwUniBuf >> 1)-strlen(sz)-1, fmt, args);
1692 PrintANSILine(pF,sz);
1695 /**************************************************************************/
// Formats a warning message and prints it through PrintANSILine.
// The text is "<file>(<line>) : " (only when a parsing environment with an input
// stream exists), then "warning : ", then the printf-style message from fmt and
// the variadic arguments.
1696 void AsmParse::warn(const char* fmt, ...)
// The message is built in the part of wzUniBuf that starts at WCHAR index dwUniBuf/2.
1698 char *sz = (char*)(&wzUniBuf[(dwUniBuf >> 1)]);
// stdout when progress reporting is off and OnErrGo is set; stderr otherwise.
1700 FILE* pF = ((!assem->m_fReportProgress)&&(assem->OnErrGo)) ? stdout : stderr;
1702 va_start(args, fmt);
// NOTE(review): psz advances after each write but the size argument stays at
// dwUniBuf >> 1, so the stated capacity is not reduced by the characters
// already written -- confirm the buffer is large enough to make this harmless.
// "%S" formats the wide-character name returned by namew().
1704 if((penv) && (penv->in)) psz+=sprintf_s(psz, (dwUniBuf >> 1), "%S(%d) : ", penv->in->namew(), penv->curLine);
1705 psz+=sprintf_s(psz, (dwUniBuf >> 1), "warning : ");
// The max-count argument subtracts the prefix length already in sz (and one
// more for the terminator).
1706 _vsnprintf_s(psz, (dwUniBuf >> 1),(dwUniBuf >> 1)-strlen(sz)-1, fmt, args);
1707 PrintANSILine(pF,sz);
1709 /**************************************************************************/
// Formats a printf-style informational message (no prefix) and prints it to
// stdout through PrintANSILine.
1710 void AsmParse::msg(const char* fmt, ...)
// The message is built in the part of wzUniBuf that starts at WCHAR index dwUniBuf/2.
1712 char *sz = (char*)(&wzUniBuf[(dwUniBuf >> 1)]);
1714 va_start(args, fmt);
// At most (dwUniBuf >> 1) - 1 characters are formatted, leaving room for the terminator.
1716 _vsnprintf_s(sz, (dwUniBuf >> 1),(dwUniBuf >> 1)-1, fmt, args);
1717 PrintANSILine(stdout,sz);
1721 #pragma warning(default : 4640)