1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
4 /********************************************************************************/
7 /********************************************************************************/
13 unsigned short tokenVal;// this holds the instruction enumeration for those keywords that are instrs
17 #define NO_VALUE ((unsigned short)-1) // The token has no value
19 static Keywords keywords[] = {
20 // Attention! Because of aliases, the instructions MUST go first!
21 // Redefine all the instructions (defined in assembler.h <- asmenum.h <- opcode.def)
31 #undef ShortInlineBrTarget
42 #define InlineNone INSTR_NONE
43 #define InlineVar INSTR_VAR
44 #define ShortInlineVar INSTR_VAR
45 #define InlineI INSTR_I
46 #define ShortInlineI INSTR_I
47 #define InlineI8 INSTR_I8
48 #define InlineR INSTR_R
49 #define ShortInlineR INSTR_R
50 #define InlineBrTarget INSTR_BRTARGET
51 #define ShortInlineBrTarget INSTR_BRTARGET
52 #define InlineMethod INSTR_METHOD
53 #define InlineField INSTR_FIELD
54 #define InlineType INSTR_TYPE
55 #define InlineString INSTR_STRING
56 #define InlineSig INSTR_SIG
57 #define InlineTok INSTR_TOK
58 #define InlineSwitch INSTR_SWITCH
60 #define InlineVarTok 0
61 #define NEW_INLINE_NAMES
62 // The volatile instruction collides with the volatile keyword, so
63 // we treat it as a keyword everywhere and modify the grammar accordingly (Yuck!)
64 #define OPDEF(c,s,pop,push,args,type,l,s1,s2,ctrl) { s, args, c, lengthof(s)-1 },
65 #define OPALIAS(alias_c, s, c) { s, NO_VALUE, c, lengthof(s)-1 },
71 #define KYWD(name, sym, val) { name, sym, val, lengthof(name)-1 },
77 /********************************************************************************/
78 /* File encoding-dependent functions */
79 /*--------------------------------------------------------------------------*/
// Returns a pointer to the character following 'pos' in an ANSI (multi-byte
// code page) buffer. A byte whose char value is positive is stepped over
// directly; any other byte (zero or negative) is handed to _mbsinc, which
// advances by one multi-byte character.
80 char* nextcharA(__in __nullterminated char* pos)
82 return (*pos > 0) ? ++pos : (char *)_mbsinc((const unsigned char *)pos);
85 char* nextcharU(__in __nullterminated char* pos)
90 char* nextcharW(__in __nullterminated char* pos)
94 /*--------------------------------------------------------------------------*/
// Reads the symbol at 'curPos' for ANSI and UTF-8 input: the single char at
// that position converted to unsigned.
// NOTE(review): where plain char is signed, a byte >= 0x80 converts to a very
// large unsigned value rather than 128..255 - confirm callers only rely on
// the "< 128" range.
95 unsigned SymAU(__in __nullterminated char* curPos)
97 return (unsigned)*curPos;
// Reads the symbol at 'curPos' for wide-character input: the buffer pointer
// is reinterpreted as WCHAR* and the WCHAR found there is returned as unsigned.
100 unsigned SymW(__in __nullterminated char* curPos)
102 return (unsigned)*((WCHAR*)curPos);
104 /*--------------------------------------------------------------------------*/
// Copies a token of 'tokLen' bytes into a newly allocated char array that has
// one byte more than the token.
// NOTE(review): the terminating/return statements are not visible in this
// listing; the array is allocated with new[], so the owner presumably
// releases it with delete [].
105 char* NewStrFromTokenAU(__in_ecount(tokLen) char* curTok, size_t tokLen)
107 char *nb = new char[tokLen+1]; // token bytes plus one spare byte
110 memcpy(nb, curTok, tokLen);
// Wide-character counterpart of NewStrFromTokenAU. 'tokLen' is a byte count,
// so tokLen >> 1 WCHARs are converted to UTF-8 into a newly allocated buffer
// of (tokLen << 1) + 2 bytes.
115 char* NewStrFromTokenW(__in_ecount(tokLen) char* curTok, size_t tokLen)
117 WCHAR* wcurTok = (WCHAR*)curTok;
118 char *nb = new char[(tokLen<<1) + 2];
// From here on tokLen holds the converter's return value, not the input size.
121 tokLen = WszWideCharToMultiByte(CP_UTF8,0,(LPCWSTR)wcurTok,(int)(tokLen >> 1),nb,(int)(tokLen<<1) + 2,NULL,NULL);
126 /*--------------------------------------------------------------------------*/
// Copies a token of 'tokLen' bytes into the caller-supplied buffer
// 'staticBuf' (capacity 'bufSize' bytes) and zero-terminates it.
// Returns NULL when the token plus terminator would not fit.
// NOTE(review): the success return statement is not visible in this listing.
127 char* NewStaticStrFromTokenAU(__in_ecount(tokLen) char* curTok, size_t tokLen, __out_ecount(bufSize) char* staticBuf, size_t bufSize)
129 if(tokLen >= bufSize) return NULL;
130 memcpy(staticBuf, curTok, tokLen);
131 staticBuf[tokLen] = 0;
// Wide-character counterpart of NewStaticStrFromTokenAU: converts
// tokLen >> 1 WCHARs to UTF-8 directly into 'staticBuf' and zero-terminates
// the result at the length reported by the converter.
// Returns NULL when the token's byte length is at least half of 'bufSize'.
// NOTE(review): the success return statement is not visible in this listing.
134 char* NewStaticStrFromTokenW(__in_ecount(tokLen) char* curTok, size_t tokLen, __out_ecount(bufSize) char* staticBuf, size_t bufSize)
136 WCHAR* wcurTok = (WCHAR*)curTok;
137 if(tokLen >= bufSize/2) return NULL;
// tokLen is reused for the converter's return value.
138 tokLen = WszWideCharToMultiByte(CP_UTF8,0,(LPCWSTR)wcurTok,(int)(tokLen >> 1),staticBuf,(int)bufSize,NULL,NULL);
139 staticBuf[tokLen] = 0;
142 /*--------------------------------------------------------------------------*/
// Parses a floating-point literal from a char buffer. 'L' bytes starting at
// 'begNum' are copied into a static 128-byte scratch buffer and parsed with
// strtod; the value is stored in a newly allocated double returned through
// 'ppRes'. The return value is the distance from the start of the scratch
// buffer to where strtod stopped.
// NOTE(review): the statements that bound L to the scratch buffer and
// terminate the copy are not visible in this listing - confirm they exist.
// The static buffer also makes the function non-reentrant.
143 unsigned GetDoubleAU(__in __nullterminated char* begNum, unsigned L, double** ppRes)
145 static char dbuff[128];
148 memcpy(dbuff,begNum,L);
150 *ppRes = new double(strtod(dbuff, &pdummy));
151 return ((unsigned)(pdummy - dbuff));
// Wide-character counterpart of GetDoubleAU: 'L' bytes are copied into a
// static 256-byte scratch buffer, which is then parsed as a wchar_t string
// with wcstod. The value is stored in a newly allocated double returned
// through 'ppRes'; the return value is pdummy - dbuff.
// NOTE(review): pdummy's declaration is not visible; if it is a char* (as
// the cast to wchar_t** suggests) the result is a byte offset - confirm.
// Bounding of L and termination of the copy are also not visible here.
154 unsigned GetDoubleW(__in __nullterminated char* begNum, unsigned L, double** ppRes)
156 static char dbuff[256];
159 memcpy(dbuff,begNum,L);
162 *ppRes = new double(wcstod((const wchar_t*)dbuff, (wchar_t**)&pdummy));
163 return ((unsigned)(pdummy - dbuff));
165 /*--------------------------------------------------------------------------*/
// Copies the text of source line 'Line' into a static 16 KB buffer.
// The whole input is obtained from parser->getAll(); the function counts
// '\n' symbols to find the start of the requested line, scans to the '\n'
// that ends it, drops a trailing '\r', and copies the bytes in between.
// The wide-character branch is taken when Sym == SymW.
// NOTE(review): the return statement and the narrow-buffer terminator are
// not visible in this listing. The buffer is static, so each call
// overwrites the previous result.
166 char* yygetline(int Line)
168 static char buff[0x4000];
169 char *pLine=NULL, *pNextLine=NULL;
170 char *pBegin=NULL, *pEnd = NULL;
171 unsigned uCount = parser->getAll(&pBegin);
172 pEnd = pBegin + uCount;
// uCount is reused as a newline counter: stop once Line-1 newlines were seen.
174 for(uCount=0, pLine=pBegin; pLine < pEnd; pLine = nextchar(pLine))
176 if(Sym(pLine) == '\n') uCount++;
177 if(uCount == (unsigned int)(Line-1)) break;
179 pLine = nextchar(pLine);
// Find the end of the line: the next '\n' or the end of the input.
182 for(pNextLine = pLine; pNextLine < pEnd; pNextLine = nextchar(pNextLine))
184 if(Sym(pNextLine) == '\n') break;
186 if(Sym == SymW) // Unicode file
188 if(*((WCHAR*)pNextLine - 1) == '\r') pNextLine -= 2; // drop trailing CR (2 bytes)
189 uCount = (unsigned)(pNextLine - pLine);
// NOTE(review): uCount is a byte count here, and '&=' wraps an over-long
// length instead of clamping it (0x2000 becomes 0) - confirm this is intended.
190 uCount &= 0x1FFF; // limit: 8K wchars
191 WCHAR* wzBuff = (WCHAR*)buff;
192 memcpy(buff,pLine,uCount);
193 wzBuff[uCount >> 1] = 0;
197 if(*(pNextLine-1)=='\r') pNextLine--; // drop trailing CR (1 byte)
198 uCount = (unsigned)(pNextLine - pLine);
// Same masking caveat as above: the length wraps rather than saturates.
199 uCount &= 0x3FFF; // limit: 16K chars
200 memcpy(buff,pLine,uCount);
// Reports a parse error on stderr and marks the parse as failed.
// The message contains the file name, the line number, the text 'str', the
// current token (at most 62 bytes between PENV->curTok and PENV->curPos) and
// the source line returned by yygetline. When PENV->bExternSource is set,
// the file name and line number come from the external-source fields
// instead. A wide-character format string is used when Sym == SymW.
207 void yyerror(__in __nullterminated const char* str) {
209 WCHAR *wzfile = (WCHAR*)(PENV->in->namew());
210 int iline = PENV->curLine;
212 size_t len = PENV->curPos - PENV->curTok;
213 if (len > 62) len = 62; // cap the number of token bytes copied into tokBuff
214 memcpy(tokBuff, PENV->curTok, len);
217 if(PENV->bExternSource)
219 wzfile = PASM->m_wzSourceFileName;
220 iline = PENV->nExtLine;
222 if(Sym == SymW) // Unicode file
223 fprintf(stderr, "%S(%d) : error : %s at token '%S' in: %S\n",
224 wzfile, iline, str, (WCHAR*)tokBuff, (WCHAR*)yygetline(PENV->curLine));
226 fprintf(stderr, "%S(%d) : error : %s at token '%s' in: %s\n",
227 wzfile, iline, str, tokBuff, yygetline(PENV->curLine));
228 parser->success = false;
231 /********************************************************************************/
232 /* Looks up the typedef 'name' of length 'NameLen' (name does not need to be
233 null-terminated). Returns 0 (NULL) on failure.
    The lookup is attempted only when PASM->NumTypeDefs() is non-zero and the
    token fits into the static 4096-byte buffer used to build a terminated
    copy of the name. */
234 TypeDefDescr* findTypedef(__in_ecount(NameLen) char* name, size_t NameLen)
236 TypeDefDescr* pRet = NULL;
237 static char Name[4096];
238 if(PASM->NumTypeDefs())
240 if(NewStaticStrFromToken(name,NameLen,Name,4096))
241 pRet = PASM->FindTypeDef(Name);
246 int TYPEDEF(TypeDefDescr* pTDD)
248 switch(TypeFromToken(pTDD->m_tkTypeSpec))
263 case mdtCustomAttribute:
270 /********************************************************************************/
// Registers every entry of the static 'keywords' table in 'indx', keyed by
// the entry's name and carrying a pointer to the entry itself.
271 void indexKeywords(Indx* indx) // called in Assembler constructor (assem.cpp)
273 Keywords* low = keywords;
274 Keywords* high = keywords + (sizeof(keywords) / sizeof(Keywords)); // one past the last entry
276 for(mid = low; mid < high; mid++)
278 indx->IndexString((char*)(mid->name),mid);
// Obtains an Instr record from PASM->GetInstr() and, if one is returned,
// fills it in: the opcode, the symbol document (pWriter), source line/column
// information and the current PC. Line/column data is written only when a
// symbol document is present; it is taken from the external-source fields
// when PENV->bExternSource is set, and from PENV->curLine otherwise.
// NOTE(review): the return statement and any failure handling are not
// visible in this listing.
282 Instr* SetupInstr(unsigned short opcode)
285 if((pVal = PASM->GetInstr()))
287 pVal->opcode = opcode;
288 if((pVal->pWriter = PASM->m_pSymDocument)!=NULL)
290 if(PENV->bExternSource)
292 pVal->linenum = PENV->nExtLine;
293 pVal->column = PENV->nExtCol;
294 pVal->linenum_end = PENV->nExtLineEnd;
295 pVal->column_end = PENV->nExtColEnd;
300 pVal->linenum = PENV->curLine;
302 pVal->linenum_end = PENV->curLine;
303 pVal->column_end = 0;
304 pVal->pc = PASM->m_CurPC;
310 /* Looks up the keyword 'name' of length 'nameLen' (name does not need to be
311 null-terminated). Returns 0 on failure.
    A terminated copy of the token is built in a static 128-byte buffer and
    searched for in PASM->indxKeywords. On a hit, the entry's tokenVal is
    stored through 'pOpcode'.
    NOTE(review): the success return statement is not visible in this
    listing. */
312 int findKeyword(const char* name, size_t nameLen, unsigned short* pOpcode)
314 static char Name[128];
317 if(NULL == NewStaticStrFromToken((char*)name,nameLen,Name,128)) return 0; // can't be a keyword
318 mid = (Keywords*)(PASM->indxKeywords.FindString(Name));
319 if(mid == NULL) return 0;
320 *pOpcode = mid->tokenVal;
325 /********************************************************************************/
326 /* convert str to a uint64 */
327 unsigned digits[128];
// Builds the character-to-digit-value table 'digits'.
// Every byte of the table is first set to 0xFF, so entries that are not
// assigned below hold an all-ones value. '0'..'9' then map to 0..9, and
// letters map to 10..35 regardless of case.
328 void Init_str2uint64()
331 memset(digits,255,sizeof(digits));
332 for(i='0'; i <= '9'; i++) digits[i] = i - '0';
333 for(i='A'; i <= 'Z'; i++) digits[i] = i + 10 - 'A';
334 for(i='a'; i <= 'z'; i++) digits[i] = i + 10 - 'a';
// Converts the digits starting at 'str' to an unsigned 64-bit value in the
// given radix (asserted to be at most 36), reading one symbol per iteration
// through Sym/nextchar and accumulating ret = ret * radix + digit.
// NOTE(review): the digit lookup, the loop exit condition and the handling
// of 'endStr' are not visible in this listing; no overflow check is visible
// either.
336 static unsigned __int64 str2uint64(const char* str, const char** endStr, unsigned radix)
338 unsigned __int64 ret = 0;
340 _ASSERTE(radix <= 36);
341 for(;;str = nextchar((char*)str))
343 ix = Sym((char*)str);
349 ret = ret * radix + digit;
357 /********************************************************************************/
358 /* Append a UTF-8 string, preceded by its compressed length and without a zero terminator, to a BinStr */
359 static void AppendStringWithLength(BinStr* pbs, __in __nullterminated char* sz)
361 if((pbs != NULL) && (sz != NULL))
363 unsigned L = (unsigned) strlen(sz);
366 if((pb = pbs->getBuff(L)) != NULL)
371 /********************************************************************************/
372 /* Fetch the next token and return it. Also set the yylval union if the
373 lexical token also has a value. */
// Initializes the character-class lookup tables used by the Is* predicates.
// All five tables are cleared, then entries are set for upper-case letters,
// lower-case letters and digits (the loop bodies are not visible in this
// listing), and finally for the punctuation characters listed below.
381 void SetSymbolTables()
384 memset(_Alpha,0,sizeof(_Alpha));
385 memset(_Digit,0,sizeof(_Digit));
386 memset(_AlNum,0,sizeof(_AlNum));
387 memset(_ValidSS,0,sizeof(_ValidSS));
388 memset(_ValidCS,0,sizeof(_ValidCS));
389 for(i = 'A'; i <= 'Z'; i++)
396 for(i = 'a'; i <= 'z'; i++)
403 for(i = '0'; i <= '9'; i++)
// Punctuation accepted as a starting symbol (_ValidSS).
409 _ValidSS[(unsigned char)'_'] = TRUE;
410 _ValidSS[(unsigned char)'#'] = TRUE;
411 _ValidSS[(unsigned char)'$'] = TRUE;
412 _ValidSS[(unsigned char)'@'] = TRUE;
// Punctuation accepted as a continuing symbol (_ValidCS): '#' is absent,
// while '?' and '`' are added.
414 _ValidCS[(unsigned char)'_'] = TRUE;
415 _ValidCS[(unsigned char)'?'] = TRUE;
416 _ValidCS[(unsigned char)'$'] = TRUE;
417 _ValidCS[(unsigned char)'@'] = TRUE;
418 _ValidCS[(unsigned char)'`'] = TRUE;
// Character-class predicates backed by the lookup tables. In each one the
// "x < 128" test is evaluated first, so the table is only indexed with values
// below 128 and any larger symbol yields FALSE.
420 BOOL IsAlpha(unsigned x) { return (x < 128)&&_Alpha[x]; }
421 BOOL IsDigit(unsigned x) { return (x < 128)&&_Digit[x]; }
422 BOOL IsAlNum(unsigned x) { return (x < 128)&&_AlNum[x]; }
// Symbols allowed to start / continue a token, per the _ValidSS / _ValidCS tables.
423 BOOL IsValidStartingSymbol(unsigned x) { return (x < 128)&&_ValidSS[x]; }
424 BOOL IsValidContinuingSymbol(unsigned x) { return (x < 128)&&_ValidCS[x]; }
427 char* nextBlank(__in __nullterminated char* curPos)
434 if ((Sym(nextchar(curPos)) == '/')|| (Sym(nextchar(curPos)) == '*'))
438 curPos = nextchar(curPos);
450 curPos = nextchar(curPos);
455 char* skipBlanks(__in __nullterminated char* curPos, unsigned* pstate)
457 const unsigned eolComment = 1;
458 const unsigned multiComment = 2;
459 unsigned nextSym, state = *pstate;
462 { // skip whitespace and comments
463 if (curPos >= PENV->endPos)
473 state &= ~eolComment;
475 if(PENV->bExternSource)
477 if(PENV->bExternSourceAutoincrement) PENV->nExtLine++;
478 PASM->m_ulCurLine = PENV->nExtLine;
479 PASM->m_ulCurColumn = PENV->nExtCol;
483 PASM->m_ulCurLine = PENV->curLine;
484 PASM->m_ulCurColumn = 1;
494 if(state == 0) goto PAST_WHITESPACE;
495 if(state & multiComment)
497 nextPos = nextchar(curPos);
498 if (Sym(nextPos) == '/')
501 state &= ~multiComment;
509 nextPos = nextchar(curPos);
510 nextSym = Sym(nextPos);
516 else if (nextSym == '*')
519 state |= multiComment;
521 else goto PAST_WHITESPACE;
526 if (state == 0) goto PAST_WHITESPACE;
528 curPos = nextchar(curPos);
535 char* FullFileName(__in __nullterminated WCHAR* wzFileName, unsigned uCodePage);
539 PARSING_ENVIRONMENT* prev_penv = parser->PEStack.POP();
540 if(prev_penv != NULL)
542 //delete [] (WCHAR*)(PENV->in->namew());
545 parser->penv = prev_penv;
547 char* szFileName = new char[strlen(PENV->szFileName)+1];
548 strcpy_s(szFileName,strlen(PENV->szFileName)+1,PENV->szFileName);
549 PASM->SetSourceFileName(szFileName); // deletes the argument!
556 #define NEXT_TOKEN {state=0; curPos=PENV->curPos; goto NextToken;}
558 int parse_literal(unsigned curSym, __inout __nullterminated char* &curPos, BOOL translate_escapes)
560 unsigned quote = curSym;
561 curPos = nextchar(curPos);
562 char* fromPtr = curPos;
566 { // Find matching quote
567 curSym = (curPos >= PENV->endPos) ? 0 : Sym(curPos);
570 PENV->curPos = curPos;
571 return(BAD_LITERAL_);
573 else if(curSym == '\\')
580 if(PENV->bExternSource)
582 if(PENV->bExternSourceAutoincrement) PENV->nExtLine++;
583 PASM->m_ulCurLine = PENV->nExtLine;
584 PASM->m_ulCurColumn = PENV->nExtCol;
588 PASM->m_ulCurLine = PENV->curLine;
589 PASM->m_ulCurColumn = 1;
591 if (!escape) { PENV->curPos = curPos; return(BAD_LITERAL_); }
593 else if ((curSym == quote) && (!escape)) break;
596 curPos = nextchar(curPos);
598 // translate escaped characters
599 unsigned tokLen = (unsigned)(curPos - fromPtr);
600 char* newstr = NewStrFromToken(fromPtr, tokLen);
602 curPos = nextchar(curPos); // skip closing quote
603 if(translate_escapes)
607 tokLen = (unsigned)strlen(newstr);
608 toPtr = new char[tokLen+1];
609 if(toPtr==NULL) return BAD_LITERAL_;
610 yylval.string = toPtr;
611 char* endPtr = fromPtr+tokLen;
612 while(fromPtr < endPtr)
614 if (*fromPtr == '\\')
645 while(isspace(*fromPtr));
646 --fromPtr; // undo the increment below
652 if (IsDigit(fromPtr[1]) && IsDigit(fromPtr[2]))
654 *toPtr++ = ((fromPtr[0] - '0') * 8 + (fromPtr[1] - '0')) * 8 + (fromPtr[2] - '0');
657 else if(*fromPtr == '0') *toPtr++ = 0;
658 else *toPtr++ = *fromPtr;
666 // *toPtr++ = *fromPtr++;
668 char* tmpPtr = fromPtr;
669 fromPtr = (nextchar == nextcharW) ? nextcharU(fromPtr) : nextchar(fromPtr);
670 while(tmpPtr < fromPtr) *toPtr++ = *tmpPtr++;
673 } //end while(fromPtr < endPtr)
674 *toPtr = 0; // terminate string
679 yylval.string = newstr;
680 toPtr = newstr + strlen(newstr);
683 PENV->curPos = curPos;
686 BinStr* pBS = new BinStr();
687 unsigned size = (unsigned)(toPtr - yylval.string);
688 memcpy(pBS->getBuff(size),yylval.string,size);
689 delete [] yylval.string;
695 if(PASM->NumTypeDefs())
697 TypeDefDescr* pTDD = PASM->FindTypeDef(yylval.string);
700 delete [] yylval.string;
702 return(TYPEDEF(pTDD));
710 #pragma warning(push)
711 #pragma warning(disable:21000) // Suppress PREFast warning about overly large function
715 char* curPos = PENV->curPos;
717 const unsigned multiComment = 2;
723 // Skip any leading whitespace and comments
724 curPos = skipBlanks(curPos, &state);
727 if (state & multiComment) return (BAD_COMMENT_);
728 if(ProcessEOF() == 0) return 0; // EOF
731 char* curTok = curPos;
732 PENV->curTok = curPos;
733 PENV->curPos = curPos;
737 curSym = Sym(curPos);
738 if(bParsingByteArray) // only hexadecimals w/o 0x, ')' and white space allowed!
741 for(i=0; i<2; i++, curPos = nextchar(curPos), curSym = Sym(curPos))
743 if(('0' <= curSym)&&(curSym <= '9')) s = s*16+(curSym - '0');
744 else if(('A' <= curSym)&&(curSym <= 'F')) s = s*16+(curSym - 'A' + 10);
745 else if(('a' <= curSym)&&(curSym <= 'f')) s = s*16+(curSym - 'a' + 10);
746 else break; // don't increase curPos!
755 if(curSym == ')' || curSym == '}')
757 bParsingByteArray = FALSE;
758 goto Just_A_Character;
761 PENV->curPos = curPos;
764 if(curSym == '?') // '?' may be part of an identifier, if it's not followed by punctuation
766 if(IsValidContinuingSymbol(Sym(nextchar(curPos)))) goto Its_An_Id;
767 goto Just_A_Character;
770 if (IsValidStartingSymbol(curSym))
773 size_t offsetDot = (size_t)-1; // first appearance of '.'
774 size_t offsetDotDigit = (size_t)-1; // first appearance of '.<digit>' (not DOTTEDNAME!)
777 curPos = nextchar(curPos);
778 if (Sym(curPos) == '.')
780 if (offsetDot == (size_t)-1) offsetDot = curPos - curTok;
781 curPos = nextchar(curPos);
782 if((offsetDotDigit==(size_t)-1)&&(Sym(curPos) >= '0')&&(Sym(curPos) <= '9'))
783 offsetDotDigit = curPos - curTok - 1;
785 } while(IsValidContinuingSymbol(Sym(curPos)));
787 size_t tokLen = curPos - curTok;
788 // check to see if it is a keyword
789 int token = findKeyword(curTok, tokLen, &yylval.opcode);
792 //printf("yylex: TOK = %d, curPos=0x%8.8X\n",token,curPos);
793 PENV->curPos = curPos;
794 PENV->curTok = curTok;
800 //if(include_first_pass)
802 // PENV->curPos = curTok;
803 // include_first_pass = FALSE;
806 //include_first_pass = TRUE;
807 curPos = skipBlanks(curPos,&state);
810 if (state & multiComment) return (BAD_COMMENT_);
811 if(ProcessEOF() == 0) return 0; // EOF
814 if(Sym(curPos) != '"') return ERROR_;
815 curPos = nextchar(curPos);
817 PENV->curTok = curPos;
818 while(Sym(curPos) != '"')
820 curPos = nextchar(curPos);
821 if(curPos >= PENV->endPos) return ERROR_;
822 PENV->curPos = curPos;
824 tokLen = PENV->curPos - curTok;
825 curPos = nextchar(curPos);
826 PENV->curPos = curPos;
831 if((wzFile = new WCHAR[tokLen/2 + 1]) != NULL)
833 memcpy(wzFile,curTok,tokLen);
834 wzFile[tokLen/2] = 0;
839 if((wzFile = new WCHAR[tokLen+1]) != NULL)
841 tokLen = WszMultiByteToWideChar(g_uCodePage,0,curTok,(int)tokLen,wzFile,(int)tokLen+1);
847 if((parser->wzIncludePath != NULL)
848 &&(wcschr(wzFile,'\\')==NULL)&&(wcschr(wzFile,':')==NULL))
850 PathString wzFullName;
853 DWORD dw = WszSearchPath(parser->wzIncludePath,wzFile,NULL,
854 TRUE, wzFullName,&pwz);
859 wzFile = wzFullName.GetCopyOfUnicodeString();
863 if(PASM->m_fReportProgress)
864 parser->msg("\nIncluding '%S'\n",wzFile);
865 MappedFileStream *pIn = new MappedFileStream(wzFile);
866 if((pIn != NULL)&&pIn->IsValid())
868 parser->PEStack.PUSH(PENV);
869 PASM->SetSourceFileName(FullFileName(wzFile,CP_UTF8)); // deletes the argument!
870 parser->CreateEnvironment(pIn);
876 PASM->report->error("#include failed\n");
882 PASM->report->error("Out of memory\n");
886 curPos = PENV->curPos;
887 curTok = PENV->curTok;
893 curPos = skipBlanks(curPos,&state);
896 if (state & multiComment) return (BAD_COMMENT_);
897 if(ProcessEOF() == 0) return 0; // EOF
901 PENV->curTok = curPos;
902 PENV->curPos = curPos;
903 if (!IsValidStartingSymbol(Sym(curPos))) return ERROR_;
906 curPos = nextchar(curPos);
907 } while(IsValidContinuingSymbol(Sym(curPos)));
908 tokLen = curPos - curTok;
910 newstr = NewStrFromToken(curTok, tokLen);
911 if((token==P_DEFINE)||(token==P_UNDEF))
913 if(token == P_DEFINE)
915 curPos = skipBlanks(curPos,&state);
916 if ((curPos == NULL) && (ProcessEOF() == 0))
918 DefineVar(newstr, NULL);
921 curSym = Sym(curPos);
923 DefineVar(newstr, NULL);
926 tok = parse_literal(curSym, curPos, FALSE);
929 // if not ANSI, then string is in UTF-8,
931 if(nextchar != nextcharA)
933 yylval.binstr->insertInt8(0xEF);
934 yylval.binstr->insertInt8(0xBB);
935 yylval.binstr->insertInt8(0xBF);
937 yylval.binstr->appendInt8(' ');
938 DefineVar(newstr, yylval.binstr);
944 else UndefVar(newstr);
948 SkipToken = IsVarDefined(newstr);
949 if(token == P_IFDEF) SkipToken = !SkipToken;
951 if(SkipToken) IfEndifSkip=IfEndif;
961 PASM->report->error("Unmatched #endif\n");
980 if(IfEndif == IfEndifSkip) SkipToken = FALSE;
983 if(IfEndif == IfEndifSkip) SkipToken = FALSE;
989 //if(yylval.instr) yylval.instr->opcode = -1;
993 } // end if token != 0
994 if(SkipToken) { curPos = nextBlank(curPos); goto NextToken; }
996 VarName* pVarName = FindVarDef(NewStrFromToken(curTok, tokLen));
999 if(pVarName->pbody != NULL)
1001 BinStrStream *pIn = new BinStrStream(pVarName->pbody);
1002 if((pIn != NULL)&&pIn->IsValid())
1004 PENV->curPos = curPos;
1005 parser->PEStack.PUSH(PENV);
1006 parser->CreateEnvironment(pIn);
1012 TypeDefDescr* pTDD = findTypedef(curTok,tokLen);
1017 PENV->curPos = curPos;
1018 PENV->curTok = curTok;
1019 return(TYPEDEF(pTDD));
1021 if(Sym(curTok) == '#')
1023 PENV->curPos = curPos;
1024 PENV->curTok = curTok;
1027 // Not a keyword, normal identifiers don't have '.' in them
1028 if (offsetDot < (size_t)-1)
1030 if(offsetDotDigit < (size_t)-1)
1032 curPos = curTok+offsetDotDigit;
1033 tokLen = offsetDotDigit;
1035 // protection against something like Foo.Bar..123 or Foo.Bar.
1036 unsigned D = (Sym == SymW) ? 2 : 1; // Unicode or ANSI/UTF8!
1037 while((Sym(curPos-D)=='.')&&(tokLen))
1043 if((yylval.string = NewStrFromToken(curTok,tokLen)))
1045 tok = (offsetDot == (size_t)(-1))? ID : DOTTEDNAME;
1046 //printf("yylex: ID = '%s', curPos=0x%8.8X\n",yylval.string,curPos);
1048 else return BAD_LITERAL_;
1050 else if(SkipToken) { curPos = nextBlank(curPos); goto NextToken; }
1051 else if (IsDigit(curSym)
1052 || (curSym == '.' && IsDigit(Sym(nextchar(curPos))))
1053 || (curSym == '-' && IsDigit(Sym(nextchar(curPos)))))
1055 const char* begNum = curPos;
1056 unsigned radix = 10;
1058 neg = (curSym == '-'); // always make it unsigned
1059 if (neg) curPos = nextchar(curPos);
1061 if (Sym(curPos) == '0' && Sym(nextchar(curPos)) != '.')
1063 curPos = nextchar(curPos);
1065 if (Sym(curPos) == 'x' || Sym(curPos) == 'X')
1067 curPos = nextchar(curPos);
1073 unsigned __int64 i64 = str2uint64(begNum, const_cast<const char**>(&curPos), radix);
1074 unsigned __int64 mask64 = neg ? UI64(0xFFFFFFFF80000000) : UI64(0xFFFFFFFF00000000);
1075 unsigned __int64 largestNegVal32 = UI64(0x0000000080000000);
1076 if ((i64 & mask64) && (i64 != largestNegVal32))
1078 yylval.int64 = new __int64(i64);
1080 if (neg) *yylval.int64 = -*yylval.int64;
1084 yylval.int32 = (__int32)i64;
1086 if(neg) yylval.int32 = -yylval.int32;
1089 if (radix == 10 && ((Sym(curPos) == '.' && Sym(nextchar(curPos)) != '.') || Sym(curPos) == 'E' || Sym(curPos) == 'e'))
1091 unsigned L = (unsigned)(PENV->endPos - begNum);
1092 curPos = (char*)begNum + GetDouble((char*)begNum,L,&yylval.float64);
1093 if (neg) *yylval.float64 = -*yylval.float64;
1099 if (curSym == '"' || curSym == '\'')
1101 return parse_literal(curSym, curPos, TRUE);
1102 } // end if (*curPos == '"' || *curPos == '\'')
1103 else if (curSym==':' && Sym(nextchar(curPos))==':')
1105 curPos = nextchar(nextchar(curPos));
1108 else if(curSym == '.')
1110 if (Sym(nextchar(curPos))=='.' && Sym(nextchar(nextchar(curPos)))=='.')
1112 curPos = nextchar(nextchar(nextchar(curPos)));
1119 curPos = nextchar(curPos);
1120 if (curPos >= PENV->endPos)
1122 curSym = Sym(curPos);
1124 while(IsAlNum(curSym) || curSym == '_' || curSym == '$'|| curSym == '@'|| curSym == '?');
1125 size_t tokLen = curPos - curTok;
1127 // check to see if it is a keyword
1128 int token = findKeyword(curTok, tokLen, &yylval.opcode);
1131 //printf("yylex: TOK = %d, curPos=0x%8.8X\n",token,curPos);
1132 PENV->curPos = curPos;
1133 PENV->curTok = curTok;
1137 curPos = nextchar(curTok);
1144 curPos = nextchar(curPos);
1146 //printf("yylex: PUNCT curPos=0x%8.8X\n",curPos);
1148 dbprintf((" Line %d token %d (%c) val = %s\n", PENV->curLine, tok,
1149 (tok < 128 && isprint(tok)) ? tok : ' ',
1150 (tok > 255 && tok != INT32 && tok != INT64 && tok!= FLOAT64) ? yylval.string : ""));
1152 PENV->curPos = curPos;
1153 PENV->curTok = curTok;
1157 #pragma warning(pop)
1160 /**************************************************************************/
// Duplicates the zero-terminated string 'str1' into a newly allocated char
// array sized strlen(str1)+1.
// NOTE(review): the return statement is not visible in this listing.
1161 static char* newString(__in __nullterminated const char* str1)
1163 char* ret = new char[strlen(str1)+1];
1164 if(ret) strcpy_s(ret, strlen(str1)+1, str1);
1168 /**************************************************************************/
1169 /* concatenate strings and release them */
// Builds a new string made of 'str1', the 'delimiter' character and 'str3',
// in a newly allocated buffer. 'str3' may be null when sizing the buffer.
// NOTE(review): the guards around the copy statements, the release of the
// inputs and the return statement are not visible in this listing.
1171 static char* newStringWDel(__in __nullterminated char* str1, char delimiter, __in __nullterminated char* str3)
1173 size_t len1 = strlen(str1);
1174 size_t len = len1+2; // str1 + delimiter + zero terminator
1175 if (str3) len += strlen(str3);
1176 char* ret = new char[len];
1179 strcpy_s(ret, len, str1);
1181 ret[len1] = delimiter; // overwrites the terminator written by strcpy_s
1185 strcat_s(ret, len, str3);
1192 /**************************************************************************/
// Emits 'data' into 'buff' in compressed signature form.
// Five bytes are requested from the buffer, CorSigCompressData writes the
// encoding into them and reports how many bytes it used, and the unused
// remainder (5 - cnt) is removed again.
// NOTE(review): presumably getBuff() extends the buffer at its end and
// remove() trims from the end - confirm against BinStr.
1193 static void corEmitInt(BinStr* buff, unsigned data)
1195 unsigned cnt = CorSigCompressData(data, buff->getBuff(5));
1196 buff->remove(5 - cnt);
1200 /**************************************************************************/
1201 /* move 'ptr' past exactly one type description */
1203 unsigned __int8* skipType(unsigned __int8* ptr, BOOL fFixupType)
1208 case ELEMENT_TYPE_VOID :
1209 case ELEMENT_TYPE_BOOLEAN :
1210 case ELEMENT_TYPE_CHAR :
1211 case ELEMENT_TYPE_I1 :
1212 case ELEMENT_TYPE_U1 :
1213 case ELEMENT_TYPE_I2 :
1214 case ELEMENT_TYPE_U2 :
1215 case ELEMENT_TYPE_I4 :
1216 case ELEMENT_TYPE_U4 :
1217 case ELEMENT_TYPE_I8 :
1218 case ELEMENT_TYPE_U8 :
1219 case ELEMENT_TYPE_R4 :
1220 case ELEMENT_TYPE_R8 :
1221 case ELEMENT_TYPE_U :
1222 case ELEMENT_TYPE_I :
1223 case ELEMENT_TYPE_STRING :
1224 case ELEMENT_TYPE_OBJECT :
1225 case ELEMENT_TYPE_TYPEDBYREF :
1226 case ELEMENT_TYPE_SENTINEL :
1230 case ELEMENT_TYPE_VALUETYPE :
1231 case ELEMENT_TYPE_CLASS :
1232 ptr += CorSigUncompressToken(ptr, &tk);
1235 case ELEMENT_TYPE_CMOD_REQD :
1236 case ELEMENT_TYPE_CMOD_OPT :
1237 ptr += CorSigUncompressToken(ptr, &tk);
1240 case ELEMENT_TYPE_ARRAY :
1242 ptr = skipType(ptr, fFixupType); // element Type
1243 unsigned rank = CorSigUncompressData((PCCOR_SIGNATURE&) ptr);
1246 unsigned numSizes = CorSigUncompressData((PCCOR_SIGNATURE&) ptr);
1249 CorSigUncompressData((PCCOR_SIGNATURE&) ptr);
1252 unsigned numLowBounds = CorSigUncompressData((PCCOR_SIGNATURE&) ptr);
1253 while(numLowBounds > 0)
1255 CorSigUncompressData((PCCOR_SIGNATURE&) ptr);
1262 // Modifiers or dependent types
1263 case ELEMENT_TYPE_PINNED :
1264 case ELEMENT_TYPE_PTR :
1265 case ELEMENT_TYPE_BYREF :
1266 case ELEMENT_TYPE_SZARRAY :
1267 // tail recursion optimization
1268 // ptr = skipType(ptr, fFixupType);
1272 case ELEMENT_TYPE_VAR:
1273 case ELEMENT_TYPE_MVAR:
1274 CorSigUncompressData((PCCOR_SIGNATURE&) ptr); // bound
1277 case ELEMENT_TYPE_VARFIXUP:
1278 case ELEMENT_TYPE_MVARFIXUP:
1281 BYTE* pb = ptr-1; // ptr incremented in switch
1282 unsigned __int8* ptr_save = ptr;
1283 int n = CorSigUncompressData((PCCOR_SIGNATURE&) ptr); // fixup #
1284 int compressed_size_n = (int)(ptr - ptr_save); // ptr was updated by CorSigUncompressData()
1286 if(PASM->m_TyParList)
1287 m = PASM->m_TyParList->IndexOf(TyParFixupList.PEEK(n));
1290 PASM->report->error("(fixupType) Invalid %stype parameter '%s'\n",
1291 (*pb == ELEMENT_TYPE_MVARFIXUP)? "method ": "",
1292 TyParFixupList.PEEK(n));
1295 *pb = (*pb == ELEMENT_TYPE_MVARFIXUP)? ELEMENT_TYPE_MVAR : ELEMENT_TYPE_VAR;
1296 int compressed_size_m = (int)CorSigCompressData(m,pb+1);
1298 // Note that CorSigCompressData() (and hence, CorSigUncompressData()) store a number
1299 // 0 <= x <= 0x1FFFFFFF in 1, 2, or 4 bytes. Above, 'n' is the fixup number being read,
1300 // and 'm' is the generic parameter number being written out (in the same place where 'n'
1301 // came from). If 'n' takes more space to compress than 'm' (e.g., 0x80 <= n <= 0x3fff so
1302 // it takes 2 bytes, and m < 0x80 so it takes one byte), then when we overwrite the fixup
1303 // number with the generic parameter number, we'll leave extra bytes in the signature following
1304 // the written generic parameter number. Thus, we do something of a hack to ensure that the
1305 // compressed number is correctly readable even if 'm' compresses smaller than 'n' did: we
1306 // recompress 'm' to use the same amount of space as 'n' used. This is possible because smaller
1307 // numbers can still be compressed in a larger amount of space, even though it's not optimal (and
1308 // CorSigCompressData() would never do it). If, however, the compressed sizes are the other
1309 // way around (m takes more space to compress than n), then we've already corrupted the
1310 // signature that we're reading by writing beyond what we should (is there some reason why
1311 // this is not possible?).
1312 // Note that 'ptr' has already been adjusted, above, to point to the next type after this one.
1313 // There is no need to update it when recompressing the data.
1315 if (compressed_size_m > compressed_size_n)
1317 // We've got a problem: we just corrupted the rest of the signature!
1318 // (Can this ever happen in practice?)
1319 PASM->report->error("(fixupType) Too many %stype parameters\n",
1320 (*pb == ELEMENT_TYPE_MVARFIXUP)? "method ": "");
1322 else if (compressed_size_m < compressed_size_n)
1324 // We didn't write out as much data as we read. This will leave extra bytes in the
1325 // signature that will be incorrectly recognized. Ideally, we would just shrink the
1326 // signature. That's not easy to do here. Instead, pad the bytes to force it to use
1327 // a larger encoding than needed. This assumes knowledge of the CorSigCompressData()
1331 // compressed_size_m m bytes compressed_size_n result bytes
1333 // 1 m1 4 0xC0 0x00 0x00 m1
1334 // 2 m1 m2 4 0xC0 0x00 (m1 & 0x7f) m2
1336 _ASSERTE((compressed_size_m == 1) || (compressed_size_m == 2) || (compressed_size_m == 4));
1337 _ASSERTE((compressed_size_n == 1) || (compressed_size_n == 2) || (compressed_size_n == 4));
1339 if ((compressed_size_m == 1) &&
1340 (compressed_size_n == 2))
1342 unsigned __int8 m1 = *(pb + 1);
1343 _ASSERTE(m1 < 0x80);
1348 if ((compressed_size_m == 1) &&
1349 (compressed_size_n == 4))
1351 unsigned __int8 m1 = *(pb + 1);
1352 _ASSERTE(m1 < 0x80);
1359 if ((compressed_size_m == 2) &&
1360 (compressed_size_n == 4))
1362 unsigned __int8 m1 = *(pb + 1);
1363 unsigned __int8 m2 = *(pb + 2);
1364 _ASSERTE(m1 >= 0x80);
1365 m1 &= 0x7f; // strip the bit indicating it's a 2-byte thing
1374 CorSigUncompressData((PCCOR_SIGNATURE&) ptr); // bound
1377 case ELEMENT_TYPE_FNPTR:
1379 CorSigUncompressData((PCCOR_SIGNATURE&) ptr); // calling convention
1380 unsigned argCnt = CorSigUncompressData((PCCOR_SIGNATURE&) ptr); // arg count
1381 ptr = skipType(ptr, fFixupType); // return type
1384 ptr = skipType(ptr, fFixupType);
1390 case ELEMENT_TYPE_GENERICINST:
1392 ptr = skipType(ptr, fFixupType); // type constructor
1393 unsigned argCnt = CorSigUncompressData((PCCOR_SIGNATURE&)ptr); // arg count
1395 ptr = skipType(ptr, fFixupType);
1402 case ELEMENT_TYPE_END :
1403 _ASSERTE(!"Unknown Type");
1409 /**************************************************************************/
// Walks the signature bytes [pSig, pSig + cSig) with skipType in fix-up mode
// (fFixupType == TRUE). Nothing is done when TyParFixupList is empty.
// NOTE(review): the loop statement around the skipType call is not visible
// in this listing.
1410 void FixupTyPars(PCOR_SIGNATURE pSig, ULONG cSig)
1412 if(TyParFixupList.COUNT() > 0)
1414 BYTE* ptr = (BYTE*)pSig;
1415 BYTE* ptrEnd = ptr + cSig;
1418 ptr = skipType(ptr, TRUE);
1420 } // end if(COUNT>0)
// Convenience overload: applies FixupTyPars to the bytes held by 'pbstype',
// using its ptr() and length().
1422 void FixupTyPars(BinStr* pbstype)
1424 FixupTyPars((PCOR_SIGNATURE)(pbstype->ptr()),(ULONG)(pbstype->length()));
1426 /**************************************************************************/
// Walks the type descriptions stored in 'args', from ptr() up to
// ptr() + length(), advancing with skipType (no fix-up).
// ELEMENT_TYPE_SENTINEL bytes are tested for separately.
// NOTE(review): the loop, the counter and the return statement are not
// visible in this listing; presumably the function returns the number of
// non-sentinel types - confirm.
1427 static unsigned corCountArgs(BinStr* args)
1429 unsigned __int8* ptr = args->ptr();
1430 unsigned __int8* end = &args->ptr()[args->length()];
1434 if (*ptr != ELEMENT_TYPE_SENTINEL)
1436 ptr = skipType(ptr, FALSE);
1444 /********************************************************************************/
// Constructs the parser: installs itself as the assembler's error reporter,
// resets the assembler's current line/column to 1, clears the include path,
// caches the standard output/error handles, and resolves keyword aliases.
// NOTE(review): several statements, including the member initialization
// from aIn/aAssem, are not visible in this listing.
1445 AsmParse::AsmParse(ReadStream* aIn, Assembler *aAssem)
1447 #ifdef DEBUG_PARSING
1453 assem->SetErrorReporter((ErrorReporter *)this);
1455 assem->m_ulCurLine = 1;
1456 assem->m_ulCurColumn = 1;
1458 wzIncludePath = NULL;
1461 hstdout = GetStdHandle(STD_OUTPUT_HANDLE);
1462 hstderr = GetStdHandle(STD_ERROR_HANDLE);
1465 _ASSERTE(parser == 0); // Should only be one parser instance at a time
// Alias fix-up: an entry whose token is NO_VALUE takes the token of the
// keyword entry indexed by its own tokenVal.
1468 for (unsigned int i = 0; i < sizeof(keywords) / sizeof(Keywords); i++)
1470 if (keywords[i].token == NO_VALUE)
1471 keywords[i].token = keywords[keywords[i].tokenVal].token;
1479 /********************************************************************************/
1480 AsmParse::~AsmParse()
1484 while(m_ANSLast.POP());
1487 /**************************************************************************/
// Thin wrapper that forwards its arguments unchanged to IsTextUnicode and
// returns that function's result.
1488 DWORD AsmParse::IsItUnicode(CONST LPVOID pBuff, int cb, LPINT lpi)
1490 return IsTextUnicode(pBuff,cb,lpi);
1493 /**************************************************************************/
// Allocates a zero-filled PARSING_ENVIRONMENT and makes it the current one
// (penv). The assembler's source file name is copied into it, fillBuff()
// supplies the initial read position, and the code page chosen by fillBuff
// (g_uCodePage) is recorded.
// NOTE(review): the statements that attach 'stream' to the environment are
// not visible in this listing.
1494 void AsmParse::CreateEnvironment(ReadStream* stream)
1496 penv = new PARSING_ENVIRONMENT;
1497 memset(penv,0,sizeof(PARSING_ENVIRONMENT));
1500 strcpy_s(penv->szFileName, MAX_FILENAME_LENGTH*3+1,assem->m_szSourceFileName);
1502 penv->curPos = fillBuff(NULL);
1503 penv->uCodePage = g_uCodePage;
1508 /**************************************************************************/
1509 void AsmParse::ParseFile(ReadStream* stream)
1511 CreateEnvironment(stream);
1516 /**************************************************************************/
// Obtains the whole input from penv->in, detects its encoding and installs
// the matching set of encoding-dependent helper functions in penv.
// Returns penv->curPos. The 'pos' argument is not read by any visible line.
//   - wide-character text (per IsItUnicode on at most the first 128 bytes):
//     the *W helpers;
//   - otherwise, input starting with the bytes EF BB BF: nextcharU plus the
//     *AU helpers;
//   - otherwise (ANSI): nextcharA plus the *AU helpers.
// g_uCodePage starts as CP_ACP and is set to CP_UTF8 in the first two cases.
// NOTE(review): the statements that skip a detected signature/BOM are not
// visible in this listing.
1517 char* AsmParse::fillBuff(__in_opt __nullterminated char* pos)
1520 int iOptions = IS_TEXT_UNICODE_UNICODE_MASK;
1521 g_uCodePage = CP_ACP;
1522 iPutToBuffer = (int)penv->in->getAll(&(penv->curPos));
1524 penv->endPos = penv->curPos + iPutToBuffer;
1525 if(iPutToBuffer > 128) iPutToBuffer = 128; // only a prefix is examined for detection
1526 if(IsItUnicode(penv->curPos,iPutToBuffer,&iOptions))
1528 g_uCodePage = CP_UTF8;
1529 if(iOptions & IS_TEXT_UNICODE_SIGNATURE)
1533 if(assem->m_fReportProgress) printf("Source file is UNICODE\n\n");
1534 penv->pfn_Sym = SymW;
1535 penv->pfn_nextchar = nextcharW;
1536 penv->pfn_NewStrFromToken = NewStrFromTokenW;
1537 penv->pfn_NewStaticStrFromToken = NewStaticStrFromTokenW;
1538 penv->pfn_GetDouble = GetDoubleW;
// Not wide-character text: check for the three-byte sequence EF BB BF.
1542 if(((penv->curPos[0]&0xFF)==0xEF)&&((penv->curPos[1]&0xFF)==0xBB)&&((penv->curPos[2]&0xFF)==0xBF))
1544 g_uCodePage = CP_UTF8;
1546 if(assem->m_fReportProgress) printf("Source file is UTF-8\n\n");
1547 penv->pfn_nextchar = nextcharU;
1551 if(assem->m_fReportProgress) printf("Source file is ANSI\n\n");
1552 penv->pfn_nextchar = nextcharA;
// Helpers shared by the UTF-8 and ANSI cases.
1554 penv->pfn_Sym = SymAU;
1555 penv->pfn_NewStrFromToken = NewStrFromTokenAU;
1556 penv->pfn_NewStaticStrFromToken = NewStaticStrFromTokenAU;
1557 penv->pfn_GetDouble = GetDoubleAU;
1559 return(penv->curPos);
1562 /********************************************************************************/
// Builds a signature blob in a new BinStr from a calling convention, a return
// type, and an argument list.
// The visible statements emit, in order: the calling-convention byte, the
// generic type-argument count, the argument count, and the return type.
// NOTE(review): the guards around some of these statements, the append of
// `args`, and the return are on lines not visible in this excerpt.
1563 BinStr* AsmParse::MakeSig(unsigned callConv, BinStr* retType, BinStr* args, int ntyargs)
// A non-zero type-argument count must go together with the GENERIC
// calling-convention flag, and vice versa.
1565 _ASSERTE((ntyargs != 0) == ((callConv & IMAGE_CEE_CS_CALLCONV_GENERIC) != 0));
1566 BinStr* ret = new BinStr();
1570 ret->insertInt8(callConv);
// Number of generic type arguments.
// NOTE(review): presumably emitted only for generic signatures — the
// enclosing condition is not visible here.
1572 corEmitInt(ret, ntyargs);
// Argument count as computed by corCountArgs.
1573 corEmitInt(ret, corCountArgs(args));
1577 ret->append(retType);
// Failure path; the condition that leads here is not visible in this excerpt.
1583 error("\nOut of memory!\n");
1589 /********************************************************************************/
// Builds an array type blob in a new BinStr. The visible statements emit, in
// order: the element-type kind byte, the element type, the rank, the number
// of sizes followed by the sizes, then the number of lower bounds followed by
// the lower bounds.
// A lowerBound or numElements value of 0x7FFFFFFF or more is treated as "not
// specified". Note that the entries inside `bounds` are modified in place:
// unspecified values are reset to 0.
// NOTE(review): the local struct definition, the declaration of `i`, and the
// return are on lines not visible in this excerpt.
1590 BinStr* AsmParse::MakeTypeArray(CorElementType kind, BinStr* elemType, BinStr* bounds)
1592 // 'bounds' is a binary buffer, that contains an array of 'struct Bounds'
1595 unsigned numElements;
// The buffer must hold a whole number of Bounds entries, and at least one.
1598 _ASSERTE(bounds->length() % sizeof(Bounds) == 0);
1599 unsigned boundsLen = bounds->length() / sizeof(Bounds);
1600 _ASSERTE(boundsLen > 0);
1601 Bounds* boundsArr = (Bounds*) bounds->ptr();
1603 BinStr* ret = new BinStr();
1605 ret->appendInt8(kind);
1606 ret->append(elemType);
1607 corEmitInt(ret, boundsLen); // emit the rank
// Each counter ends up as (index of the last dimension with a specified
// value) + 1, so trailing unspecified dimensions are not emitted, while
// unspecified dimensions before that point are emitted as 0.
1609 unsigned lowerBoundsDefined = 0;
1610 unsigned numElementsDefined = 0;
1612 for(i=0; i < boundsLen; i++)
1614 if(boundsArr[i].lowerBound < 0x7FFFFFFF) lowerBoundsDefined = i+1;
1615 else boundsArr[i].lowerBound = 0;
1617 if(boundsArr[i].numElements < 0x7FFFFFFF) numElementsDefined = i+1;
1618 else boundsArr[i].numElements = 0;
1621 corEmitInt(ret, numElementsDefined); // emit number of bounds
1623 for(i=0; i < numElementsDefined; i++)
1625 _ASSERTE (boundsArr[i].numElements >= 0); // enforced at rule time
1626 corEmitInt(ret, boundsArr[i].numElements);
1630 corEmitInt(ret, lowerBoundsDefined); // emit number of lower bounds
1631 for(i=0; i < lowerBoundsDefined; i++)
// Lower bounds are signed, so they use the signed compression routine.
// getBuff(5) supplies the destination; remove(5 - cnt) then presumably trims
// the bytes the encoder did not use (cnt looks like the encoded length) —
// TODO confirm against BinStr.
1633 unsigned cnt = CorSigCompressSignedInt(boundsArr[i].lowerBound, ret->getBuff(5));
1634 ret->remove(5 - cnt);
1641 /********************************************************************************/
// Builds a type blob in a new BinStr consisting of the element-type kind byte
// followed by the compressed token `tk`.
// `kind` must be CLASS, VALUETYPE, CMOD_REQD or CMOD_OPT (asserted below).
// NOTE(review): the return statement is on a line not visible in this excerpt.
1642 BinStr* AsmParse::MakeTypeClass(CorElementType kind, mdToken tk)
1645 BinStr* ret = new BinStr();
1646 _ASSERTE(kind == ELEMENT_TYPE_CLASS || kind == ELEMENT_TYPE_VALUETYPE ||
1647 kind == ELEMENT_TYPE_CMOD_REQD || kind == ELEMENT_TYPE_CMOD_OPT);
1648 ret->appendInt8(kind);
// getBuff(5) supplies the destination for the compressed token;
// remove(5 - cnt) then presumably trims the bytes the encoder did not use
// (cnt looks like the encoded length) — TODO confirm against BinStr.
1649 unsigned cnt = CorSigCompressToken(tk, ret->getBuff(5));
1650 ret->remove(5 - cnt);
1653 /**************************************************************************/
// Writes the string `sz` to the file `pF`.
// When the source code page (g_uCodePage) is not CP_ACP, the text is first
// converted to the console code page (g_uConsoleCP) by going through wide
// characters.
// `sz` is overwritten in place during that conversion: dwUniBuf bytes of it
// are cleared and rewritten, so the caller's buffer must be at least that
// large and writable.
1654 void PrintANSILine(FILE* pF, __in __nullterminated char* sz)
// The start of the shared wzUniBuf is used as the wide-character scratch area.
1656 WCHAR *wz = &wzUniBuf[0];
1657 if(g_uCodePage != CP_ACP)
1659 memset(wz,0,dwUniBuf); // dwUniBuf/2 WCHARs = dwUniBuf bytes
// Source code page -> wide characters.
1660 WszMultiByteToWideChar(g_uCodePage,0,sz,-1,wz,(dwUniBuf >> 1)-1);
// Wide characters -> console code page, written back over the input buffer.
1662 memset(sz,0,dwUniBuf);
1663 WszWideCharToMultiByte(g_uConsoleCP,0,wz,-1,sz,dwUniBuf-1,NULL,NULL);
1665 fprintf(pF,"%s",sz);
1667 /**************************************************************************/
// Formats and prints an error message (printf-style `fmt` plus arguments).
// The visible statements build "<file>(<line>) : error : <message>", where
// the file/line prefix is present only when a parsing environment with an
// input stream exists.
// NOTE(review): the declarations of `psz` and `args`, and any statements
// after the print, are on lines not visible in this excerpt.
1668 void AsmParse::error(const char* fmt, ...)
// Message buffer: starts at element dwUniBuf/2 of wzUniBuf, viewed as chars.
// PrintANSILine uses the start of wzUniBuf as its conversion scratch area, so
// the message is kept in a different region of the same buffer.
1670 char *sz = (char*)(&wzUniBuf[(dwUniBuf >> 1)]);
// stdout is chosen when progress reporting is off and OnErrGo is set;
// otherwise stderr.
1672 FILE* pF = ((!assem->m_fReportProgress)&&(assem->OnErrGo)) ? stdout : stderr;
1675 va_start(args, fmt);
// Location prefix; %S formats the name returned by namew().
1677 if((penv) && (penv->in)) psz+=sprintf_s(psz, (dwUniBuf >> 1), "%S(%d) : ", penv->in->namew(), penv->curLine);
// NOTE(review): the size passed here is not reduced by the characters
// already written through psz.
1678 psz+=sprintf_s(psz, (dwUniBuf >> 1), "error : ");
// The maximum count leaves out what is already in sz plus one terminator.
1679 _vsnprintf_s(psz, (dwUniBuf >> 1),(dwUniBuf >> 1)-strlen(sz)-1, fmt, args);
1680 PrintANSILine(pF,sz);
1683 /**************************************************************************/
// Formats and prints a warning message (printf-style `fmt` plus arguments).
// The visible statements build "<file>(<line>) : warning : <message>", where
// the file/line prefix is present only when a parsing environment with an
// input stream exists.
// NOTE(review): the declarations of `psz` and `args`, and any statements
// after the print, are on lines not visible in this excerpt.
1684 void AsmParse::warn(const char* fmt, ...)
// Message buffer: starts at element dwUniBuf/2 of wzUniBuf, viewed as chars.
// PrintANSILine uses the start of wzUniBuf as its conversion scratch area, so
// the message is kept in a different region of the same buffer.
1686 char *sz = (char*)(&wzUniBuf[(dwUniBuf >> 1)]);
// stdout is chosen when progress reporting is off and OnErrGo is set;
// otherwise stderr.
1688 FILE* pF = ((!assem->m_fReportProgress)&&(assem->OnErrGo)) ? stdout : stderr;
1690 va_start(args, fmt);
// Location prefix; %S formats the name returned by namew().
1692 if((penv) && (penv->in)) psz+=sprintf_s(psz, (dwUniBuf >> 1), "%S(%d) : ", penv->in->namew(), penv->curLine);
// NOTE(review): the size passed here is not reduced by the characters
// already written through psz.
1693 psz+=sprintf_s(psz, (dwUniBuf >> 1), "warning : ");
// The maximum count leaves out what is already in sz plus one terminator.
1694 _vsnprintf_s(psz, (dwUniBuf >> 1),(dwUniBuf >> 1)-strlen(sz)-1, fmt, args);
1695 PrintANSILine(pF,sz);
1697 /**************************************************************************/
// Formats a printf-style message and prints it to stdout, with no location or
// severity prefix.
// NOTE(review): the declaration of `args` and any statements after the print
// are on lines not visible in this excerpt.
1698 void AsmParse::msg(const char* fmt, ...)
// Message buffer: starts at element dwUniBuf/2 of wzUniBuf, viewed as chars.
1700 char *sz = (char*)(&wzUniBuf[(dwUniBuf >> 1)]);
1702 va_start(args, fmt);
// The maximum count leaves room for the terminating null.
1704 _vsnprintf_s(sz, (dwUniBuf >> 1),(dwUniBuf >> 1)-1, fmt, args);
1705 PrintANSILine(stdout,sz);
1709 #pragma warning(default : 4640)