From 4cd80de1eb1f25c23186a6d21e9223c352e60b67 Mon Sep 17 00:00:00 2001 From: thurston Date: Thu, 13 Sep 2007 17:54:31 +0000 Subject: [PATCH] If the condition embedding code runs out of available characters in the keyspace an error message is emitted. A paragraph about this was added to the manual. git-svn-id: http://svn.complang.org/ragel/trunk@278 052ea7fc-9027-0410-9066-f65837a77df0 --- common/common.h | 14 +++++++++++++- doc/ragel-guide.tex | 10 ++++++++++ ragel/fsmap.cpp | 11 +++++++++-- ragel/fsmgraph.h | 17 +++++++++++++++-- ragel/parsedata.cpp | 33 +++++++++++++++++++++++++++++---- ragel/parsedata.h | 6 ++++-- ragel/rlparse.kl | 4 ++-- 7 files changed, 82 insertions(+), 13 deletions(-) diff --git a/common/common.h b/common/common.h index c0470d3..cfe8500 100644 --- a/common/common.h +++ b/common/common.h @@ -57,9 +57,13 @@ public: long getVal() const { return key; }; /* Returns the key casted to a long long. This form of the key does not - * require and signedness interpretation. */ + * require any signedness interpretation. */ long long getLongLong() const; + /* Returns the distance from the key value to the maximum value that the + * key implementation can hold. */ + Size availableSpace() const; + bool isUpper() const { return ( 'A' <= key && key <= 'Z' ); } bool isLower() const { return ( 'a' <= key && key <= 'z' ); } bool isPrintable() const @@ -252,6 +256,14 @@ inline long long Key::getLongLong() const return keyOps->isSigned ? (long long)key : (long long)(unsigned long)key; } +inline Size Key::availableSpace() const +{ + if ( keyOps->isSigned ) + return (long long)LONG_MAX - (long long)key; + else + return (unsigned long long)ULONG_MAX - (unsigned long long)(unsigned long)key; +} + inline Key operator+(const Key key1, const Key key2) { /* FIXME: must be made aware of isSigned. */ diff --git a/doc/ragel-guide.tex b/doc/ragel-guide.tex index 03672a7..f27a0f4 100644 --- a/doc/ragel-guide.tex +++ b/doc/ragel-guide.tex @@ -3553,6 +3553,16 @@ performing checks on the semantic structure of input seen so far. In the next section we describe how Ragel accommodates several common parser engineering problems. +\vspace{10pt} + +\noindent {\large\bf Note:} The semantic condition feature works only with +alphabet types that are smaller in width than the \verb|long| type. To +implement semantic conditions Ragel needs to be able to allocate characters +from the alphabet space. Ragel uses these allocated characters to express +"character C with condition P true" or "C with P false." Since internally Ragel +uses longs to store characters there is no room left in the alphabet space +unless an alphabet type smaller than long is used. + \section{Implementing Lookahead} There are a few strategies for implementing lookahead in Ragel programs. diff --git a/ragel/fsmap.cpp b/ragel/fsmap.cpp index 53587e3..c9e673a 100644 --- a/ragel/fsmap.cpp +++ b/ragel/fsmap.cpp @@ -802,8 +802,15 @@ CondSpace *FsmAp::addCondSpace( const CondSet &condSet ) { CondSpace *condSpace = condData->condSpaceMap.find( condSet ); if ( condSpace == 0 ) { - Key baseKey = condData->nextCondKey; - condData->nextCondKey += (1 << condSet.length() ) * keyOps->alphSize(); + /* Do we have enough keyspace left? */ + Size availableSpace = condData->lastCondKey.availableSpace(); + Size neededSpace = (1 << condSet.length() ) * keyOps->alphSize(); + if ( neededSpace > availableSpace ) + throw FsmConstructFail( FsmConstructFail::CondNoKeySpace ); + + Key baseKey = condData->lastCondKey; + baseKey.increment(); + condData->lastCondKey += (1 << condSet.length() ) * keyOps->alphSize(); condSpace = new CondSpace( condSet ); condSpace->baseKey = baseKey; diff --git a/ragel/fsmgraph.h b/ragel/fsmgraph.h index b2ded15..e3b3486 100644 --- a/ragel/fsmgraph.h +++ b/ragel/fsmgraph.h @@ -25,6 +25,7 @@ #include "config.h" #include #include +#include #include "common.h" #include "vector.h" #include "bstset.h" @@ -641,16 +642,28 @@ struct Removal struct CondData { - CondData() : nextCondKey(0) {} + CondData() : lastCondKey(0) {} /* Condition info. */ - Key nextCondKey; + Key lastCondKey; CondSpaceMap condSpaceMap; }; extern CondData *condData; +struct FsmConstructFail +{ + enum Reason + { + CondNoKeySpace + }; + + FsmConstructFail( Reason reason ) + : reason(reason) {} + Reason reason; +}; + /* State class that implements actions and priorities. */ struct StateAp { diff --git a/ragel/parsedata.cpp b/ragel/parsedata.cpp index c860fef..6abe995 100644 --- a/ragel/parsedata.cpp +++ b/ragel/parsedata.cpp @@ -849,16 +849,18 @@ void ParseData::initGraphDict( ) } /* Set the alphabet type. If the types are not valid returns false. */ -bool ParseData::setAlphType( char *s1, char *s2 ) +bool ParseData::setAlphType( const InputLoc &loc, char *s1, char *s2 ) { + alphTypeLoc = loc; userAlphType = findAlphType( s1, s2 ); alphTypeSet = true; return userAlphType != 0; } /* Set the alphabet type. If the types are not valid returns false. */ -bool ParseData::setAlphType( char *s1 ) +bool ParseData::setAlphType( const InputLoc &loc, char *s1 ) { + alphTypeLoc = loc; userAlphType = findAlphType( s1 ); alphTypeSet = true; return userAlphType != 0; @@ -906,8 +908,7 @@ void ParseData::initKeyOps( ) thisKeyOps.maxKey = makeFsmKeyNum( upperNum, rangeHighLoc, this ); } - thisCondData.nextCondKey = thisKeyOps.maxKey; - thisCondData.nextCondKey.increment(); + thisCondData.lastCondKey = thisKeyOps.maxKey; } void ParseData::printNameInst( NameInst *nameInst, int level ) @@ -1365,8 +1366,32 @@ void ParseData::makeExports() } +/* Construct the machine and catch failures which can occur during + * construction. */ void ParseData::prepareMachineGen( GraphDictEl *graphDictEl ) { + try { + /* This machine construction can fail. */ + prepareMachineGenTBWrapped( graphDictEl ); + } + catch ( FsmConstructFail fail ) { + switch ( fail.reason ) { + case FsmConstructFail::CondNoKeySpace: { + InputLoc &loc = alphTypeSet ? alphTypeLoc : sectionLoc; + error(loc) << "sorry, no more characters are " + "available in the alphabet space" << endl; + error(loc) << " for conditions, please use a " + "smaller alphtype or reduce" << endl; + error(loc) << " the span of characters on which " + "conditions are embedded" << endl; + break; + } + } + } +} + +void ParseData::prepareMachineGenTBWrapped( GraphDictEl *graphDictEl ) +{ beginProcessing(); initKeyOps(); makeRootNames(); diff --git a/ragel/parsedata.h b/ragel/parsedata.h index ad90ccd..4f8d04c 100644 --- a/ragel/parsedata.h +++ b/ragel/parsedata.h @@ -187,8 +187,8 @@ struct ParseData void resolveActionNameRefs(); /* Set the alphabet type. If type types are not valid returns false. */ - bool setAlphType( char *s1, char *s2 ); - bool setAlphType( char *s1 ); + bool setAlphType( const InputLoc &loc, char *s1, char *s2 ); + bool setAlphType( const InputLoc &loc, char *s1 ); /* Override one of the variables ragel uses. */ bool setVariable( char *var, InlineList *inlineList ); @@ -214,6 +214,7 @@ struct ParseData void makeExports(); void prepareMachineGen( GraphDictEl *graphDictEl ); + void prepareMachineGenTBWrapped( GraphDictEl *graphDictEl ); void generateXML( ostream &out ); FsmAp *sectionGraph; bool generatingSectionSubset; @@ -254,6 +255,7 @@ struct ParseData /* Alphabet type. */ HostType *userAlphType; bool alphTypeSet; + InputLoc alphTypeLoc; /* Element type and get key expression. */ InlineList *getKeyExpr; diff --git a/ragel/rlparse.kl b/ragel/rlparse.kl index d6c2c02..bf93978 100644 --- a/ragel/rlparse.kl +++ b/ragel/rlparse.kl @@ -147,7 +147,7 @@ action_spec: # semi-colon. alphtype_spec: KW_AlphType TK_Word TK_Word ';' final { - if ( ! pd->setAlphType( $2->data, $3->data ) ) { + if ( ! pd->setAlphType( $1->loc, $2->data, $3->data ) ) { // Recover by ignoring the alphtype statement. error($2->loc) << "\"" << $2->data << " " << $3->data << "\" is not a valid alphabet type" << endl; @@ -156,7 +156,7 @@ alphtype_spec: alphtype_spec: KW_AlphType TK_Word ';' final { - if ( ! pd->setAlphType( $2->data ) ) { + if ( ! pd->setAlphType( $1->loc, $2->data ) ) { // Recover by ignoring the alphtype statement. error($2->loc) << "\"" << $2->data << "\" is not a valid alphabet type" << endl; -- 2.7.4