/* Returns the stored key value as a long. */
long getVal() const { return key; };
/* Returns the key cast to a long long. This form of the key does not
- * require and signedness interpretation. */
+ * require any signedness interpretation. */
long long getLongLong() const;
+ /* Returns the distance from the key value to the maximum value that the
+ * key implementation can hold. */
+ Size availableSpace() const;
+
/* True when the key lies within the range 'A' through 'Z'. */
bool isUpper() const { return ( 'A' <= key && key <= 'Z' ); }
/* True when the key lies within the range 'a' through 'z'. */
bool isLower() const { return ( 'a' <= key && key <= 'z' ); }
bool isPrintable() const
return keyOps->isSigned ? (long long)key : (long long)(unsigned long)key;
}
+/* Distance from this key to the largest value the key representation can
+ * hold. The limit is LONG_MAX when keyOps->isSigned is set and ULONG_MAX
+ * otherwise. */
+inline Size Key::availableSpace() const
+{
+	if ( !keyOps->isSigned ) {
+		/* Unsigned alphabet: reinterpret the key as unsigned long first. */
+		const unsigned long long ceiling = (unsigned long long)ULONG_MAX;
+		const unsigned long long current = (unsigned long long)(unsigned long)key;
+		return ceiling - current;
+	}
+
+	/* Signed alphabet: measure against the top of the signed range. */
+	const long long ceiling = (long long)LONG_MAX;
+	const long long current = (long long)key;
+	return ceiling - current;
+}
+
inline Key operator+(const Key key1, const Key key2)
{
/* FIXME: must be made aware of isSigned. */
next section we describe how Ragel accommodates several common parser
engineering problems.
+\vspace{10pt}
+
+\noindent {\large\bf Note:} The semantic condition feature works only with
+alphabet types that are smaller in width than the \verb|long| type. To
+implement semantic conditions, Ragel needs to be able to allocate characters
+from the alphabet space. Ragel uses these allocated characters to express
+``character C with condition P true'' or ``C with P false.'' Since Ragel
+internally uses longs to store characters, there is no room left in the
+alphabet space unless an alphabet type smaller than long is used.
+
\section{Implementing Lookahead}
There are a few strategies for implementing lookahead in Ragel programs.
{
CondSpace *condSpace = condData->condSpaceMap.find( condSet );
if ( condSpace == 0 ) {
- Key baseKey = condData->nextCondKey;
- condData->nextCondKey += (1 << condSet.length() ) * keyOps->alphSize();
+ /* Do we have enough keyspace left? */
+ Size availableSpace = condData->lastCondKey.availableSpace();
+ Size neededSpace = (1 << condSet.length() ) * keyOps->alphSize();
+ if ( neededSpace > availableSpace )
+ throw FsmConstructFail( FsmConstructFail::CondNoKeySpace );
+
+ Key baseKey = condData->lastCondKey;
+ baseKey.increment();
+ condData->lastCondKey += (1 << condSet.length() ) * keyOps->alphSize();
condSpace = new CondSpace( condSet );
condSpace->baseKey = baseKey;
#include "config.h"
#include <assert.h>
#include <iostream>
+#include <string>
#include "common.h"
#include "vector.h"
#include "bstset.h"
struct CondData
{
- CondData() : nextCondKey(0) {}
+ CondData() : lastCondKey(0) {}
/* Condition info. lastCondKey is the last key already in use; a new
 * condition space takes its base key from the value just above it.
 * condSpaceMap is searched by condition set for an existing CondSpace. */
- Key nextCondKey;
+ Key lastCondKey;
CondSpaceMap condSpaceMap;
};
extern CondData *condData;
+/* Exception object thrown when machine construction has to be abandoned.
+ * The reason member tells the handler which failure occurred. */
+struct FsmConstructFail
+{
+	/* The construction failures that can be reported. */
+	enum Reason
+	{
+		/* The key space has too little room left for a new condition space. */
+		CondNoKeySpace
+	};
+
+	/* Why construction failed. */
+	Reason reason;
+
+	FsmConstructFail( Reason why )
+		: reason(why) {}
+};
+
/* State class that implements actions and priorities. */
struct StateAp
{
}
/* Set the alphabet type from the two type-name words s1 and s2. The location
 * loc is stored in alphTypeLoc and alphTypeSet is raised whether or not the
 * lookup succeeds. If the types are not valid returns false. */
-bool ParseData::setAlphType( char *s1, char *s2 )
+bool ParseData::setAlphType( const InputLoc &loc, char *s1, char *s2 )
{
+ alphTypeLoc = loc;
userAlphType = findAlphType( s1, s2 );
alphTypeSet = true;
return userAlphType != 0;
}
/* Set the alphabet type. If the types are not valid returns false. */
-bool ParseData::setAlphType( char *s1 )
+bool ParseData::setAlphType( const InputLoc &loc, char *s1 )
{
+ alphTypeLoc = loc;
userAlphType = findAlphType( s1 );
alphTypeSet = true;
return userAlphType != 0;
thisKeyOps.maxKey = makeFsmKeyNum( upperNum, rangeHighLoc, this );
}
- thisCondData.nextCondKey = thisKeyOps.maxKey;
- thisCondData.nextCondKey.increment();
+ thisCondData.lastCondKey = thisKeyOps.maxKey;
}
void ParseData::printNameInst( NameInst *nameInst, int level )
}
+/* Construct the machine and catch failures which can occur during
+ * construction. The real work is done by prepareMachineGenTBWrapped. A
+ * FsmConstructFail thrown from it is reported as an error at alphTypeLoc
+ * when alphTypeSet is true and at sectionLoc otherwise; the exception is
+ * not rethrown. */
void ParseData::prepareMachineGen( GraphDictEl *graphDictEl )
{
+	try {
+		/* This machine construction can fail. */
+		prepareMachineGenTBWrapped( graphDictEl );
+	}
+	catch ( const FsmConstructFail &fail ) {
+		/* Caught by const reference so the exception object is not copied. */
+		switch ( fail.reason ) {
+			case FsmConstructFail::CondNoKeySpace: {
+				InputLoc &loc = alphTypeSet ? alphTypeLoc : sectionLoc;
+				error(loc) << "sorry, no more characters are "
+						"available in the alphabet space" << endl;
+				error(loc) << " for conditions, please use a "
+						"smaller alphtype or reduce" << endl;
+				error(loc) << " the span of characters on which "
+						"conditions are embedded" << endl;
+				break;
+			}
+		}
+	}
+}
+
+void ParseData::prepareMachineGenTBWrapped( GraphDictEl *graphDictEl )
+{
beginProcessing();
initKeyOps();
makeRootNames();
void resolveActionNameRefs();
/* Set the alphabet type. If the types are not valid returns false. */
- bool setAlphType( char *s1, char *s2 );
- bool setAlphType( char *s1 );
+ bool setAlphType( const InputLoc &loc, char *s1, char *s2 );
+ bool setAlphType( const InputLoc &loc, char *s1 );
/* Override one of the variables ragel uses. */
bool setVariable( char *var, InlineList *inlineList );
void makeExports();
void prepareMachineGen( GraphDictEl *graphDictEl );
+ void prepareMachineGenTBWrapped( GraphDictEl *graphDictEl );
void generateXML( ostream &out );
FsmAp *sectionGraph;
bool generatingSectionSubset;
/* Alphabet type. */
HostType *userAlphType;
bool alphTypeSet;
+ InputLoc alphTypeLoc;
/* Element type and get key expression. */
InlineList *getKeyExpr;
# semi-colon.
alphtype_spec:
KW_AlphType TK_Word TK_Word ';' final {
- if ( ! pd->setAlphType( $2->data, $3->data ) ) {
+ if ( ! pd->setAlphType( $1->loc, $2->data, $3->data ) ) {
// Recover by ignoring the alphtype statement.
error($2->loc) << "\"" << $2->data <<
" " << $3->data << "\" is not a valid alphabet type" << endl;
alphtype_spec:
KW_AlphType TK_Word ';' final {
- if ( ! pd->setAlphType( $2->data ) ) {
+ if ( ! pd->setAlphType( $1->loc, $2->data ) ) {
// Recover by ignoring the alphtype statement.
error($2->loc) << "\"" << $2->data <<
"\" is not a valid alphabet type" << endl;