If the condition embedding code runs out of available characters in the

author thurston <thurston@052ea7fc-9027-0410-9066-f65837a77df0>

Thu, 13 Sep 2007 17:54:31 +0000 (17:54 +0000)

committer thurston <thurston@052ea7fc-9027-0410-9066-f65837a77df0>

Thu, 13 Sep 2007 17:54:31 +0000 (17:54 +0000)
author thurston <thurston@052ea7fc-9027-0410-9066-f65837a77df0>
Thu, 13 Sep 2007 17:54:31 +0000 (17:54 +0000)
committer thurston <thurston@052ea7fc-9027-0410-9066-f65837a77df0>
Thu, 13 Sep 2007 17:54:31 +0000 (17:54 +0000)
diff --git a/common/common.h b/common/common.h

index c0470d3..cfe8500 100644 (file)
--- a/common/common.h
+++ b/common/common.h
@@ -57,9 +57,13 @@ public:
         long getVal() const { return key; };
  
         /* Returns the key casted to a long long. This form of the key does not
-        * require and signedness interpretation. */
+        * require any signedness interpretation. */
         long long getLongLong() const;
  
+       /* Returns the distance from the key value to the maximum value that the
+        * key implementation can hold. */
+       Size availableSpace() const;
+
         bool isUpper() const { return ( 'A' <= key && key <= 'Z' ); }
         bool isLower() const { return ( 'a' <= key && key <= 'z' ); }
         bool isPrintable() const
@@ -252,6 +256,14 @@ inline long long Key::getLongLong() const
         return keyOps->isSigned ? (long long)key : (long long)(unsigned long)key;
  }
  
+inline Size Key::availableSpace() const
+{
+       if ( keyOps->isSigned ) /* signed keys: room left up to LONG_MAX */
+               return (long long)LONG_MAX - (long long)key;
+       else /* unsigned keys: view key as unsigned long, room left up to ULONG_MAX */
+               return (unsigned long long)ULONG_MAX - (unsigned long long)(unsigned long)key;
+}
+       
  inline Key operator+(const Key key1, const Key key2)
  {
         /* FIXME: must be made aware of isSigned. */
diff --git a/doc/ragel-guide.tex b/doc/ragel-guide.tex

index 03672a7..f27a0f4 100644 (file)
--- a/doc/ragel-guide.tex
+++ b/doc/ragel-guide.tex
@@ -3553,6 +3553,16 @@ performing checks on the semantic structure of input seen so far. In the
  next section we describe how Ragel accommodates several common parser
  engineering problems.
  
+\vspace{10pt}
+
+\noindent {\large\bf Note:} The semantic condition feature works only with
+alphabet types that are smaller in width than the \verb|long| type. To
+implement semantic conditions Ragel needs to be able to allocate characters
+from the alphabet space. Ragel uses these allocated characters to express
+``character C with condition P true'' or ``C with P false.'' Since internally
+Ragel uses longs to store characters, there is no room left in the alphabet
+space unless an alphabet type smaller than long is used.
+
  \section{Implementing Lookahead}
  
  There are a few strategies for implementing lookahead in Ragel programs.
diff --git a/ragel/fsmap.cpp b/ragel/fsmap.cpp

index 53587e3..c9e673a 100644 (file)
--- a/ragel/fsmap.cpp
+++ b/ragel/fsmap.cpp
@@ -802,8 +802,15 @@ CondSpace *FsmAp::addCondSpace( const CondSet &condSet )
  {
         CondSpace *condSpace = condData->condSpaceMap.find( condSet );
         if ( condSpace == 0 ) {
-               Key baseKey = condData->nextCondKey;
-               condData->nextCondKey += (1 << condSet.length() ) * keyOps->alphSize();
+               /* Do we have enough keyspace left? */
+               Size availableSpace = condData->lastCondKey.availableSpace();
+               Size neededSpace = (1 << condSet.length() ) * keyOps->alphSize();
+               if ( neededSpace > availableSpace )
+                       throw FsmConstructFail( FsmConstructFail::CondNoKeySpace );
+
+               Key baseKey = condData->lastCondKey;
+               baseKey.increment();
+               condData->lastCondKey += (1 << condSet.length() ) * keyOps->alphSize();
  
                 condSpace = new CondSpace( condSet );
                 condSpace->baseKey = baseKey;
diff --git a/ragel/fsmgraph.h b/ragel/fsmgraph.h

index b2ded15..e3b3486 100644 (file)
--- a/ragel/fsmgraph.h
+++ b/ragel/fsmgraph.h
@@ -25,6 +25,7 @@
  #include "config.h"
  #include <assert.h>
  #include <iostream>
+#include <string>
  #include "common.h"
  #include "vector.h"
  #include "bstset.h"
@@ -641,16 +642,28 @@ struct Removal
  
  struct CondData
  {
-       CondData() : nextCondKey(0) {}
+       CondData() : lastCondKey(0) {}
  
         /* Condition info. */
-       Key nextCondKey;
+       Key lastCondKey; /* Highest key in use so far; a new condition space starts one past it. */
  
         CondSpaceMap condSpaceMap;
  };
  
  extern CondData *condData;
  
+struct FsmConstructFail /* Thrown when machine construction cannot continue. */
+{
+       enum Reason
+       {
+               CondNoKeySpace /* Not enough alphabet space left for a condition space. */
+       };
+
+       explicit FsmConstructFail( Reason reason ) 
+               : reason(reason) {}
+       Reason reason; /* Why construction failed. */
+};
+
  /* State class that implements actions and priorities. */
  struct StateAp 
  {
diff --git a/ragel/parsedata.cpp b/ragel/parsedata.cpp

index c860fef..6abe995 100644 (file)
--- a/ragel/parsedata.cpp
+++ b/ragel/parsedata.cpp
@@ -849,16 +849,18 @@ void ParseData::initGraphDict( )
  }
  
  /* Set the alphabet type. If the types are not valid returns false. */
-bool ParseData::setAlphType( char *s1, char *s2 )
+bool ParseData::setAlphType( const InputLoc &loc, char *s1, char *s2 )
  {
+       alphTypeLoc = loc; /* kept so later errors can be reported at the alphtype statement */
         userAlphType = findAlphType( s1, s2 );
         alphTypeSet = true;
         return userAlphType != 0;
  }
  
  /* Set the alphabet type. If the types are not valid returns false. */
-bool ParseData::setAlphType( char *s1 )
+bool ParseData::setAlphType( const InputLoc &loc, char *s1 )
  {
+       alphTypeLoc = loc;
         userAlphType = findAlphType( s1 );
         alphTypeSet = true;
         return userAlphType != 0;
@@ -906,8 +908,7 @@ void ParseData::initKeyOps( )
                 thisKeyOps.maxKey = makeFsmKeyNum( upperNum, rangeHighLoc, this );
         }
  
-       thisCondData.nextCondKey = thisKeyOps.maxKey;
-       thisCondData.nextCondKey.increment();
+       thisCondData.lastCondKey = thisKeyOps.maxKey;
  }
  
  void ParseData::printNameInst( NameInst *nameInst, int level )
@@ -1365,8 +1366,32 @@ void ParseData::makeExports()
  
  }
  
+/* Construct the machine. An FsmConstructFail thrown during construction is
+ * caught here and reported as an error at the alphtype or section location. */
  void ParseData::prepareMachineGen( GraphDictEl *graphDictEl )
  {
+       try {
+               /* This machine construction can fail. */
+               prepareMachineGenTBWrapped( graphDictEl );
+       }
+       catch ( const FsmConstructFail &fail ) {
+               switch ( fail.reason ) {
+                       case FsmConstructFail::CondNoKeySpace: {
+                               InputLoc &loc = alphTypeSet ? alphTypeLoc : sectionLoc;
+                               error(loc) << "sorry, no more characters are "
+                                               "available in the alphabet space" << endl;
+                               error(loc) << "  for conditions, please use a "
+                                               "smaller alphtype or reduce" << endl;
+                               error(loc) << "  the span of characters on which "
+                                               "conditions are embedded" << endl;
+                               break;
+                       }
+               }
+       }
+}
+
+void ParseData::prepareMachineGenTBWrapped( GraphDictEl *graphDictEl )
+{
         beginProcessing();
         initKeyOps();
         makeRootNames();
diff --git a/ragel/parsedata.h b/ragel/parsedata.h

index ad90ccd..4f8d04c 100644 (file)
--- a/ragel/parsedata.h
+++ b/ragel/parsedata.h
@@ -187,8 +187,8 @@ struct ParseData
         void resolveActionNameRefs();
  
         /* Set the alphabet type. If type types are not valid returns false. */
-       bool setAlphType( char *s1, char *s2 );
-       bool setAlphType( char *s1 );
+       bool setAlphType( const InputLoc &loc, char *s1, char *s2 );
+       bool setAlphType( const InputLoc &loc, char *s1 );
  
         /* Override one of the variables ragel uses. */
         bool setVariable( char *var, InlineList *inlineList );
@@ -214,6 +214,7 @@ struct ParseData
         void makeExports();
  
         void prepareMachineGen( GraphDictEl *graphDictEl );
+       void prepareMachineGenTBWrapped( GraphDictEl *graphDictEl );
         void generateXML( ostream &out );
         FsmAp *sectionGraph;
         bool generatingSectionSubset;
@@ -254,6 +255,7 @@ struct ParseData
         /* Alphabet type. */
         HostType *userAlphType;
         bool alphTypeSet;
+       InputLoc alphTypeLoc;
  
         /* Element type and get key expression. */
         InlineList *getKeyExpr;
diff --git a/ragel/rlparse.kl b/ragel/rlparse.kl

index d6c2c02..bf93978 100644 (file)
--- a/ragel/rlparse.kl
+++ b/ragel/rlparse.kl
@@ -147,7 +147,7 @@ action_spec:
  # semi-colon.
  alphtype_spec:
         KW_AlphType TK_Word TK_Word ';' final {
-               if ( ! pd->setAlphType( $2->data, $3->data ) ) {
+               if ( ! pd->setAlphType( $1->loc, $2->data, $3->data ) ) {
                         // Recover by ignoring the alphtype statement.
                         error($2->loc) << "\"" << $2->data << 
                                         " " << $3->data << "\" is not a valid alphabet type" << endl;
@@ -156,7 +156,7 @@ alphtype_spec:
  
  alphtype_spec:
         KW_AlphType TK_Word ';' final {
-               if ( ! pd->setAlphType( $2->data ) ) {
+               if ( ! pd->setAlphType( $1->loc, $2->data ) ) {
                         // Recover by ignoring the alphtype statement.
                         error($2->loc) << "\"" << $2->data << 
                                         "\" is not a valid alphabet type" << endl;
author	thurston <thurston@052ea7fc-9027-0410-9066-f65837a77df0>
	Thu, 13 Sep 2007 17:54:31 +0000 (17:54 +0000)
committer	thurston <thurston@052ea7fc-9027-0410-9066-f65837a77df0>
	Thu, 13 Sep 2007 17:54:31 +0000 (17:54 +0000)
common/common.h		patch \| blob \| history
doc/ragel-guide.tex		patch \| blob \| history
ragel/fsmap.cpp		patch \| blob \| history
ragel/fsmgraph.h		patch \| blob \| history
ragel/parsedata.cpp		patch \| blob \| history
ragel/parsedata.h		patch \| blob \| history
ragel/rlparse.kl		patch \| blob \| history