importing my current diff
[platform/upstream/libzypp.git] / zypp / url / UrlBase.cc
index fc1d875..6c9b002 100644 (file)
@@ -11,6 +11,8 @@
  */
 #include <zypp/url/UrlBase.h>
 #include <zypp/base/String.h>
+#include <zypp/base/Gettext.h>
+#include <zypp/base/Regex.h>
 
 #include <stdexcept>
 #include <climits>
@@ -19,6 +21,7 @@
 #include <sys/socket.h>
 #include <arpa/inet.h>
 
+#include <iostream>
 
 // ---------------------------------------------------------------
 /*
 **
 ** host      = hostname | IPv4 | "[" IPv6-IP "]" | "[v...]"
 */
-#define RX_SPLIT_AUTHORITY \
-        "^(([^:@]*)([:]([^@]*))?@)?(\\[[^]]+\\]|[^:]+)?([:](.*))?"
-
 #define RX_VALID_SCHEME    "^[a-zA-Z][a-zA-Z0-9\\.+-]*$"
 
-#define RX_VALID_PORT      "^[0-9]{1,5}$"
-
 #define RX_VALID_HOSTNAME  "^[[:alnum:]]+([\\.-][[:alnum:]]+)*$"
 
 #define RX_VALID_HOSTIPV4  \
@@ -55,33 +53,44 @@ namespace zypp
     /*
     ** URL asString() view option constants:
     */
-    const ViewOptions ViewOptions::WITH_SCHEME       = 0x0001;
-    const ViewOptions ViewOptions::WITH_USERNAME     = 0x0002;
-    const ViewOptions ViewOptions::WITH_PASSWORD     = 0x0004;
-    const ViewOptions ViewOptions::WITH_HOST         = 0x0008;
-    const ViewOptions ViewOptions::WITH_PORT         = 0x0010;
-    const ViewOptions ViewOptions::WITH_PATH_NAME    = 0x0020;
-    const ViewOptions ViewOptions::WITH_PATH_PARAMS  = 0x0040;
-    const ViewOptions ViewOptions::WITH_QUERY_STR    = 0x0080;
-    const ViewOptions ViewOptions::WITH_FRAGMENT     = 0x0100;
-    const ViewOptions ViewOptions::EMPTY_AUTHORITY   = 0x0200;
-    const ViewOptions ViewOptions::EMPTY_PATH_NAME   = 0x0400;
-    const ViewOptions ViewOptions::EMPTY_PATH_PARAMS = 0x0800;
-    const ViewOptions ViewOptions::EMPTY_QUERY_STR   = 0x1000;
-    const ViewOptions ViewOptions::EMPTY_FRAGMENT    = 0x2000;
-    const ViewOptions ViewOptions::DEFAULTS          = 0x07bb;
+    const ViewOption  ViewOption::WITH_SCHEME       = 0x0001;
+    const ViewOption  ViewOption::WITH_USERNAME     = 0x0002;
+    const ViewOption  ViewOption::WITH_PASSWORD     = 0x0004;
+    const ViewOption  ViewOption::WITH_HOST         = 0x0008;
+    const ViewOption  ViewOption::WITH_PORT         = 0x0010;
+    const ViewOption  ViewOption::WITH_PATH_NAME    = 0x0020;
+    const ViewOption  ViewOption::WITH_PATH_PARAMS  = 0x0040;
+    const ViewOption  ViewOption::WITH_QUERY_STR    = 0x0080;
+    const ViewOption  ViewOption::WITH_FRAGMENT     = 0x0100;
+    const ViewOption  ViewOption::EMPTY_AUTHORITY   = 0x0200;
+    const ViewOption  ViewOption::EMPTY_PATH_NAME   = 0x0400;
+    const ViewOption  ViewOption::EMPTY_PATH_PARAMS = 0x0800;
+    const ViewOption  ViewOption::EMPTY_QUERY_STR   = 0x1000;
+    const ViewOption  ViewOption::EMPTY_FRAGMENT    = 0x2000;
+    const ViewOption  ViewOption::DEFAULTS          = 0x07bb;
     /*
-                      ViewOptions::WITH_SCHEME       +
-                      ViewOptions::WITH_USERNAME     +
-                      ViewOptions::WITH_HOST         +
-                      ViewOptions::WITH_PORT         +
-                      ViewOptions::WITH_PATH_NAME    +
-                      ViewOptions::WITH_QUERY_STR    +
-                      ViewOptions::WITH_FRAGMENT     +
-                      ViewOptions::EMPTY_AUTHORITY   +
-                      ViewOptions::EMPTY_PATH_NAME;
+    const ViewOption  ViewOption::DEFAULTS          =
+                      ViewOption::WITH_SCHEME       +
+                      ViewOption::WITH_USERNAME     +
+                      ViewOption::WITH_HOST         +
+                      ViewOption::WITH_PORT         +
+                      ViewOption::WITH_PATH_NAME    +
+                      ViewOption::WITH_QUERY_STR    +
+                      ViewOption::WITH_FRAGMENT     +
+                      ViewOption::EMPTY_AUTHORITY   +
+                      ViewOption::EMPTY_PATH_NAME;
     */
 
+    // ---------------------------------------------------------------
+    // Default view: the same bit set as ViewOption::DEFAULTS (0x07bb).
+    ViewOption::ViewOption() : opt(0x07bb)
+    {}
+
+    // ---------------------------------------------------------------
+    // View made of exactly the option bits passed in by the caller.
+    ViewOption::ViewOption(int option) : opt(option)
+    {}
+
 
     // ---------------------------------------------------------------
     /*
@@ -130,14 +139,14 @@ namespace zypp
       inline void
       checkUrlData(const std::string &data,
                    const std::string &name,
-                   const std::string &regx)
+                   const std::string &regx,
+                   bool               show=true)
       {
         if( regx.empty() || regx == "^$")
         {
-          throw std::invalid_argument(
-            std::string("Url scheme does not allow a " +
-                        name)
-          );
+          ZYPP_THROW(UrlNotAllowedException(
+            str::form(_("Url scheme does not allow a %s"), name.c_str())
+          ));
         }
         else
         {
@@ -145,6 +154,7 @@ namespace zypp
           try
           {
             str::regex rex(regx);
+            std::cout << "testing *" << data << "* against *" << regx << "*" << std::endl;
             valid = str::regex_match(data, rex);
           }
           catch( ... )
@@ -152,10 +162,19 @@ namespace zypp
 
           if( !valid)
           {
-            throw std::invalid_argument(
-              std::string("Invalid " + name + " argument '" +
-                          data + "'")
-              );
+            if( show)
+            {
+              ZYPP_THROW(UrlBadComponentException(
+                str::form(_("Invalid %s component '%s'"),
+                          name.c_str(), data.c_str())
+              ));
+            }
+            else
+            {
+              ZYPP_THROW(UrlBadComponentException(
+                str::form(_("Invalid %s component"), name.c_str())
+              ));
+            }
           }
         }
       }
@@ -234,7 +253,20 @@ namespace zypp
       config("safe_querystr",   "~!$&'()*+=,:;@/?");
       config("safe_fragment",   "~!$&'()*+=,:;@/?");
 
+      // y=yes (allowed)
+      // n=no  (disallowed, exception if !empty)
       config("with_authority",  "y");
+      config("with_port",       "y");
+
+      // y=yes (required but don't throw if empty)
+      // n=no  (not required, ignore if empty)
+      // m=mandatory (exception if empty)
+      config("require_host",    "n");
+      config("require_pathname","n");
+
+      // y=yes (encode the second slash of a leading "//" even if an authority is present)
+      // n=no  (don't encode the second slash if an authority is present)
+      config("path_encode_slash2", "n");
 
       config("rx_username",     "^([a-zA-Z0-9!$&'\\(\\)*+=,;~\\._-]|%[a-fA-F0-9]{2})+$");
       config("rx_password",     "^([a-zA-Z0-9!$&'\\(\\)*+=,:;~\\._-]|%[a-fA-F0-9]{2})+$");
@@ -341,7 +373,7 @@ namespace zypp
       catch( ... )
       {}
 
-      if(scheme.empty() || valid)
+      if(valid)
       {
         std::string    lscheme( str::toLower(scheme));
         UrlSchemes     schemes( getKnownSchemes());
@@ -364,7 +396,31 @@ namespace zypp
     bool
     UrlBase::isValid() const
     {
-      return !getScheme().empty();
+      /*
+      ** scheme is the only mandatory component
+      ** for all url's and is already verified,
+      ** (except for empty Url instances), so
+      ** Url with empty scheme is never valid.
+      */
+      if( getScheme().empty())
+        return false;
+
+      std::string host( getHost(zypp::url::E_ENCODED));
+      if( host.empty() && config("require_host")     != "n")
+        return false;
+
+      std::string path( getPathName(zypp::url::E_ENCODED));
+      if( path.empty() && config("require_pathname") != "n")
+        return false;
+
+      /*
+      ** path has to begin with "/" if authority avaliable
+      ** if host is set after the pathname, we can't throw
+      */
+      if( !host.empty() && !path.empty() && path.at(0) != '/')
+        return false;
+
+      return true;
     }
 
 
@@ -383,6 +439,8 @@ namespace zypp
       std::string   url;
       UrlBaseData   tmp;
 
+      std::cout << "UrlBase:.asString()" << std::endl;
+
       if( opts.has(ViewOptions::WITH_SCHEME))
       {
         tmp.scheme = getScheme();
@@ -416,6 +474,8 @@ namespace zypp
                 }
               }
 
+              std::cout << "tmp.host: *" << tmp.host << "*" << std::endl;
+
               url += tmp.host;
 
               if( opts.has(ViewOptions::WITH_PORT))
@@ -427,6 +487,10 @@ namespace zypp
                 }
               }
             }
+            else if( opts.has(ViewOptions::EMPTY_AUTHORITY))
+            {
+              url += "//";
+            }
           }
           else if( opts.has(ViewOptions::EMPTY_AUTHORITY))
           {
@@ -438,12 +502,18 @@ namespace zypp
       if( opts.has(ViewOptions::WITH_PATH_NAME))
       {
         tmp.pathname = getPathName(zypp::url::E_ENCODED);
+        std::cout << "pathname: *" << tmp.pathname << "*" << std::endl;
         if( !tmp.pathname.empty())
         {
-          if( (!tmp.host.empty() || opts.has(ViewOptions::EMPTY_AUTHORITY))
-              && (tmp.pathname.at(0) != '/'))
+          if(url.find("/") != std::string::npos)
           {
-            url += "/";
+            // Url contains authority (that may be empty),
+            // we may need a rewrite of the encoded path.
+            tmp.pathname = cleanupPathName(tmp.pathname, true);
+            if(tmp.pathname.at(0) != '/')
+            {
+              url += "/";
+            }
           }
           url += tmp.pathname;
 
@@ -460,7 +530,8 @@ namespace zypp
             }
           }
         }
-        else if( opts.has(ViewOptions::EMPTY_PATH_NAME))
+        else if( opts.has(ViewOptions::EMPTY_PATH_NAME)
+                 && url.find("/") != std::string::npos)
         {
           url += "/";
           if( opts.has(ViewOptions::EMPTY_PATH_PARAMS))
@@ -612,7 +683,7 @@ namespace zypp
       if(eflag == zypp::url::E_DECODED)
         return zypp::url::decode(m_data->pathname);
       else
-        return m_data->pathname;
+        return cleanupPathName(m_data->pathname);
     }
 
 
@@ -652,9 +723,9 @@ namespace zypp
       if( config("psep_pathparam").empty() ||
           config("vsep_pathparam").empty())
       {
-        throw std::logic_error(
-          "Path parameter parsing not supported for this URL"
-        );
+        ZYPP_THROW(UrlNotSupportedException(
+          _("Path parameter parsing not supported for this URL")
+        ));
       }
       zypp::url::ParamMap pmap;
       zypp::url::split(
@@ -707,9 +778,9 @@ namespace zypp
       if( config("psep_querystr").empty() ||
           config("vsep_querystr").empty())
       {
-        throw std::logic_error(
-          "Query string parsing not supported for this URL"
-        );
+        ZYPP_THROW(UrlNotSupportedException(
+          _("Query string parsing not supported for this URL")
+        ));
       }
       zypp::url::ParamMap pmap;
       zypp::url::split(
@@ -743,10 +814,17 @@ namespace zypp
         m_data->scheme = str::toLower(scheme);
       }
       else
+      if( scheme.empty())
       {
-        throw std::invalid_argument(
-          std::string("Invalid Url scheme '" + scheme + "'")
-        );
+        ZYPP_THROW(UrlBadComponentException(
+          _("Url scheme is a required component")
+        ));
+      }
+      else
+      {
+        ZYPP_THROW(UrlBadComponentException(
+          str::form(_("Invalid Url scheme '%s'"), scheme.c_str())
+        ));
       }
     }
 
@@ -755,30 +833,34 @@ namespace zypp
     void
     UrlBase::setAuthority(const std::string &authority)
     {
-      str::smatch out;
-      bool        ret = false;
+      std::string s = authority;
+      std::string::size_type p,q;
 
-      try
-      {
-        str::regex  rex(RX_SPLIT_AUTHORITY);
-        ret = str::regex_match(authority, out, rex);
-      }
-      catch( ... )
-      {}
+      std::string username, password, host, port;
+
+      std::cout << "authority: " << authority << "*" << std::endl;
 
-      if( ret && out.size() == 8)
+      if ((p=s.find('@')) != std::string::npos)
       {
-        setUsername(out[2].str(), zypp::url::E_ENCODED);
-        setPassword(out[4].str(), zypp::url::E_ENCODED);
-        setHost(out[5].str());
-        setPort(out[7].str());
+        q = s.find(':');
+        if (q != std::string::npos && q < p)
+        {
+          setUsername(s.substr(0, q), zypp::url::E_ENCODED);
+          setPassword(s.substr(q+1, p-q-1), zypp::url::E_ENCODED);
+        }
+        else
+          setUsername(s.substr(0, p), zypp::url::E_ENCODED);
+        s = s.substr(p+1);
       }
-      else
+      q = s.rfind(']');
+      if ((p = s.rfind(':')) != std::string::npos && p > q+1)
       {
-        throw std::invalid_argument(
-          "Unable to parse Url authority"
-        );
+
+        setHost(s.substr(0, p));
+        setPort(s.substr(p+1));
       }
+      else
+        setHost(s);
     }
 
     // ---------------------------------------------------------------
@@ -843,7 +925,7 @@ namespace zypp
         else
         {
           m_data->fragment = zypp::url::encode(
-            fragment, config("safe_password")
+            fragment, config("safe_fragment")
           );
         }
       }
@@ -863,9 +945,9 @@ namespace zypp
       {
         if( config("with_authority") != "y")
         {
-          throw std::invalid_argument(
-            std::string("Url scheme does not allow a username")
-          );
+          ZYPP_THROW(UrlNotAllowedException(
+            _("Url scheme does not allow a username")
+          ));
         }
 
         if(eflag == zypp::url::E_ENCODED)
@@ -897,14 +979,14 @@ namespace zypp
       {
         if( config("with_authority") != "y")
         {
-          throw std::invalid_argument(
-            std::string("Url scheme does not allow a password")
-          );
+          ZYPP_THROW(UrlNotAllowedException(
+            _("Url scheme does not allow a password")
+          ));
         }
 
         if(eflag == zypp::url::E_ENCODED)
         {
-          checkUrlData(pass, "password", config("rx_password"));
+          checkUrlData(pass, "password", config("rx_password"), false);
 
           m_data->pass = pass;
         }
@@ -924,17 +1006,25 @@ namespace zypp
     {
       if( host.empty())
       {
+        if(config("require_host") == "m")
+        {
+          ZYPP_THROW(UrlNotAllowedException(
+            _("Url scheme requires a host component")
+          ));
+        }
         m_data->host = host;
       }
       else
       {
         if( config("with_authority") != "y")
         {
-          throw std::invalid_argument(
-            std::string("Url scheme does not allow a host")
-          );
+          ZYPP_THROW(UrlNotAllowedException(
+            _("Url scheme does not allow a host component")
+          ));
         }
 
+        std::cout << "host: *" << host << "*" << std::endl;
+
         if( isValidHost(host))
         {
           std::string temp;
@@ -957,9 +1047,9 @@ namespace zypp
         }
         else
         {
-          throw std::invalid_argument(
-            std::string("Invalid host argument '" + host + "'")
-          );
+          ZYPP_THROW(UrlBadComponentException(
+            str::form(_("Invalid host component '%s'"), host.c_str())
+          ));
         }
       }
     }
@@ -975,11 +1065,12 @@ namespace zypp
       }
       else
       {
-        if( config("with_authority") != "y")
+        if( config("with_authority") != "y" ||
+            config("with_port")      != "y")
         {
-          throw std::invalid_argument(
-            std::string("Url scheme does not allow a port")
-          );
+          ZYPP_THROW(UrlNotAllowedException(
+            _("Url scheme does not allow a port")
+          ));
         }
 
         if( isValidPort(port))
@@ -988,9 +1079,9 @@ namespace zypp
         }
         else
         {
-          throw std::invalid_argument(
-            std::string("Invalid host argument '" + port + "'")
-          );
+          ZYPP_THROW(UrlBadComponentException(
+            str::form(_("Invalid port component '%s'"), port.c_str())
+          ));
         }
       }
     }
@@ -1003,25 +1094,55 @@ namespace zypp
     {
       if( path.empty())
       {
+        if(config("require_pathname") == "m")
+        {
+          ZYPP_THROW(UrlNotAllowedException(
+            _("Url scheme requires path name")
+          ));
+        }
         m_data->pathname = path;
       }
       else
       {
-        std::string data;
         if(eflag == zypp::url::E_ENCODED)
         {
           checkUrlData(path, "path name", config("rx_pathname"));
 
-          data = cleanupPathName(zypp::url::decode(path));
+          if( !getHost(zypp::url::E_ENCODED).empty())
+          {
+            // has to begin with a "/". For consistency with
+            // setPathName while the host is empty, we allow
+            // it in encoded ("%2f") form - cleanupPathName()
+            // will fix / decode the first slash if needed.
+            if(!(path.at(0) == '/' || (path.size() >= 3 &&
+                 str::toLower(path.substr(0, 3)) == "%2f")))
+            {
+              ZYPP_THROW(UrlNotAllowedException(
+                _("Relative path not allowed if authority exists")
+              ));
+            }
+          }
+
+          m_data->pathname = cleanupPathName(path);
         }
-        else
+        else //     zypp::url::E_DECODED
         {
-          data = cleanupPathName(path);
-        }
+          if( !getHost(zypp::url::E_ENCODED).empty())
+          {
+            if(path.at(0) != '/')
+            {
+              ZYPP_THROW(UrlNotAllowedException(
+                _("Relative path not allowed if authority exists")
+              ));
+            }
+          }
 
-        m_data->pathname = zypp::url::encode(
-          data, config("safe_pathname")
-        );
+          m_data->pathname = cleanupPathName(
+            zypp::url::encode(
+              path, config("safe_pathname")
+            )
+          );
+        }
       }
     }
 
@@ -1063,9 +1184,9 @@ namespace zypp
       if( config("psep_pathparam").empty() ||
           config("vsep_pathparam").empty())
       {
-        throw std::logic_error(
-          "Path Parameter parsing not supported for this URL"
-        );
+        ZYPP_THROW(UrlNotSupportedException(
+          _("Path Parameter parsing not supported for this URL")
+        ));
       }
       setPathParams(
         zypp::url::join(
@@ -1108,9 +1229,9 @@ namespace zypp
       if( config("psep_querystr").empty() ||
           config("vsep_querystr").empty())
       {
-        throw std::logic_error(
-          "Query string parsing not supported for this URL"
-        );
+        ZYPP_THROW(UrlNotSupportedException(
+          _("Query string parsing not supported for this URL")
+        ));
       }
       setQueryString(
         zypp::url::join(
@@ -1134,27 +1255,67 @@ namespace zypp
 
     // ---------------------------------------------------------------
     std::string
-    UrlBase::cleanupPathName(const std::string &path)
+    UrlBase::cleanupPathName(const std::string &path) const
     {
-      size_t pos = 0;
+      bool authority = !getHost(zypp::url::E_ENCODED).empty();
+      return cleanupPathName(path, authority);
+    }
 
-      while( pos < path.length() && path.at(pos) == '/')
-        pos++;
+    // ---------------------------------------------------------------
+    std::string
+    UrlBase::cleanupPathName(const std::string &path, bool authority) const
+    {
+      std::string copy( path);
 
-      if( pos > 1)
+      // decode the first slash if it is encoded ("%2f" ==> "/") ...
+      if(copy.size() >= 3 && copy.at(0) != '/' &&
+         str::toLower(copy.substr(0, 3)) == "%2f")
       {
-        // make sure, there is not more than
-        // _one_ leading "/" in the path name.
-        return path.substr(pos - 1);
+        copy.replace(0, 3, "/");
       }
 
-      return std::string(path);
+      // A path beginning with a double slash ("//") violates the
+      // path-absolute rule of RFC 3986 if no authority is present.
+      // Encoding the second slash (before the first path segment)
+      // is minimal and IMO sufficient to fulfill it.
+      if( authority)
+      {
+        //
+        // rewrite of "//" to "/%2f" not required, use config
+        //
+        if(config("path_encode_slash2") == "y")
+        {
+          // rewrite "//" ==> "/%2f"
+          if(copy.size() >= 2 && copy.at(0) == '/' && copy.at(1) == '/')
+          {
+            copy.replace(1, 1, "%2F");
+          }
+        }
+        else
+        {
+          // rewrite "/%2f" ==> "//" (the escape is 3 characters long)
+          if(copy.size() >= 4 && copy.at(0) == '/' &&
+             str::toLower(copy.substr(1, 3)) == "%2f")
+          {
+            copy.replace(1, 3, "/");
+          }
+        }
+      }
+      else
+      {
+        // rewrite of "//" to "/%2f" is required (no authority)
+        if(copy.size() >= 2 && copy.at(0) == '/' && copy.at(1) == '/')
+        {
+          copy.replace(1, 1, "%2F");
+        }
+      }
+      return copy;
     }
 
 
     // ---------------------------------------------------------------
     bool
-    UrlBase::isValidHost(const std::string &host)
+    UrlBase::isValidHost(const std::string &host) const
     {
       try
       {
@@ -1183,19 +1344,12 @@ namespace zypp
 
     // ---------------------------------------------------------------
     bool
-    UrlBase::isValidPort(const std::string &port)
+    UrlBase::isValidPort(const std::string &port) const
     {
-      try
-      {
-        str::regex regx(RX_VALID_PORT);
-        if( str::regex_match(port, regx))
-        {
-          long pnum = str::strtonum<long>(port);
-          return ( pnum >= 1 && pnum <= USHRT_MAX);
-        }
-      }
-      catch( ... )
-      {}
+        char* endptr;
+        long pnum = strtol(port.c_str(), &endptr, 10);
+        return ( !port.empty() && !*endptr
+                 && pnum >= 1 && pnum <= USHRT_MAX);
 
       return false;
     }