*/
#include <zypp/url/UrlBase.h>
#include <zypp/base/String.h>
+#include <zypp/base/Gettext.h>
+#include <zypp/base/Regex.h>
#include <stdexcept>
#include <climits>
#include <sys/socket.h>
#include <arpa/inet.h>
+#include <iostream>
// ---------------------------------------------------------------
/*
**
** host = hostname | IPv4 | "[" IPv6-IP "]" | "[v...]"
*/
-#define RX_SPLIT_AUTHORITY \
- "^(([^:@]*)([:]([^@]*))?@)?(\\[[^]]+\\]|[^:]+)?([:](.*))?"
-
#define RX_VALID_SCHEME "^[a-zA-Z][a-zA-Z0-9\\.+-]*$"
-#define RX_VALID_PORT "^[0-9]{1,5}$"
-
#define RX_VALID_HOSTNAME "^[[:alnum:]]+([\\.-][[:alnum:]]+)*$"
#define RX_VALID_HOSTIPV4 \
/*
** URL asString() view option constants:
*/
- const ViewOptions ViewOptions::WITH_SCHEME = 0x0001;
- const ViewOptions ViewOptions::WITH_USERNAME = 0x0002;
- const ViewOptions ViewOptions::WITH_PASSWORD = 0x0004;
- const ViewOptions ViewOptions::WITH_HOST = 0x0008;
- const ViewOptions ViewOptions::WITH_PORT = 0x0010;
- const ViewOptions ViewOptions::WITH_PATH_NAME = 0x0020;
- const ViewOptions ViewOptions::WITH_PATH_PARAMS = 0x0040;
- const ViewOptions ViewOptions::WITH_QUERY_STR = 0x0080;
- const ViewOptions ViewOptions::WITH_FRAGMENT = 0x0100;
- const ViewOptions ViewOptions::EMPTY_AUTHORITY = 0x0200;
- const ViewOptions ViewOptions::EMPTY_PATH_NAME = 0x0400;
- const ViewOptions ViewOptions::EMPTY_PATH_PARAMS = 0x0800;
- const ViewOptions ViewOptions::EMPTY_QUERY_STR = 0x1000;
- const ViewOptions ViewOptions::EMPTY_FRAGMENT = 0x2000;
- const ViewOptions ViewOptions::DEFAULTS = 0x07bb;
+ const ViewOption ViewOption::WITH_SCHEME = 0x0001;
+ const ViewOption ViewOption::WITH_USERNAME = 0x0002;
+ const ViewOption ViewOption::WITH_PASSWORD = 0x0004;
+ const ViewOption ViewOption::WITH_HOST = 0x0008;
+ const ViewOption ViewOption::WITH_PORT = 0x0010;
+ const ViewOption ViewOption::WITH_PATH_NAME = 0x0020;
+ const ViewOption ViewOption::WITH_PATH_PARAMS = 0x0040;
+ const ViewOption ViewOption::WITH_QUERY_STR = 0x0080;
+ const ViewOption ViewOption::WITH_FRAGMENT = 0x0100;
+ const ViewOption ViewOption::EMPTY_AUTHORITY = 0x0200;
+ const ViewOption ViewOption::EMPTY_PATH_NAME = 0x0400;
+ const ViewOption ViewOption::EMPTY_PATH_PARAMS = 0x0800;
+ const ViewOption ViewOption::EMPTY_QUERY_STR = 0x1000;
+ const ViewOption ViewOption::EMPTY_FRAGMENT = 0x2000;
+ const ViewOption ViewOption::DEFAULTS = 0x07bb; // = WITH_SCHEME + WITH_USERNAME + WITH_HOST + WITH_PORT + WITH_PATH_NAME + WITH_QUERY_STR + WITH_FRAGMENT + EMPTY_AUTHORITY + EMPTY_PATH_NAME
/*
- ViewOptions::WITH_SCHEME +
- ViewOptions::WITH_USERNAME +
- ViewOptions::WITH_HOST +
- ViewOptions::WITH_PORT +
- ViewOptions::WITH_PATH_NAME +
- ViewOptions::WITH_QUERY_STR +
- ViewOptions::WITH_FRAGMENT +
- ViewOptions::EMPTY_AUTHORITY +
- ViewOptions::EMPTY_PATH_NAME;
+ const ViewOption ViewOption::DEFAULTS =
+ ViewOption::WITH_SCHEME +
+ ViewOption::WITH_USERNAME +
+ ViewOption::WITH_HOST +
+ ViewOption::WITH_PORT +
+ ViewOption::WITH_PATH_NAME +
+ ViewOption::WITH_QUERY_STR +
+ ViewOption::WITH_FRAGMENT +
+ ViewOption::EMPTY_AUTHORITY +
+ ViewOption::EMPTY_PATH_NAME;
*/
+ // ---------------------------------------------------------------
+ // Default construction selects the bit mask 0x07bb, i.e. the same
+ // value ViewOption::DEFAULTS is defined with.
+ ViewOption::ViewOption() : opt(0x07bb) {}
+
+ // ---------------------------------------------------------------
+ // Construct from a raw bit mask; the value is stored unchanged.
+ ViewOption::ViewOption(int option) : opt(option) {}
+
// ---------------------------------------------------------------
/*
inline void
checkUrlData(const std::string &data,
const std::string &name,
- const std::string &regx)
+ const std::string &regx,
+ bool show=true)
{
+ // Validates the url component 'data' (called 'name' in messages)
+ // against the regular expression 'regx'.
+ //  - An empty regex or "^$" means the scheme does not allow this
+ //    component at all: throws UrlNotAllowedException.
+ //  - A failed match throws UrlBadComponentException; the offending
+ //    value is only included in the message if 'show' is true.
+ // NOTE: do not log or print 'data' here. Callers pass show=false
+ // for components (e.g. the password) that must not be disclosed.
if( regx.empty() || regx == "^$")
{
- throw std::invalid_argument(
- std::string("Url scheme does not allow a " +
- name)
- );
+ ZYPP_THROW(UrlNotAllowedException(
+ str::form(_("Url scheme does not allow a %s"), name.c_str())
+ ));
}
else
{
try
{
str::regex rex(regx);
valid = str::regex_match(data, rex);
}
catch( ... )
if( !valid)
{
- throw std::invalid_argument(
- std::string("Invalid " + name + " argument '" +
- data + "'")
- );
+ if( show)
+ {
+ ZYPP_THROW(UrlBadComponentException(
+ str::form(_("Invalid %s component '%s'"),
+ name.c_str(), data.c_str())
+ ));
+ }
+ else
+ {
+ ZYPP_THROW(UrlBadComponentException(
+ str::form(_("Invalid %s component"), name.c_str())
+ ));
+ }
}
}
}
config("safe_querystr", "~!$&'()*+=,:;@/?");
config("safe_fragment", "~!$&'()*+=,:;@/?");
+ // y=yes (allowed)
+ // n=no (disallowed, exception if !empty)
config("with_authority", "y");
+ config("with_port", "y");
+
+ // y=yes (required but don't throw if empty)
+ // n=no (not required, ignore if empty)
+ // m=mandatory (exception if empty)
+ config("require_host", "n");
+ config("require_pathname","n");
+
+ // y=yes (encode the second slash even if an authority is present)
+ // n=no (don't encode the second slash if an authority is present)
+ config("path_encode_slash2", "n");
config("rx_username", "^([a-zA-Z0-9!$&'\\(\\)*+=,;~\\._-]|%[a-fA-F0-9]{2})+$");
config("rx_password", "^([a-zA-Z0-9!$&'\\(\\)*+=,:;~\\._-]|%[a-fA-F0-9]{2})+$");
catch( ... )
{}
- if(scheme.empty() || valid)
+ if(valid)
{
std::string lscheme( str::toLower(scheme));
UrlSchemes schemes( getKnownSchemes());
bool
UrlBase::isValid() const
{
- return !getScheme().empty();
+ /*
+ ** scheme is the only mandatory component
+ ** for all url's and is already verified,
+ ** (except for empty Url instances), so
+ ** Url with empty scheme is never valid.
+ */
+ if( getScheme().empty())
+ return false;
+
+ std::string host( getHost(zypp::url::E_ENCODED));
+ if( host.empty() && config("require_host") != "n")
+ return false;
+
+ std::string path( getPathName(zypp::url::E_ENCODED));
+ if( path.empty() && config("require_pathname") != "n")
+ return false;
+
+ /*
+ ** path has to begin with "/" if authority avaliable
+ ** if host is set after the pathname, we can't throw
+ */
+ if( !host.empty() && !path.empty() && path.at(0) != '/')
+ return false;
+
+ return true;
}
std::string url;
UrlBaseData tmp;
+ std::cout << "UrlBase:.asString()" << std::endl;
+
if( opts.has(ViewOptions::WITH_SCHEME))
{
tmp.scheme = getScheme();
}
}
+ std::cout << "tmp.host: *" << tmp.host << "*" << std::endl;
+
url += tmp.host;
if( opts.has(ViewOptions::WITH_PORT))
}
}
}
+ else if( opts.has(ViewOptions::EMPTY_AUTHORITY))
+ {
+ url += "//";
+ }
}
else if( opts.has(ViewOptions::EMPTY_AUTHORITY))
{
if( opts.has(ViewOptions::WITH_PATH_NAME))
{
tmp.pathname = getPathName(zypp::url::E_ENCODED);
+ std::cout << "pathname: *" << tmp.pathname << "*" << std::endl;
if( !tmp.pathname.empty())
{
- if( (!tmp.host.empty() || opts.has(ViewOptions::EMPTY_AUTHORITY))
- && (tmp.pathname.at(0) != '/'))
+ if(url.find("/") != std::string::npos)
{
- url += "/";
+ // Url contains authority (that may be empty),
+ // we may need a rewrite of the encoded path.
+ tmp.pathname = cleanupPathName(tmp.pathname, true);
+ if(tmp.pathname.at(0) != '/')
+ {
+ url += "/";
+ }
}
url += tmp.pathname;
}
}
}
- else if( opts.has(ViewOptions::EMPTY_PATH_NAME))
+ else if( opts.has(ViewOptions::EMPTY_PATH_NAME)
+ && url.find("/") != std::string::npos)
{
url += "/";
if( opts.has(ViewOptions::EMPTY_PATH_PARAMS))
if(eflag == zypp::url::E_DECODED)
return zypp::url::decode(m_data->pathname);
else
- return m_data->pathname;
+ return cleanupPathName(m_data->pathname);
}
if( config("psep_pathparam").empty() ||
config("vsep_pathparam").empty())
{
- throw std::logic_error(
- "Path parameter parsing not supported for this URL"
- );
+ ZYPP_THROW(UrlNotSupportedException(
+ _("Path parameter parsing not supported for this URL")
+ ));
}
zypp::url::ParamMap pmap;
zypp::url::split(
if( config("psep_querystr").empty() ||
config("vsep_querystr").empty())
{
- throw std::logic_error(
- "Query string parsing not supported for this URL"
- );
+ ZYPP_THROW(UrlNotSupportedException(
+ _("Query string parsing not supported for this URL")
+ ));
}
zypp::url::ParamMap pmap;
zypp::url::split(
m_data->scheme = str::toLower(scheme);
}
else
+ if( scheme.empty())
{
- throw std::invalid_argument(
- std::string("Invalid Url scheme '" + scheme + "'")
- );
+ ZYPP_THROW(UrlBadComponentException(
+ _("Url scheme is a required component")
+ ));
+ }
+ else
+ {
+ ZYPP_THROW(UrlBadComponentException(
+ str::form(_("Invalid Url scheme '%s'"), scheme.c_str())
+ ));
}
}
void
UrlBase::setAuthority(const std::string &authority)
{
- str::smatch out;
- bool ret = false;
+ std::string s = authority;
+ std::string::size_type p,q;
- try
- {
- str::regex rex(RX_SPLIT_AUTHORITY);
- ret = str::regex_match(authority, out, rex);
- }
- catch( ... )
- {}
+ std::string username, password, host, port;
+
+ std::cout << "authority: " << authority << "*" << std::endl;
- if( ret && out.size() == 8)
+ if ((p=s.find('@')) != std::string::npos)
{
- setUsername(out[2].str(), zypp::url::E_ENCODED);
- setPassword(out[4].str(), zypp::url::E_ENCODED);
- setHost(out[5].str());
- setPort(out[7].str());
+ q = s.find(':');
+ if (q != std::string::npos && q < p)
+ {
+ setUsername(s.substr(0, q), zypp::url::E_ENCODED);
+ setPassword(s.substr(q+1, p-q-1), zypp::url::E_ENCODED);
+ }
+ else
+ setUsername(s.substr(0, p), zypp::url::E_ENCODED);
+ s = s.substr(p+1);
}
- else
+ q = s.rfind(']');
+ if ((p = s.rfind(':')) != std::string::npos && p > q+1)
{
- throw std::invalid_argument(
- "Unable to parse Url authority"
- );
+
+ setHost(s.substr(0, p));
+ setPort(s.substr(p+1));
}
+ else
+ setHost(s);
}
// ---------------------------------------------------------------
else
{
m_data->fragment = zypp::url::encode(
- fragment, config("safe_password")
+ fragment, config("safe_fragment")
);
}
}
{
if( config("with_authority") != "y")
{
- throw std::invalid_argument(
- std::string("Url scheme does not allow a username")
- );
+ ZYPP_THROW(UrlNotAllowedException(
+ _("Url scheme does not allow a username")
+ ));
}
if(eflag == zypp::url::E_ENCODED)
{
if( config("with_authority") != "y")
{
- throw std::invalid_argument(
- std::string("Url scheme does not allow a password")
- );
+ ZYPP_THROW(UrlNotAllowedException(
+ _("Url scheme does not allow a password")
+ ));
}
if(eflag == zypp::url::E_ENCODED)
{
- checkUrlData(pass, "password", config("rx_password"));
+ checkUrlData(pass, "password", config("rx_password"), false);
m_data->pass = pass;
}
{
if( host.empty())
{
+ if(config("require_host") == "m")
+ {
+ ZYPP_THROW(UrlNotAllowedException(
+ _("Url scheme requires a host component")
+ ));
+ }
m_data->host = host;
}
else
{
if( config("with_authority") != "y")
{
- throw std::invalid_argument(
- std::string("Url scheme does not allow a host")
- );
+ ZYPP_THROW(UrlNotAllowedException(
+ _("Url scheme does not allow a host component")
+ ));
}
+ std::cout << "host: *" << host << "*" << std::endl;
+
if( isValidHost(host))
{
std::string temp;
}
else
{
- throw std::invalid_argument(
- std::string("Invalid host argument '" + host + "'")
- );
+ ZYPP_THROW(UrlBadComponentException(
+ str::form(_("Invalid host component '%s'"), host.c_str())
+ ));
}
}
}
}
else
{
- if( config("with_authority") != "y")
+ if( config("with_authority") != "y" ||
+ config("with_port") != "y")
{
- throw std::invalid_argument(
- std::string("Url scheme does not allow a port")
- );
+ ZYPP_THROW(UrlNotAllowedException(
+ _("Url scheme does not allow a port")
+ ));
}
if( isValidPort(port))
}
else
{
- throw std::invalid_argument(
- std::string("Invalid host argument '" + port + "'")
- );
+ ZYPP_THROW(UrlBadComponentException(
+ str::form(_("Invalid port component '%s'"), port.c_str())
+ ));
}
}
}
{
if( path.empty())
{
+ if(config("require_pathname") == "m")
+ {
+ ZYPP_THROW(UrlNotAllowedException(
+ _("Url scheme requires path name")
+ ));
+ }
m_data->pathname = path;
}
else
{
- std::string data;
if(eflag == zypp::url::E_ENCODED)
{
checkUrlData(path, "path name", config("rx_pathname"));
- data = cleanupPathName(zypp::url::decode(path));
+ if( !getHost(zypp::url::E_ENCODED).empty())
+ {
+ // has to begin with a "/". For consistency with
+ // setPathName while the host is empty, we allow
+ // it in encoded ("%2f") form - cleanupPathName()
+ // will fix / decode the first slash if needed.
+ if(!(path.at(0) == '/' || (path.size() >= 3 &&
+ str::toLower(path.substr(0, 3)) == "%2f")))
+ {
+ ZYPP_THROW(UrlNotAllowedException(
+ _("Relative path not allowed if authority exists")
+ ));
+ }
+ }
+
+ m_data->pathname = cleanupPathName(path);
}
- else
+ else // zypp::url::E_DECODED
{
- data = cleanupPathName(path);
- }
+ if( !getHost(zypp::url::E_ENCODED).empty())
+ {
+ if(path.at(0) != '/')
+ {
+ ZYPP_THROW(UrlNotAllowedException(
+ _("Relative path not allowed if authority exists")
+ ));
+ }
+ }
- m_data->pathname = zypp::url::encode(
- data, config("safe_pathname")
- );
+ m_data->pathname = cleanupPathName(
+ zypp::url::encode(
+ path, config("safe_pathname")
+ )
+ );
+ }
}
}
if( config("psep_pathparam").empty() ||
config("vsep_pathparam").empty())
{
- throw std::logic_error(
- "Path Parameter parsing not supported for this URL"
- );
+ ZYPP_THROW(UrlNotSupportedException(
+ _("Path Parameter parsing not supported for this URL")
+ ));
}
setPathParams(
zypp::url::join(
if( config("psep_querystr").empty() ||
config("vsep_querystr").empty())
{
- throw std::logic_error(
- "Query string parsing not supported for this URL"
- );
+ ZYPP_THROW(UrlNotSupportedException(
+ _("Query string parsing not supported for this URL")
+ ));
}
setQueryString(
zypp::url::join(
// ---------------------------------------------------------------
std::string
- UrlBase::cleanupPathName(const std::string &path)
+ UrlBase::cleanupPathName(const std::string &path) const
{
- size_t pos = 0;
+ // Convenience overload: an authority counts as present exactly
+ // when the (encoded) host of this url is not empty.
+ return cleanupPathName(path, !getHost(zypp::url::E_ENCODED).empty());
+ }
- while( pos < path.length() && path.at(pos) == '/')
- pos++;
+ // ---------------------------------------------------------------
+ std::string
+ UrlBase::cleanupPathName(const std::string &path, bool authority) const
+ {
+ // Normalizes the beginning of an encoded path name and returns
+ // the result; 'path' itself is not modified.
+ //  1. A leading encoded slash ("%2f", any case) is decoded to "/".
+ //  2. A leading "//" is only kept if an authority is present and
+ //     the config does not ask for path_encode_slash2=y; in that
+ //     case an encoded second slash ("/%2f") is decoded as well.
+ //     Otherwise the second slash is encoded ("/%2F").
+ std::string copy( path);
- if( pos > 1)
+ // decode the first slash if it is encoded ...
+ if(copy.size() >= 3 && copy.at(0) != '/' &&
+    str::toLower(copy.substr(0, 3)) == "%2f")
{
- // make sure, there is not more than
- // _one_ leading "/" in the path name.
- return path.substr(pos - 1);
+ copy.replace(0, 3, "/");
}
- return std::string(path);
+ if( authority && config("path_encode_slash2") != "y")
+ {
+   // rewrite "/%2f" ==> "//"
+   // The encoded slash is exactly three characters long: compare
+   // and replace three characters, not four.
+   if(copy.size() >= 4 && copy.at(0) == '/' &&
+      str::toLower(copy.substr(1, 3)) == "%2f")
+   {
+     copy.replace(1, 3, "/");
+   }
+ }
+ else
+ {
+   // rewrite "//" ==> "/%2F"
+   // Required without an authority to fulfill the path-absolute
+   // rule of RFC 3986 (no "//" at the start of the path); with an
+   // authority it is done on request of the config only.
+   if(copy.size() >= 2 && copy.at(0) == '/' && copy.at(1) == '/')
+   {
+     copy.replace(1, 1, "%2F");
+   }
+ }
+ return copy;
}
// ---------------------------------------------------------------
bool
- UrlBase::isValidHost(const std::string &host)
+ UrlBase::isValidHost(const std::string &host) const
{
try
{
// ---------------------------------------------------------------
bool
- UrlBase::isValidPort(const std::string &port)
+ UrlBase::isValidPort(const std::string &port) const
{
- try
- {
- str::regex regx(RX_VALID_PORT);
- if( str::regex_match(port, regx))
- {
- long pnum = str::strtonum<long>(port);
- return ( pnum >= 1 && pnum <= USHRT_MAX);
- }
- }
- catch( ... )
- {}
+ // A valid port consists of decimal digits only and its value is
+ // in the range 1..USHRT_MAX. The digits are checked by hand:
+ // strtol() would skip leading whitespace and accept a sign, so
+ // " 80" or "+80" would pass, which the former RX_VALID_PORT
+ // check did not allow.
+ if( port.empty())
+   return false;
+
+ long pnum = 0;
+ for( std::string::size_type i = 0; i < port.size(); ++i)
+ {
+   const char c = port[i];
+   if( c < '0' || c > '9')
+     return false;
+   pnum = pnum * 10 + (c - '0');
+   // stop early, this also keeps pnum from overflowing
+   if( pnum > USHRT_MAX)
+     return false;
+ }
+ return pnum >= 1;
- return false;
}