1 /*---------------------------------------------------------------------\
3 | |__ / \ / / . \ . \ |
8 \---------------------------------------------------------------------*/
9 /** \file zypp/CpeId.cc
13 #include "zypp/base/String.h"
14 #include "zypp/base/LogTools.h"
15 #include "zypp/base/NonCopyable.h"
17 #include "zypp/CpeId.h"
21 /** Initializer list with all wfn attributes */
22 #define WFN_ATTRIBUTES {\
29 Attribute::language, \
30 Attribute::sw_edition,\
31 Attribute::target_sw, \
32 Attribute::target_hw, \
36 ///////////////////////////////////////////////////////////////////
39 ///////////////////////////////////////////////////////////////////
42 /** Hex-digit to number or -1. */
43 inline int heDecodeCh( char ch )
45 if ( '0' <= ch && ch <= '9' )
47 if ( 'A' <= ch && ch <= 'F' )
48 return( ch - 'A' + 10 );
49 if ( 'a' <= ch && ch <= 'f' )
50 return( ch - 'a' + 10 );
/** Whether \a ch is a printable, non-whitespace character in
 *  <tt>['!','~']</tt>, i.e. inside the range that is valid in a WFN.
 */
inline bool chIsValidRange( char ch )
{
  if ( ch < '!' )
    return false;
  return ch <= '~';
}
/** Whether \a ch lies in <tt>[a-z]</tt> or <tt>[A-Z]</tt> (plain range compare). */
inline bool chIsAlpha( char ch )
{
  const bool isLower = ( ch >= 'a' && ch <= 'z' );
  const bool isUpper = ( ch >= 'A' && ch <= 'Z' );
  return isLower || isUpper;
}
/** Whether \a ch lies in <tt>[0-9]</tt>. */
inline bool chIsNum( char ch )
{
  const bool outOfRange = ( ch < '0' || '9' < ch );
  return !outOfRange;
}
67 inline bool chIsAlNum( char ch )
68 { return( chIsAlpha( ch ) || chIsNum( ch ) ); }
70 /** Alphanum or \c underscore are unescaped in WFN */
71 inline bool chIsWfnUnescaped( char ch )
72 { return( chIsAlNum( ch ) || ch == '_' ); }
75 ///////////////////////////////////////////////////////////////////
77 ///////////////////////////////////////////////////////////////////
78 /// \class CpeId::Impl
79 /// \brief CpeId implementation.
80 ///////////////////////////////////////////////////////////////////
81 class CpeId::Impl : private base::NonCopyable
83 typedef std::array<Value,Attribute::numAttributes> Wfn;
88 Impl( const std::string & cpe_r )
89 : _wfn( unbind( cpe_r ) )
93 explicit operator bool() const
94 { for ( const auto & val : _wfn ) if ( ! val.isANY() ) return true; return false; }
96 std::string asFs() const
100 for ( auto ai : WFN_ATTRIBUTES )
102 ret << ':' << _wfn[ai].asFs();
107 std::string asUri() const
112 unsigned colon = 0; // to remember trailing colons
113 for ( auto ai : WFN_ATTRIBUTES )
115 val = _wfn[ai].asUri();
117 if ( ai == Attribute::edition )
119 if ( ! ( _wfn[Attribute::sw_edition].isANY()
120 && _wfn[Attribute::target_sw].isANY()
121 && _wfn[Attribute::target_hw].isANY()
122 && _wfn[Attribute::other].isANY() ) )
126 << '~' << val//Attribute::edition
127 << '~' << _wfn[Attribute::sw_edition].asUri()
128 << '~' << _wfn[Attribute::target_sw].asUri()
129 << '~' << _wfn[Attribute::target_hw].asUri()
130 << '~' << _wfn[Attribute::other].asUri();
137 ret << std::string( colon, ':' );
144 if ( ai == Attribute::language )
145 break; // remaining attrs packaed in edition
150 std::string asWfn() const
154 for ( auto ai : WFN_ATTRIBUTES )
156 const Value & val( _wfn[ai] );
159 if ( ai ) ret << ',';
160 ret << Attribute::asString( ai ) << '=';
161 if ( val.isString() )
162 ret << '"' << val << '"';
164 ret << "NA"; // as ANY is omitted, it must be NA
171 SetCompare setRelationMixinCompare( const Impl & trg ) const
173 SetCompare ret = SetCompare::equal;
174 for ( auto ai : WFN_ATTRIBUTES )
176 switch ( _wfn[ai].compare( trg._wfn[ai] ).asEnum() )
178 case SetCompare::uncomparable:
179 ret = SetCompare::uncomparable;
182 case SetCompare::equal:
185 case SetCompare::properSubset:
186 if ( ret == SetCompare::equal )
187 ret = SetCompare::properSubset;
188 else if ( ret != SetCompare::properSubset )
189 ret = SetCompare::uncomparable;
192 case SetCompare::properSuperset:
193 if ( ret == SetCompare::equal )
194 ret = SetCompare::properSuperset;
195 else if ( ret != SetCompare::properSuperset )
196 ret = SetCompare::uncomparable;
199 case SetCompare::disjoint:
200 ret = SetCompare::disjoint;
203 if ( ret == SetCompare::uncomparable || ret == SetCompare::disjoint )
210 /** Assign \a val_r if it meets \a attr_r specific constraints.
211 * \throws std::invalid_argument if string is malformed
213 static void assignAttr( Wfn & wfn_r, Attribute attr_r, const Value & val_r )
215 if ( val_r.isString() )
217 switch ( attr_r.asEnum() )
219 case Attribute::part:
221 const std::string & wfn( val_r.asWfn() );
227 if ( wfn[1] == '\0' )
231 throw std::invalid_argument( "CpeId:Wfn:part: illegal value" );
237 case Attribute::language:
239 const std::string & wfn( val_r.asWfn() );
240 std::string::size_type len = 0;
241 // (2*3ALPHA) ["-" (2ALPHA / 3DIGIT)]
242 if ( chIsAlpha( wfn[0] ) && chIsAlpha( wfn[1] ) )
244 len = chIsAlpha( wfn[2] ) ? 3 : 2;
245 if ( wfn[len] == '-' )
247 if ( chIsAlpha( wfn[len+1] ) && chIsAlpha( wfn[len+2] ) )
249 else if ( chIsNum( wfn[len+1] ) && chIsNum( wfn[len+2] ) && chIsNum( wfn[len+3] ) )
253 if ( wfn.size() != len )
254 throw std::invalid_argument( "CpeId:Wfn:language: illegal value" );
263 wfn_r[attr_r.asIntegral()] = val_r;
267 /** Parse magic and unbind accordingly
268 * \throws std::invalid_argument if string is malformed
270 static Wfn unbind( const std::string & cpe_r );
272 /** Parse Uri and unbind
273 * \throws std::invalid_argument if string is malformed
275 static Wfn unbindUri( const std::string & cpe_r );
277 /** Parse Fs and unbind
278 * \throws std::invalid_argument if string is malformed
280 static Wfn unbindFs( const std::string & cpe_r );
286 CpeId::Impl::Wfn CpeId::Impl::unbind( const std::string & cpe_r )
294 if ( cpe_r[4] == '/' )
296 ret = unbindUri( cpe_r );
298 else if ( cpe_r[4] == '2'
303 ret = unbindFs( cpe_r );
306 throw std::invalid_argument( "CpeId: bad magic" );
308 else if ( cpe_r[0] != '\0' )
309 throw std::invalid_argument( "CpeId: bad magic" );
313 CpeId::Impl::Wfn CpeId::Impl::unbindUri( const std::string & cpe_r )
317 std::vector<std::string> field;
318 field.reserve( Attribute::numAttributes );
319 if ( str::splitFields( cpe_r.c_str()+5/* skip magic 'cpe:/' */, std::back_inserter(field), ":" ) > Attribute::numAttributes )
320 throw std::invalid_argument( "CpeId:Uri: too many fields" );
321 field.resize( Attribute::numAttributes ); // fillup with ANY("")
323 for ( auto ai : WFN_ATTRIBUTES )
325 if ( ai == Attribute::edition && field[ai][0] == '~' )
327 // unpacking is needed
328 static constexpr unsigned numPacks = 6u; // dummy_before_~ + edition + 4 extended attributes
329 std::vector<std::string> pack;
330 pack.reserve( numPacks );
331 if ( str::splitFields( field[ai], std::back_inserter(pack), "~" ) > numPacks )
332 throw std::invalid_argument( "CpeId:Uri: too many packs" );
333 pack.resize( numPacks ); // fillup with ANY(""), should be noOP
335 pack[1].swap( field[Attribute::edition] );
336 pack[2].swap( field[Attribute::sw_edition] );
337 pack[3].swap( field[Attribute::target_sw] );
338 pack[4].swap( field[Attribute::target_hw] );
339 pack[5].swap( field[Attribute::other] );
341 assignAttr( ret, ai, Value( field[ai], Value::uriFormat ) );
346 CpeId::Impl::Wfn CpeId::Impl::unbindFs( const std::string & cpe_r )
350 std::vector<std::string> field;
351 field.reserve( Attribute::numAttributes );
352 if ( str::splitFields( cpe_r.c_str()+8/* skip magic 'cpe:2.3:' */, std::back_inserter(field), ":" ) > Attribute::numAttributes )
353 throw std::invalid_argument( "CpeId:Fs: too many fields" );
354 field.resize( Attribute::numAttributes, "*" ); // fillup with ANY|"*"
356 for ( auto ai : WFN_ATTRIBUTES )
358 assignAttr( ret, ai, Value( field[ai], Value::fsFormat ) );
364 ///////////////////////////////////////////////////////////////////
366 ///////////////////////////////////////////////////////////////////
372 CpeId::CpeId( const std::string & cpe_r )
373 : _pimpl( new Impl( cpe_r ) )
376 CpeId::CpeId( const std::string & cpe_r, NoThrowType )
379 { _pimpl.reset( new Impl( cpe_r ) ); }
381 { _pimpl.reset( new Impl ); }
387 CpeId::operator bool() const
388 { return bool(*_pimpl); }
390 std::string CpeId::asFs() const
391 { return _pimpl->asFs(); }
393 std::string CpeId::asUri() const
394 { return _pimpl->asUri(); }
396 std::string CpeId::asWfn() const
397 { return _pimpl->asWfn(); }
399 SetCompare CpeId::setRelationMixinCompare( const CpeId & trg ) const
400 { return _pimpl->setRelationMixinCompare( *trg._pimpl ); }
402 ///////////////////////////////////////////////////////////////////
403 // class CpeId::WfnAttribute
404 ///////////////////////////////////////////////////////////////////
406 const std::string & CpeId::_AttributeDef::asString( Enum val_r )
408 static std::map<Enum,std::string> _table = {
409 #define OUTS(N) { N, #N }
423 return _table[val_r];
426 ///////////////////////////////////////////////////////////////////
427 // class CpeId::Value
428 ///////////////////////////////////////////////////////////////////
430 const CpeId::Value CpeId::Value::ANY;
431 const CpeId::Value CpeId::Value::NA( "" );
433 CpeId::Value::Value( const std::string & value_r )
435 if ( value_r.empty() ) // NA
437 if ( ! CpeId::Value::NA._value ) // initialized by this ctor!
438 _value.reset( new std::string );
440 _value = CpeId::Value::NA._value;
442 else if ( value_r != "*" ) // ANY is default constructed
444 bool starting = true; // false after the 1st non-?
445 for_( chp, value_r.begin(), value_r.end() )
451 if ( ! chIsValidRange( *chp ) )
454 throw std::invalid_argument( "CpeId:Wfn: illegal quoted character" );
456 throw std::invalid_argument( "CpeId:Wfn: Backslash escapes nothing" );
458 else if ( chIsWfnUnescaped( *chp ) )
459 throw std::invalid_argument( "CpeId:Wfn: unnecessarily quoted character" );
460 else if ( starting && *chp == '-' && chp+1 == value_r.end() )
461 throw std::invalid_argument( "CpeId:Wfn: '\\-' is illegal value" );
464 case '?': // sequence at beginning or end of string
465 while ( *(chp+1) == '?' )
467 if ( ! ( starting || chp+1 == value_r.end() ) )
468 throw std::invalid_argument( "CpeId:Wfn: embedded ?" );
471 case '*': // single at beginning or end of string
472 if ( ! ( starting || chp+1 == value_r.end() ) )
473 throw std::invalid_argument( "CpeId:Wfn: embedded *" );
476 default: // everything else unquoted
477 if ( ! chIsWfnUnescaped( *chp ) )
479 if ( chIsValidRange( *chp ) )
480 throw std::invalid_argument( "CpeId:Wfn: missing quote" );
482 throw std::invalid_argument( "CpeId:Wfn: illegal character" );
489 _value.reset( new std::string( value_r ) );
493 CpeId::Value::Value( const std::string & encoded_r, FsFormatType )
495 if ( encoded_r != "*" ) // ANY is default constructed
497 if ( encoded_r == "-" ) // NA
499 _value = CpeId::Value::NA._value;
504 bool starting = true; // false after the 1st non-?
505 for_( chp, encoded_r.begin(), encoded_r.end() )
509 case '\\': // may stay quoted
511 if ( chIsWfnUnescaped( *chp ) )
513 else if ( chIsValidRange( *chp ) )
514 result << '\\' << *chp;
516 throw std::invalid_argument( "CpeId:Fs: illegal quoted character" );
518 throw std::invalid_argument( "CpeId:Fs: Backslash escapes nothing" );
521 case '?': // sequence at beginning or end of string
523 while ( *(chp+1) == '?' )
528 if ( ! ( starting || chp+1 == encoded_r.end() ) )
529 throw std::invalid_argument( "CpeId:Fs: embedded ?" );
532 case '*': // single at beginning or end of string
533 if ( starting || chp+1 == encoded_r.end() )
536 throw std::invalid_argument( "CpeId:Fs: embedded *" );
540 if ( chIsWfnUnescaped( *chp ) )
542 else if ( chIsValidRange( *chp ) )
543 result << '\\' << *chp;
545 throw std::invalid_argument( "CpeId:Fs: illegal character" );
552 throw std::invalid_argument( "CpeId:Fs: '' is illegal" );
553 _value.reset( new std::string( result ) );
558 CpeId::Value::Value( const std::string & encoded_r, UriFormatType )
560 if ( ! encoded_r.empty() ) // ANY is default constructed
562 if ( encoded_r == "-" ) // NA
564 _value = CpeId::Value::NA._value;
569 bool starting = true; // false after the 1st non-? (%01)
570 for_( chp, encoded_r.begin(), encoded_r.end() )
574 if ( ch == '%' ) // legal '%xx' sequence first
576 int d1 = heDecodeCh( *(chp+1) );
579 int d2 = heDecodeCh( *(chp+2) );
582 chp += 2; // skip sequence
585 if ( d2 == 1 ) // %01 - ? valid sequence at begin or end
588 while ( *(chp+1) == '%' && *(chp+2) == '0' && *(chp+3) == '1' )
593 if ( starting || chp+1 == encoded_r.end() )
596 continue; // -> continue;
599 throw std::invalid_argument( "CpeId:Uri: embedded %01" );
601 else if ( d2 == 2 ) // %02 - * valid at begin or end
603 if ( starting || chp+1 == encoded_r.end() )
607 continue; // -> continue;
610 throw std::invalid_argument( "CpeId:Uri: embedded %02" );
614 if ( ! chIsValidRange( ch ) )
615 throw std::invalid_argument( "CpeId:Uri: illegal % encoded character" );
619 else if ( ! chIsValidRange( ch ) )
620 throw std::invalid_argument( "CpeId:Uri: illegal character" );
622 if ( chIsWfnUnescaped( ch ) )
625 result << '\\' << ch;
630 _value.reset( new std::string( result ) );
635 std::string CpeId::Value::asWfn() const
640 static const std::string any( "*" );
644 ret = *_value; // includes "" for NA
648 std::string CpeId::Value::asFs() const
653 static const std::string asterisk( "*" );
658 static const std::string dash( "-" );
664 for_( chp, _value->begin(), _value->end() )
676 result << *chp; // without escaping
680 throw std::invalid_argument( "CpeId:Wfn: Backslash escapes nothing" );
684 result << '\\' << *chp;
694 std::string CpeId::Value::asUri() const
696 std::string ret; // ANY
701 static const std::string dash( "-" );
707 for_( chp, _value->begin(), _value->end() )
709 if ( chIsWfnUnescaped( *chp ) )
715 static const char *const hdig = "0123456789abcdef";
724 result << *chp; // without encodeing
728 throw std::invalid_argument( "CpeId:Wfn: Backslash escapes nothing" );
732 result << '%' << hdig[(unsigned char)(*chp)/16] << hdig[(unsigned char)(*chp)%16];
746 throw std::invalid_argument( str::Str() << "CpeId:Wfn: illegal char '" << *chp << "' in WFN" );
757 ///////////////////////////////////////////////////////////////////
/** Whether \a ch_r is one of the two wildcard characters (<tt>[*?]</tt>). */
inline bool isWildchar( char ch_r )
{
  switch ( ch_r )
  {
    case '*':
    case '?':
      return true;
    default:
      return false;
  }
}
764 /** Whether there is an even number of consecutive backslashes before and including \a rbegin_r
765 * An even number of backslashes means the character following is unescaped.
767 inline bool evenNumberOfBackslashes( std::string::const_reverse_iterator rbegin_r, std::string::const_reverse_iterator rend_r )
769 unsigned backslashes = 0;
770 for_( it, rbegin_r, rend_r )
777 return !(backslashes & 1U);
780 /** Number of chars (not counting escaping backslashes) in <tt>[begin_r,end_r[</tt> */
781 inline unsigned trueCharsIn( const std::string & str_r, std::string::size_type begin_r, std::string::size_type end_r )
784 for_( it, begin_r, end_r )
787 if ( str_r[it] == '\\' )
796 /** Match helper comparing 2 Wildcardfree string values (case insensitive). */
797 inline bool matchWildcardfreeString( const std::string & lhs, const std::string & rhs )
798 { return( str::compareCI( lhs, rhs ) == 0 ); }
800 /** Match helper matching Wildcarded source against Wildcardfree target.
802 * Constraints on usage of the unquoted question mark (zero or one char in \a trg):
803 * 1. An unquoted question mark MAY be used at the beginning and/or the end of an
804 * attribute-value string.
805 * 2. A contiguous sequence of unquoted question marks MAY appear at the beginning
806 * and/or the end of an attribute-value string.
807 * 3. An unquoted question mark SHALL NOT be used in any other place in an
808 * attribute-value string.
810 * Constraints on usage of the unquoted asterisk (zero or more chars in \a trg):
811 * 1. A single unquoted asterisk MAY be used as the entire attribute-value string.
812 * 2. A single unquoted asterisk MAY be used at the beginning and/or end of an
813 * attribute-value string.
814 * 3. An unquoted asterisk SHALL NOT be used in any other place in an attribute-value
817 * Unquoted question marks and asterisks MAY appear in the same attribute-value string
818 * as long as they meet the constraints above.
820 * Example of illegal usage: "foo?bar", "bar??baz", "q??x",
821 * "foo*bar", "**foo", "bar***",
822 * "*?foobar", "foobar*?"
824 * \note Relies on \a src and \a trg being wellformed.
826 inline bool matchWildcardedString( std::string src, std::string trg )
828 // std::string::npos remembers an asterisk
829 // unescaped wildcard prefix
830 std::string::size_type prefx = 0;
831 switch ( *src.begin() ) // wellformed implies not empty
834 if ( src.size() == 1 )
835 return true; // "*" matches always: superset
837 prefx = std::string::npos;
842 for_( it, ++src.begin(), src.end() )
843 { if ( *it == '?' ) ++prefx; else break; }
844 if ( src.size() == prefx )
845 return( trg.size() <= prefx ); // "??..?": superset if at most #prefx chars
847 src.erase( 0, prefx );
852 // unescaped wildcard suffix
853 std::string::size_type suffx = 0;
856 switch ( *src.rbegin() )
859 if ( evenNumberOfBackslashes( ++src.rbegin(), src.rend() ) )
861 suffx = std::string::npos;
862 src.erase( src.size()-1 );
867 for_( it, ++src.rbegin(), src.rend() )
868 { if ( *it == '?' ) ++suffx; else break; }
869 if ( ! evenNumberOfBackslashes( src.rbegin()+suffx, src.rend() ) )
870 --suffx; // last '?' was escaped.
871 src.erase( src.size()-suffx );
877 // now match; find src in trg and check surrounding wildcards
878 src = str::toLower( src );
879 trg = str::toLower( trg );
880 for ( std::string::size_type match = trg.find( src, 0 );
881 match != std::string::npos;
882 match = trg.find( src, match+1 ) )
884 if ( prefx != std::string::npos && trueCharsIn( trg, 0, match ) > prefx )
885 break; // not "*", and already more chars than "?"s before match: disjoint
886 std::string::size_type frontSize = match + src.size();
887 if ( suffx != std::string::npos && trueCharsIn( trg, frontSize, trg.size() ) > suffx )
888 continue; // not "*", and still more chars than "?"s after match: check next match
889 return true; // match: superset
891 return false; // disjoint
894 ///////////////////////////////////////////////////////////////////
/** Whether the stored string starts with a wildcard character or ends with
 *  an unescaped one.
 *
 * The first character is tested directly with \ref isWildchar. The last
 * character only counts if \ref evenNumberOfBackslashes holds for the
 * characters preceding it.
 *
 * \note \c _value is dereferenced and its first/last character is read
 *       without any check here. NOTE(review): presumably callers invoke this
 *       only for non-empty string values (neither ANY nor NA) - confirm.
 */
896 bool CpeId::Value::containsWildcard() const
898 const std::string & value( *_value );
// leading wildcard, or trailing wildcard that is not escaped by a backslash
899 return ( isWildchar( *value.begin() )
900 || ( isWildchar( *value.rbegin() ) && evenNumberOfBackslashes( ++value.rbegin(), value.rend() ) ) );
903 SetCompare CpeId::Value::setRelationMixinCompare( const CpeId::Value & trg ) const
905 static const SetCompare _NeedsCloserLook( SetCompare::Enum(-1) ); // artificial Compare value
906 static const SetCompare matchTabel[4][4] = {{
907 /* ANY, ANY */ SetCompare::equal,
908 /* ANY, NA */ SetCompare::properSuperset,
909 /* ANY, wildcardfree */ SetCompare::properSuperset,
910 /* ANY, wildcarded */ SetCompare::uncomparable,
912 /* NA, ANY */ SetCompare::properSubset,
913 /* NA, NA */ SetCompare::equal,
914 /* NA, wildcardfree */ SetCompare::disjoint,
915 /* NA, wildcarded */ SetCompare::uncomparable,
917 /* wildcardfree, ANY */ SetCompare::properSubset,
918 /* wildcardfree, NA */ SetCompare::disjoint,
919 /* wildcardfree, wildcardfree */ _NeedsCloserLook, // equal or disjoint
920 /* wildcardfree, wildcarded */ SetCompare::uncomparable,
922 /* wildcarded, ANY */ SetCompare::properSubset,
923 /* wildcarded, NA */ SetCompare::disjoint,
924 /* wildcarded, wildcardfree */ _NeedsCloserLook, // superset or disjoint
925 /* wildcarded, wildcarded */ SetCompare::uncomparable,
928 Type srcType = type();
929 Type trgType = trg.type();
930 SetCompare ret = matchTabel[srcType.asIntegral()][trgType.asIntegral()];
931 if ( ret == _NeedsCloserLook )
933 if ( srcType == Type::wildcardfree ) // trgType == Type::wildcardfree
935 // simple string compare
936 ret = matchWildcardfreeString( *_value, *trg._value ) ? SetCompare::equal : SetCompare::disjoint;
938 else if ( srcType == Type::wildcarded ) // trgType == Type::wildcardfree
940 // Needs wildcard compare
941 ret = matchWildcardedString( *_value, *trg._value ) ? SetCompare::properSuperset : SetCompare::disjoint;
947 std::ostream & operator<<( std::ostream & str, const CpeId::Value & obj )
948 { return str << obj.asString(); }
951 ///////////////////////////////////////////////////////////////////