YUM primary.xml.gz file reader almost done
author Jan Kupec <jkupec@suse.cz>
Wed, 25 Apr 2007 15:44:41 +0000 (15:44 +0000)
committer Jan Kupec <jkupec@suse.cz>
Wed, 25 Apr 2007 15:44:41 +0000 (15:44 +0000)
devel/devel.jkupec/YUMParser.cc
devel/devel.jkupec/YUMParser.h
devel/devel.jkupec/YUMParser_test.cc
zypp/parser/yum/PrimaryFileReader.cc
zypp/parser/yum/PrimaryFileReader.h

index 563dc4f..fa961c3 100644 (file)
@@ -1,4 +1,3 @@
-//#include "zypp/data/ResolvableDataConsumer.h"
 #include "zypp/ZYpp.h"
 #include "zypp/ZYppFactory.h"
 #include "zypp/base/Logger.h"
@@ -23,30 +22,29 @@ namespace zypp
 
     MIL << "constructed" << endl;
   }
-  
-  
-  
-  bool YUMParser::primary_CB(const zypp::data::Package &package)
+
+
+  bool YUMParser::primary_CB(const zypp::data::Package &package, const zypp::data::Dependencies &deps)
   {
-//    data::RecordId pkgid = _consumer.appendResolvable( _catalog_id, ResTraits<Package>::kind, nvra, deps );
+    NVRA nvra(package.name, package.edition, package.arch);
+    data::RecordId pkgid =
+      _consumer.appendResolvable(
+        _catalog_id, ResTraits<Package>::kind, nvra, deps);
 
-    MIL << "got package "
+/*    MIL << "got package "
       << package.name << package.edition << " "
       << package.arch
       << endl;
-/*    MIL << "checksum: " << package.checksum << endl;
+    MIL << "checksum: " << package.checksum << endl;
     MIL << "summary: " << package.summary << endl;*/
   }
 
-  void YUMParser::start(const Pathname &cache_dir, Progress progress_fnc)
+  void YUMParser::start(const Pathname &cache_dir, ParserProgress::Ptr progress)
   {
-    progress_fnc(0);
-
     zypp::parser::yum::PrimaryFileReader(
         cache_dir + "/repodata/primary.xml.gz",
-        bind(&YUMParser::primary_CB, this, _1));
-
-    progress_fnc(100);
+        bind(&YUMParser::primary_CB, this, _1, _2),
+        progress);
   }
 
 
index ad13b26..323bb1e 100644 (file)
@@ -5,6 +5,7 @@
 #include "zypp2/cache/CacheStore.h"
 #include "zypp/data/ResolvableData.h"
 #include "zypp/parser/yum/PrimaryFileReader.h"
+#include "zypp/parser/ParserProgress.h"
 
 #undef ZYPP_BASE_LOGGER_LOGGROUP
 #define ZYPP_BASE_LOGGER_LOGGROUP "parser"
@@ -25,9 +26,9 @@ namespace zypp
 
     YUMParser(const zypp::data::RecordId &catalog_id, zypp::cache::CacheStore &consumer);
 
-    void start(const zypp::Pathname &path, Progress progress_fnc);
+    void start(const zypp::Pathname &path, ParserProgress::Ptr progress);
 
-    bool primary_CB(const zypp::data::Package &package);
+    bool primary_CB(const zypp::data::Package &package, const zypp::data::Dependencies &deps);
     bool test() { return true; }
 
   private:
@@ -36,8 +37,8 @@ namespace zypp
   };
 
 
-    }
-  }
+    } // ns yum
+  } // ns parser
 } // ns zypp
 
 #endif /*YUMPARSER_H_*/
index a199d72..a31d4e8 100644 (file)
@@ -4,6 +4,9 @@
 #include "zypp/base/LogControl.h"
 #include "zypp/parser/yum/PrimaryFileReader.h"
 #include "YUMParser.h"
+#include "zypp/parser/ParserProgress.h"
+#include "zypp/base/Measure.h"
+
 
 
 #undef ZYPP_BASE_LOGGER_LOGGROUP
 using namespace std;
 using namespace zypp;
 using namespace zypp::parser::yum;
+using zypp::debug::Measure;
 
 bool progress_function(int p)
 {
-  MIL << p << "%" << endl;
+//  cout << "\r                                       " << flush;
+  cout << "\rParsing primary.xml.gz [" << p << "%]" << flush;
+//  MIL << p << "%" << endl;
 }
 
 int main(int argc, char **argv)
 {
   base::LogControl::instance().logfile("yumparsertest.log");
+  
+  if (argc < 2)
+  {
+    cout << "usage: yumparsertest path/to/yumsourcedir" << endl << endl;
+    return 1;
+  }
 
   try
   {
     ZYpp::Ptr z = getZYpp();
-//, bind( &YUMDownloader::patches_Callback, this, _1, _2));
 
-//    Pathname dbfile = Pathname(getenv("PWD")) + "data.db";
+    Measure open_catalog_timer("CacheStore: lookupOrAppendCatalog");
+
     cache::CacheStore store(getenv("PWD"));
-    data::RecordId catalog_id = store.lookupOrAppendCatalog( Url("http://www.google.com"), "/");
+    data::RecordId catalog_id = store.lookupOrAppendCatalog( Url("http://some.url"), "/");
+
+    open_catalog_timer.stop();
 
     MIL << "creating PrimaryFileParser" << endl;
+    parser::ParserProgress::Ptr progress;
+    progress.reset(new parser::ParserProgress(&progress_function));
+    Measure parse_primary_timer("primary.xml.gz parsing");
+
     parser::yum::YUMParser parser( catalog_id, store);
-    parser.start(argv[1], &progress_function);
+    parser.start(argv[1], progress);
+
+    parse_primary_timer.stop();
 
-/*
-      YUMDownloader downloader(Url(argv[1]), "/");
-      downloader.download(argv[2]);*/
+    cout << endl;
   }
   catch ( const Exception &e )
   {
index 16408e7..578ea8b 100644 (file)
@@ -1,11 +1,10 @@
-#include "zypp/base/String.h"
+//#include "zypp/base/String.h"
 #include "zypp/base/Logger.h"
 #include "zypp/parser/yum/PrimaryFileReader.h"
 #include "zypp/Arch.h"
 #include "zypp/Edition.h"
 #include "zypp/TranslatedText.h"
 
-
 using namespace std;
 using namespace zypp::xml;
 
@@ -17,21 +16,29 @@ namespace zypp
     {
 
 
-  PrimaryFileReader::PrimaryFileReader(const Pathname &primary_file, ProcessPackage callback)
-     : _callback(callback), _package(NULL), _count(0), _tag(tag_NONE)
+  PrimaryFileReader::PrimaryFileReader(const Pathname &primary_file, ProcessPackage callback, ParserProgress::Ptr progress)
+    : _callback(callback), _package(NULL), _count(0), _total_packages(0),
+      _tag(tag_NONE), _expect_rpm_entry(false), _dtype(zypp::Dep::REQUIRES),
+      _progress(progress), _old_progress(0)
   {
     Reader reader( primary_file );
     MIL << "Reading " << primary_file << endl;
     reader.foreachNode( bind( &PrimaryFileReader::consumeNode, this, _1 ) );
   }
-  
+
   bool PrimaryFileReader::consumeNode(Reader & reader_r)
   {
-    if (_tag = tag_format)
+//    DBG << "**node: " << reader_r->name() << " (" << reader_r->nodeType() << ")" << endl;
+    if (_tag == tag_format)
       return consumeFormatChildNodes(reader_r);
   
     if (reader_r->nodeType() == XML_READER_TYPE_ELEMENT)
     {
+      if (reader_r->name() == "metadata")
+      {
+        zypp::str::strtonum(reader_r->getAttribute("packages").asString(), _total_packages);
+        return true;
+      }
       if (reader_r->name() == "package")
       {
         _tag = tag_package;
@@ -91,54 +98,166 @@ namespace zypp
         _package->packager = reader_r.nodeText().asString(); 
         return true;
       }
-  
+
       // TODO url
       // TODO time
       // TODO size
-  
+
       if (reader_r->name() == "location")
       {
         _package->location = reader_r->getAttribute("href").asString();
+        return true;
       }
-  
+
       if (reader_r->name() == "format")
       {
         _tag = tag_format;
-        consumeFormatChildNodes(reader_r);
+        _deps.clear();
+        return true;
       }
     }
     else if ( reader_r->nodeType() == XML_READER_TYPE_END_ELEMENT )
     {
       if (reader_r->name() == "package")
       {
-        _callback(*_package);
+        _callback(*_package, _deps);
         if (_package)
         {
           delete _package;
           _package = NULL;
         }
         _count++;
+
+        // report progress
+        long int new_progress = (long int) ((_count/(double) _total_packages)*100);
+        if (new_progress - _old_progress >= 5)
+        {
+          _progress->progress(new_progress);
+          _old_progress = new_progress;
+        }
         _tag = tag_NONE;
+        return true;
       }
-      if (reader_r->name() == "metadata")
-      {
-        MIL << _count << " packages read." << endl;
-      }
-      return true;
     }
-  
+
     return true;
   }
-  
+
+
+  // --------------( consume <format> tag )------------------------------------
+
   bool PrimaryFileReader::consumeFormatChildNodes(Reader & reader_r)
   {
+//    DBG << "format subtag: " << reader_r->name() << endl;
     if (reader_r->nodeType() == XML_READER_TYPE_ELEMENT)
     {
-      
+      if (reader_r->name() == "rpm:entry")
+      {
+        if (!_expect_rpm_entry)
+        {
+          // TODO make this a ParseException (once created/taken out of tagfile ns?)
+          ZYPP_THROW(Exception("rpm:entry found when not expected"));
+        }
+
+        Edition edition(
+          reader_r->getAttribute("ver").asString(),
+          reader_r->getAttribute("rel").asString(),
+          reader_r->getAttribute("epoch").asString()
+        );
+/*
+        DBG << "got rpm:entry for " << _dtype << ": "
+            << reader_r->getAttribute("name").asString()
+            << " " << edition << endl;
+*/
+        _deps[_dtype].push_back(
+          zypp::capability::parse(
+            ResTraits<Package>::kind,
+            reader_r->getAttribute("name").asString(),
+            Rel(reader_r->getAttribute("flags").asString()),
+            edition
+          )
+        );
+      }
+
+      // TODO license
+      // TODO vendor
+      // TODO group
+      // TODO buildhost
+      // TODO sourcerpm
+      // TODO header-range
+
+      if (reader_r->name() == "rpm:provides")
+      {
+        _dtype = zypp::Dep::PROVIDES;
+        _expect_rpm_entry = true;
+        return true;
+      }
+      if (reader_r->name() == "rpm:conflicts")
+      {
+        _dtype = zypp::Dep::CONFLICTS;
+        _expect_rpm_entry = true;
+        return true;
+      }
+      if (reader_r->name() == "rpm:obsoletes")
+      {
+        _dtype = zypp::Dep::OBSOLETES;
+        _expect_rpm_entry = true;
+        return true;
+      }
+      if (reader_r->name() == "rpm:requires")
+      {
+        _dtype = zypp::Dep::REQUIRES;
+        _expect_rpm_entry = true;
+        return true;
+      }
+      if (reader_r->name() == "rpm:recommends")
+      {
+        _dtype = zypp::Dep::RECOMMENDS;
+        _expect_rpm_entry = true;
+        return true;
+      }
+      if (reader_r->name() == "rpm:enhances")
+      {
+        _dtype = zypp::Dep::ENHANCES;
+        _expect_rpm_entry = true;
+        return true;
+      }
+      if (reader_r->name() == "rpm:supplements")
+      {
+        _dtype = zypp::Dep::SUPPLEMENTS;
+        _expect_rpm_entry = true;
+        return true;
+      }
+      if (reader_r->name() == "rpm:suggests")
+      {
+        _dtype = zypp::Dep::SUGGESTS;
+        _expect_rpm_entry = true;
+        return true;
+      }
+      if (reader_r->name() == "rpm:suggests")
+      {
+        _dtype = zypp::Dep::SUGGESTS;
+        _expect_rpm_entry = true;
+        return true;
+      }
+      // TODO file
     }
-    else if ( reader_r->nodeType() == XML_READER_TYPE_END_ELEMENT )
+    else if (reader_r->nodeType() == XML_READER_TYPE_END_ELEMENT)
     {
-      if (reader_r->name() == "format");
+      if (reader_r->name() == "rpm:requires"
+          || reader_r->name() == "rpm:provides"
+          || reader_r->name() == "rpm:conflicts"
+          || reader_r->name() == "rpm:obsoletes"
+          || reader_r->name() == "rpm:recommends"
+          || reader_r->name() == "rpm:enhances"
+          || reader_r->name() == "rpm:supplements"
+          || reader_r->name() == "rpm:suggests")
+      {
+        _expect_rpm_entry = false;
+        return true;
+      }
+
+      if (reader_r->name() == "format")
       {
         _tag = tag_package;
         return true;
index 33601db..ec0d10f 100644 (file)
@@ -1,11 +1,11 @@
 #ifndef ZYPP_PARSER_YUM_PRIMARYFILEPARSER_H
 #define ZYPP_PARSER_YUM_PRIMARYFILEPARSER_H
 
-#include "zypp/Date.h"
 #include "zypp/base/Function.h"
 #include "zypp/base/Logger.h"
 #include "zypp/parser/xml/Reader.h"
 #include "zypp/data/ResolvableData.h"
+#include "zypp/parser/ParserProgress.h"
 
 #undef ZYPP_BASE_LOGGER_LOGGROUP
 #define ZYPP_BASE_LOGGER_LOGGROUP "parser"
@@ -17,59 +17,125 @@ namespace zypp
     namespace yum
     {
 
-      enum Tag
-      {
-        tag_NONE,
-        tag_package,
-        tag_format
-      };
-
-/**
- * Iterates through a primary.xml file giving on each iteration
- * a \ref OnMediaLocation object with the resource and its
- * type ( primary, patches, filelists, etc ).
- * The iteration is done via a callback provided on
- * construction.
- *
- * \code
- * RepomdFileReader reader(repomd_file, 
- *                  bind( &SomeClass::callbackfunc, &object, _1, _2 ) );
- * \endcode
- */
-class PrimaryFileReader
-{
-public:
   /**
-   * Callback definition.
-   */
-  typedef function<bool(const zypp::data::Package&)> ProcessPackage;
-
-  /**
-   * Constructor
-   * \param primary_file the primary.xml.gz file you want to read
-   * \param function to process \ref _package data.
+   * Reads through a primary.xml file and collects package data including
+   * dependencies.
    * 
-   * \see PrimaryFileReader::ProcessPackage
+   * After a package is read, a \ref zypp::data::Package
+   * and \ref zypp::data::Dependencies object is prepared and \ref _callback
+   * is called with these two objects passed in.
+   *
+   * The \ref _callback is provided on construction.
+   *
+   * \code
+   * PrimaryFileReader reader(primary_file,
+   *                          bind(&SomeClass::callbackfunc, &object, _1, _2), progress);
+   * \endcode
    */
-  PrimaryFileReader(const Pathname &primary_file, ProcessPackage callback);
+  class PrimaryFileReader
+  {
+  public:
+    /**
+     * Callback definition.
+     */
+    typedef function<bool(const zypp::data::Package&, const zypp::data::Dependencies &deps)> ProcessPackage;
 
-  /**
-   * Callback provided to the XML parser.
-   */
-  bool consumeNode(zypp::xml::Reader & reader_r);
-
-private:
-  bool consumeFormatChildNodes(zypp::xml::Reader & reader_r);
-
-private:
-  Tag _tag;
-  unsigned _count;
-  zypp::data::Package *_package;
-  ProcessPackage _callback;
-/*  CheckSum _checksum;
-  std::string _checksum_type;
-  Date _timestamp;*/
-};
+    /**
+     * Enumeration of some primary.xml tags.
+     */
+    enum Tag
+    {
+      tag_NONE,
+      tag_package,
+      tag_format
+    };
+/*    
+    enum RPMTag
+    {
+      rpmtag_NONE,
+      rpmtag_provides,
+      rpmtag_conflicts,
+      rpmtag_obsoletes,
+      rpmtag_requires,
+      rpmtag_recommends,
+      rpmtag_enhances,
+      rpmtag_supplements,
+      rpmtag_suggests
+    };
+*/
+    /**
+     * Constructor
+     * \param primary_file the primary.xml.gz file you want to read
+     * \param callback function to process \ref _package data.
+     * 
+     * \see PrimaryFileReader::ProcessPackage
+     */
+    PrimaryFileReader(const Pathname &primary_file, ProcessPackage callback, ParserProgress::Ptr progress);
+  
+    /**
+     * Callback provided to the XML parser.
+     */
+    bool consumeNode(zypp::xml::Reader & reader_r);
+
+  private:
+    /**
+     * Function for processing all <code>format</code> tag subtree nodes.
+     */
+    bool consumeFormatChildNodes(zypp::xml::Reader & reader_r);
+
+  private:
+    /** Used to remember the primary.xml tag currently being processed. */
+    Tag _tag;
+
+//    RPMTag _rpmtag;
+
+    /**
+     * Used to remember whether we are expecting an rpm:entry tag
+     * e.g. for rpm:requires
+     */
+    bool _expect_rpm_entry;
+
+    /**
+     * Type of dependency being processed.
+     */
+    Dep _dtype;
+
+    /**
+     * Number of packages read so far.
+     */
+    unsigned _count;
+
+    /**
+     * Total number of packages to be read. This information is acquired from
+     * the <code>packages</code> attribute of <code>metadata</code> tag.
+     */
+    unsigned _total_packages;
+
+    /**
+     * Pointer to the \ref zypp::data::Package object for storing the package
+     * metadata (except for dependencies, which are stored in \ref _deps).
+     */
+    zypp::data::Package *_package;
+
+    /**
+     * A map of lists of strings for storing package dependencies.
+     * 
+     * \see zypp::data::Dependencies
+     */
+    zypp::data::Dependencies _deps;
+
+    /**
+     * Callback for processing package metadata passed in through constructor.
+     */
+    ProcessPackage _callback;
+
+    /**
+     * Progress reporting object.
+     */
+    ParserProgress::Ptr _progress;
+    
+    long int _old_progress;
+  };
 
 
     } // ns zypp