Fix the package build issues: missing deps

author Graydon, Tracy <tracy.graydon@intel.com>

Thu, 6 Dec 2012 22:05:52 +0000 (14:05 -0800)

committer Graydon, Tracy <tracy.graydon@intel.com>

Thu, 6 Dec 2012 22:05:52 +0000 (14:05 -0800)
author Graydon, Tracy <tracy.graydon@intel.com>
Thu, 6 Dec 2012 22:05:52 +0000 (14:05 -0800)
committer Graydon, Tracy <tracy.graydon@intel.com>
Thu, 6 Dec 2012 22:05:52 +0000 (14:05 -0800)
diff --git a/Changes b/Changes

index 948e5dc..df5932d 100644 (file)
--- a/Changes
+++ b/Changes
@@ -1,4 +1,44 @@
  _______________________________________________________________________________
+2011-10-15  Release 3.69
+
+Gisle Aas (4):
+      Documentation fix; encode_utf8 mixup [RT#71151]
+      Make it clearer that there are 2 (actually 3) options for handling "UTF-8 garbage"
+      Github is the official repo
+      Can't be bothered to try to fix the failures that occur on perl-5.6
+
+Barbie (1):
+      fix to TokeParser to correctly handle option configuration
+
+Jon Jensen (1):
+      Aesthetic change: remove extra ;
+
+Ville Skyttä (1):
+      Trim surrounding whitespace from extracted URLs.
+
+
+_______________________________________________________________________________
+2010-09-01  Release 3.68
+
+Gisle Aas (1):
+      Declare the encoding of the POD to be utf8
+
+
+_______________________________________________________________________________
+2010-08-17  Release 3.67
+
+Nicholas Clark (1):
+      bleadperl 2154eca7 breaks HTML::Parser 3.66 [RT#60368]
+
+
+_______________________________________________________________________________
+2010-07-09  Release 3.66
+
+Gisle Aas (1):
+      Fix entity decoding in utf8_mode for the title header
+
+
+_______________________________________________________________________________
  2010-04-04  Release 3.65
  
  Gisle Aas (1):
diff --git a/META.yml b/META.yml

index 4b3ea92..96ff56e 100644 (file)
--- a/META.yml
+++ b/META.yml
@@ -1,6 +1,6 @@
  --- #YAML:1.0
  name:               HTML-Parser
-version:            3.65
+version:            3.69
  abstract:           HTML parser class
  author:
      - Gisle Aas <gisle@activestate.com>
@@ -13,16 +13,16 @@ build_requires:
      Test::More:           0
  requires:
      HTML::Tagset:  3
-    perl:          5.006
+    perl:          5.008
      XSLoader:      0
  resources:
      MailingList:  mailto:libwww@perl.org
-    repository:   http://gitorious.org/projects/perl-html-parser
+    repository:   http://github.com/gisle/html-parser
  no_index:
      directory:
          - t
          - inc
-generated_by:       ExtUtils::MakeMaker version 6.56
+generated_by:       ExtUtils::MakeMaker version 6.57_05
  meta-spec:
      url:      http://module-build.sourceforge.net/META-spec-v1.4.html
      version:  1.4
diff --git a/Makefile.PL b/Makefile.PL

index 70ad50c..3e99a55 100644 (file)
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -8,7 +8,7 @@ WriteMakefile(
      AUTHOR       => 'Gisle Aas <gisle@activestate.com>',
      LICENSE     => 'perl',
  
-    MIN_PERL_VERSION => 5.006,
+    MIN_PERL_VERSION => 5.008,
      PREREQ_PM    => {
                       'HTML::Tagset' => 3,
                       'XSLoader' => 0,
@@ -17,7 +17,7 @@ WriteMakefile(
          build_requires => { 'Test::More' => 0 },
          recommends => { 'HTTP::Headers' => 0 },
          resources => {
-            repository => 'http://gitorious.org/projects/perl-html-parser',
+            repository => 'http://github.com/gisle/html-parser',
             MailingList => 'mailto:libwww@perl.org',
          }
      },
diff --git a/Parser.pm b/Parser.pm

index 154fb2f..dccbc5f 100644 (file)
--- a/Parser.pm
+++ b/Parser.pm
@@ -9,7 +9,7 @@ package HTML::Parser;
  use strict;
  use vars qw($VERSION @ISA);
  
-$VERSION = "3.65";
+$VERSION = "3.69";
  
  require HTML::Entities;
  
@@ -650,9 +650,7 @@ names are forced to lower case.
  General entities are decoded in the attribute values and
  one layer of matching quotes enclosing the attribute values is removed.
  
-The Unicode character set is assumed for entity decoding.  With Perl
-version 5.6 or earlier only the Latin-1 range is supported, and
-entities for characters outside the range 0..255 are left unchanged.
+The Unicode character set is assumed for entity decoding.
  
  =item C<@attr>
  
@@ -1192,12 +1190,14 @@ The result of decoding will be a mix of encoded and decoded characters
  for any entities that expand to characters with code above 127.  This
  is not a good thing.
  
-The solution is to use the Encode::encode_utf8() on the data before
-feeding it to the $p->parse().  For $p->parse_file() pass a file that
-has been opened in ":utf8" mode.
+The recommended solution is to apply Encode::decode_utf8() on the data before
+feeding it to the $p->parse().  For $p->parse_file() pass a file that has been
+opened in ":utf8" mode.
  
-The parser can process raw undecoded UTF-8 sanely if the C<utf8_mode>
-is enabled or if the "attr", "@attr" or "dtext" argspecs is avoided.
+The alternative solution is to enable the C<utf8_mode> and not decode before
+passing strings to $p->parse().  The parser can process raw undecoded UTF-8
+sanely if the C<utf8_mode> is enabled, or if the "attr", "@attr" or "dtext"
+argspecs are avoided.
  
  =item Parsing string decoded with wrong endianness
  
diff --git a/Parser.xs b/Parser.xs

index ffad00b..331e0e9 100644 (file)
--- a/Parser.xs
+++ b/Parser.xs
@@ -125,6 +125,7 @@ newSVpvn(char *s, STRLEN len)
  static SV*
  check_handler(pTHX_ SV* h)
  {
+    SvGETMAGIC(h);
      if (SvROK(h)) {
         SV* myref = SvRV(h);
         if (SvTYPE(myref) == SVt_PVCV)
diff --git a/README b/README

index ade4bf7..928d585 100644 (file)
--- a/README
+++ b/README
@@ -34,7 +34,7 @@ HTML::Parser to create and extract information from HTML syntax trees
  PREREQUISITES
  
  In order to install and use this package you will need Perl version
-5.6 or better.  The HTML::Tagset module should be installed.
+5.8 or better.  The HTML::Tagset module should be installed.
  
  If you intend to use the HTML::HeadParser you probably want to install
  libwww-perl too.
diff --git a/eg/hform b/eg/hform

old mode 100644 (file)

new mode 100755 (executable)
diff --git a/lib/HTML/Entities.pm b/lib/HTML/Entities.pm

index 922faf2..ecd8e0d 100644 (file)
--- a/lib/HTML/Entities.pm
+++ b/lib/HTML/Entities.pm
@@ -1,5 +1,7 @@
  package HTML::Entities;
  
+=encoding utf8
+
  =head1 NAME
  
  HTML::Entities - Encode or decode strings with HTML entities
@@ -32,9 +34,7 @@ character entities.  The module provides the following functions:
  =item decode_entities( $string, ... )
  
  This routine replaces HTML entities found in the $string with the
-corresponding Unicode character.  Under perl 5.6 and earlier only
-characters in the Latin-1 range are replaced. Unrecognized
-entities are left alone.
+corresponding Unicode character.  Unrecognized entities are left alone.
  
  If multiple strings are provided as argument they are each decoded
  separately and the same number of strings are returned.
@@ -146,7 +146,7 @@ require Exporter;
  @EXPORT = qw(encode_entities decode_entities _decode_entities);
  @EXPORT_OK = qw(%entity2char %char2entity encode_entities_numeric);
  
-$VERSION = "3.64";
+$VERSION = "3.69";
  sub Version { $VERSION; }
  
  require HTML::Parser;  # for fast XS implemented decode_entities
diff --git a/lib/HTML/HeadParser.pm b/lib/HTML/HeadParser.pm

index be65fa2..fe6916e 100644 (file)
--- a/lib/HTML/HeadParser.pm
+++ b/lib/HTML/HeadParser.pm
@@ -87,7 +87,7 @@ use HTML::Entities ();
  use strict;
  use vars qw($VERSION $DEBUG);
  #$DEBUG = 1;
-$VERSION = "3.62";
+$VERSION = "3.69";
  
  =item $hp = HTML::HeadParser->new
  
@@ -157,7 +157,10 @@ sub flush_text   # internal
      $text =~ s/\s+/ /g;
      print "FLUSH $tag => '$text'\n"  if $DEBUG;
      if ($tag eq 'title') {
+       my $decoded;
+       $decoded = utf8::decode($text) if $self->utf8_mode && defined &utf8::decode;
         HTML::Entities::decode($text);
+       utf8::encode($text) if $decoded;
         $self->{'header'}->push_header(Title => $text);
      }
      $self->{'tag'} = $self->{'text'} = '';
@@ -204,7 +207,8 @@ sub start
         $self->{'header'}->push_header($key => $attr->{content});
      } elsif ($tag eq 'base') {
         return unless exists $attr->{href};
-       $self->{'header'}->push_header('Content-Base' => $attr->{href});
+       (my $base = $attr->{href}) =~ s/^\s+//; $base =~ s/\s+$//; # HTML5
+       $self->{'header'}->push_header('Content-Base' => $base);
      } elsif ($tag eq 'isindex') {
         # This is a non-standard header.  Perhaps we should just ignore
         # this element
@@ -215,7 +219,9 @@ sub start
      } elsif ($tag eq 'link') {
         return unless exists $attr->{href};
         # <link href="http:..." rel="xxx" rev="xxx" title="xxx">
-       my $h_val = "<" . delete($attr->{href}) . ">";
+       my $href = delete($attr->{href});
+       $href =~ s/^\s+//; $href =~ s/\s+$//; # HTML5
+       my $h_val = "<$href>";
         for (sort keys %{$attr}) {
             next if $_ eq "/";  # XHTML junk
             $h_val .= qq(; $_="$attr->{$_}");
@@ -262,7 +268,7 @@ sub text
  }
  
  BEGIN {
-    *utf8_mode = sub { 1 } unless HTML::Entities::UNICODE_SUPPORT;;
+    *utf8_mode = sub { 1 } unless HTML::Entities::UNICODE_SUPPORT;
  }
  
  1;
diff --git a/lib/HTML/LinkExtor.pm b/lib/HTML/LinkExtor.pm

index 8d50439..c2f08c6 100644 (file)
--- a/lib/HTML/LinkExtor.pm
+++ b/lib/HTML/LinkExtor.pm
@@ -2,7 +2,7 @@ package HTML::LinkExtor;
  
  require HTML::Parser;
  @ISA = qw(HTML::Parser);
-$VERSION = "3.60";
+$VERSION = "3.69";
  
  =head1 NAME
  
@@ -83,8 +83,8 @@ sub _start_tag
      my $a;
      for $a (@$links) {
         next unless exists $attr->{$a};
-       push(@links, $a, $base ? URI->new($attr->{$a}, $base)->abs($base)
-                               : $attr->{$a});
+       (my $link = $attr->{$a}) =~ s/^\s+//; $link =~ s/\s+$//; # HTML5
+       push(@links, $a, $base ? URI->new($link, $base)->abs($base) : $link);
      }
      return unless @links;
      $self->_found_link($tag, @links);
diff --git a/lib/HTML/TokeParser.pm b/lib/HTML/TokeParser.pm

index 94128db..959b96f 100644 (file)
--- a/lib/HTML/TokeParser.pm
+++ b/lib/HTML/TokeParser.pm
@@ -2,7 +2,7 @@ package HTML::TokeParser;
  
  require HTML::PullParser;
  @ISA=qw(HTML::PullParser);
-$VERSION = "3.57";
+$VERSION = "3.69";
  
  use strict;
  use Carp ();
@@ -27,17 +27,19 @@ sub new
  {
      my $class = shift;
      my %cnf;
+
      if (@_ == 1) {
         my $type = (ref($_[0]) eq "SCALAR") ? "doc" : "file";
         %cnf = ($type => $_[0]);
      }
      else {
+       unshift @_, (ref($_[0]) eq "SCALAR") ? "doc" : "file" if(scalar(@_) % 2 == 1);
         %cnf = @_;
      }
  
      my $textify = delete $cnf{textify} || {img => "alt", applet => "alt"};
  
-    my $self = $class->SUPER::new(%cnf, %ARGS) || return undef;
+    my $self = $class->SUPER::new(%ARGS, %cnf) || return undef;
  
      $self->{textify} = $textify;
      $self;
diff --git a/packaging/perl-HTML-Parser.spec b/packaging/perl-HTML-Parser.spec

index 0c4ec8a..3447484 100644 (file)
--- a/packaging/perl-HTML-Parser.spec
+++ b/packaging/perl-HTML-Parser.spec
@@ -1,58 +1,70 @@
-Name:       perl-HTML-Parser
-Summary:    Perl module for parsing HTML
-Version:    3.65
-Release:    2 
-Group:      Development/Libraries
-License:    GPL+ or Artistic
-URL:        http://search.cpan.org/dist/HTML-Parser/
-Source0:    %{name}-%{version}.tar.gz
-Source1001: perl-HTML-Parser.manifest 
-Requires:   perl(:MODULE_COMPAT_%(eval "`%{__perl} -V:version`"; echo $version))
-Requires:   perl(HTML::Tagset) >= 3.03
-BuildRequires:  perl(HTML::Tagset) >= 3.03, perl(ExtUtils::MakeMaker), perl(Test::Simple)
+%define real_name HTML-Parser
  
+Name:           perl-%{real_name}
+Summary:        Perl module for parsing HTML
+Version:        3.69
+Release:        3%{?dist}
+License:        GPL+ or Artistic
+Group:          Development/Libraries
+Source0:        %{real_name}-%{version}.tar.gz 
+Source1001:     packaging/perl-HTML-Parser.manifest 
+URL:            http://search.cpan.org/dist/HTML-Parser/
+Requires:       perl(:MODULE_COMPAT_%(eval "`%{__perl} -V:version`"; echo $version))
+BuildRequires:  perl(Carp)
+BuildRequires:  perl(ExtUtils::MakeMaker)
+BuildRequires:  perl(ExtUtils::ParseXS)
+BuildRequires:  perl(HTML::Tagset) >= 3
+BuildRequires:  perl(Test::More)
+BuildRequires:  perl(URI)
+BuildRequires:  perl(XSLoader)
+BuildRequires:  perl(Pod::Simple)
+BuildRequires:  perl(Test::Pod) 
+Requires:       perl(HTML::Tagset) >= 3
+Requires:       perl(URI)
+Requires:       perl(XSLoader)
+%if %{undefined perl_bootstrap}
+# This creates cycle with perl-HTTP-Message. Weaken the dependency here because
+# it's just a recommended dependency per META.yml.
+BuildRequires:  perl(HTTP::Headers)
+Requires:       perl(HTTP::Headers)
+%endif
+
+%{?perl_default_filter}
+%{?perl_default_subpackage_tests}
  
  %description
  The HTML-Parser module for perl to parse and extract information from
  HTML documents, including the HTML::Entities, HTML::HeadParser,
  HTML::LinkExtor, HTML::PullParser, and HTML::TokeParser modules.
  
-
  %prep
-%setup -q
+%setup -q -n HTML-Parser-3.69
  
  chmod -c a-x eg/*
  
  %build
  cp %{SOURCE1001} .
-
-if test -f Makefile.PL; then
-%{__perl} Makefile.PL INSTALLDIRS=vendor
-make %{?jobs:-j%jobs}
-else
-%{__perl} Build.PL  --installdirs vendor
-./Build
-fi
+%{__perl} Makefile.PL INSTALLDIRS=vendor OPTIMIZE="%{optflags}"
+make %{?_smp_mflags}
  
  %install
-rm -rf %{buildroot}
-if test -f Makefile.PL; then
  make pure_install PERL_INSTALL_ROOT=%{buildroot}
-else
-./Build install --installdirs vendor
-fi
+#file=%{buildroot}%{_mandir}/man3/HTML::Entities.3pm
+#iconv -f iso-8859-1 -t utf-8 <"$file" > "${file}_"
+#mv -f "${file}_" "$file"
  find %{buildroot} -type f -name .packlist -exec rm -f {} ';'
-find %{buildroot} -depth -type d -exec rmdir {} 2>/dev/null ';'
  find %{buildroot} -type f -name '*.bs' -empty -exec rm -f {} ';'
-%{_fixperms} %{buildroot}/*
+find %{buildroot} -depth -type d -exec rmdir {} 2>/dev/null ';'
+chmod -R u+w %{buildroot}/*
  
-file=$RPM_BUILD_ROOT%{_mandir}/man3/HTML::Entities.3pm
-iconv -f iso-8859-1 -t utf-8 <"$file" > "${file}_"
-mv -f "${file}_" "$file"
-chmod -R u+w $RPM_BUILD_ROOT/*
+%check
+make test
  
  %files
  %manifest perl-HTML-Parser.manifest
+%doc Changes README TODO eg/
  %{perl_vendorarch}/HTML/*
  %{perl_vendorarch}/auto/HTML/*
-%doc %{_mandir}/man3/*.3pm*
+#%{_mandir}/man3/*.3pm*
+
+
diff --git a/t/headparser.t b/t/headparser.t

index adcde7a..985eaff 100644 (file)
--- a/t/headparser.t
+++ b/t/headparser.t
@@ -1,7 +1,7 @@
  #!perl -w
  
  use strict;
-use Test::More tests => 15;
+use Test::More tests => 16;
  
  { package H;
    sub new { bless {}, shift; }
@@ -147,7 +147,7 @@ unlink($file) or warn "Can't unlink $file: $!";
  ok(!$p->as_string);
  
  SKIP: {
-  skip "Need Unicode support", 4 if $] < 5.008;
+  skip "Need Unicode support", 5 if $] < 5.008;
  
    # Test that the Unicode BOM does not confuse us?
    $p = HTML::HeadParser->new(H->new);
@@ -177,4 +177,20 @@ EOT
  
    is($p->header("title"), "Parkinson's disease");
    is($p->header("link")->[0], '<../../css/ummAdam.css>; rel="stylesheet"; type="text/css"');
+
+  $p = HTML::HeadParser->new(H->new);
+  $p->utf8_mode(1);
+  $p->parse(<<"EOT");   # example from http://www.mjw.com.pl/
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\r
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="pl" lang="pl"> \r
+\r
+<head profile="http://gmpg.org/xfn/11">\r
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />\r
+\r
+<title> ko\xC5\x84c\xC3\xB3wki kolekcji, outlet, hurtownia odzie\xC5\xBCy Warszawa &#8211; MJW</title>\r
+<link rel="shortcut icon" href="favicon.ico" type="image/x-icon" />\r
+
+EOT
+    $p->eof;
+    is($p->header("title"), "ko\xC5\x84c\xC3\xB3wki kolekcji, outlet, hurtownia odzie\xC5\xBCy Warszawa \xE2\x80\x93 MJW");
  }
author	Graydon, Tracy <tracy.graydon@intel.com>
	Thu, 6 Dec 2012 22:05:52 +0000 (14:05 -0800)
committer	Graydon, Tracy <tracy.graydon@intel.com>
	Thu, 6 Dec 2012 22:05:52 +0000 (14:05 -0800)
Changes		patch \| blob \| history
META.yml		patch \| blob \| history
Makefile.PL		patch \| blob \| history
Parser.pm		patch \| blob \| history
Parser.xs		patch \| blob \| history
README		patch \| blob \| history
eg/hform	[changed mode: 0644->0755]	patch \| blob \| history
lib/HTML/Entities.pm		patch \| blob \| history
lib/HTML/HeadParser.pm		patch \| blob \| history
lib/HTML/LinkExtor.pm		patch \| blob \| history
lib/HTML/TokeParser.pm		patch \| blob \| history
packaging/perl-HTML-Parser.spec		patch \| blob \| history
t/headparser.t		patch \| blob \| history