From 2ed511eccdb1c54a77a99ffd2e8b3d8cf558b45c Mon Sep 17 00:00:00 2001 From: Rafael Garcia-Suarez Date: Fri, 19 Dec 2003 08:36:11 +0000 Subject: [PATCH] Upgrade to CGI.pm 3.01 p4raw-id: //depot/perl@21928 --- lib/CGI.pm | 250 +++++++++++++++++++++++++++++++++++++++------------- lib/CGI/Carp.pm | 7 +- lib/CGI/Cookie.pm | 2 +- lib/CGI/Fast.pm | 2 +- lib/CGI/Pretty.pm | 2 +- lib/CGI/Util.pm | 17 +++- lib/CGI/t/carp.t | 18 +--- lib/CGI/t/request.t | 2 +- 8 files changed, 214 insertions(+), 86 deletions(-) diff --git a/lib/CGI.pm b/lib/CGI.pm index 9f65f7d..1fe49e3 100644 --- a/lib/CGI.pm +++ b/lib/CGI.pm @@ -18,13 +18,13 @@ use Carp 'croak'; # The most recent version and complete docs are available at: # http://stein.cshl.org/WWW/software/CGI/ -$CGI::revision = '$Id: CGI.pm,v 1.130 2003/08/01 14:39:17 lstein Exp $ + patches by merlyn'; -$CGI::VERSION='3.00'; +$CGI::revision = '$Id: CGI.pm,v 1.145 2003/12/10 15:16:08 lstein Exp $'; +$CGI::VERSION=3.01; # HARD-CODED LOCATION FOR FILE UPLOAD TEMPORARY FILES. # UNCOMMENT THIS ONLY IF YOU KNOW WHAT YOU'RE DOING. # $CGITempFile::TMPDIRECTORY = '/usr/tmp'; -use CGI::Util qw(rearrange make_attributes unescape escape expires); +use CGI::Util qw(rearrange make_attributes unescape escape expires ebcdic2ascii ascii2ebcdic); #use constant XHTML_DTD => ['-//W3C//DTD XHTML Basic 1.0//EN', # 'http://www.w3.org/TR/xhtml-basic/xhtml-basic10.dtd']; @@ -210,9 +210,9 @@ if ($OS eq 'VMS') { } if ($needs_binmode) { - $CGI::DefaultClass->binmode(main::STDOUT); - $CGI::DefaultClass->binmode(main::STDIN); - $CGI::DefaultClass->binmode(main::STDERR); + $CGI::DefaultClass->binmode(\*main::STDOUT); + $CGI::DefaultClass->binmode(\*main::STDIN); + $CGI::DefaultClass->binmode(\*main::STDERR); } %EXPORT_TAGS = ( @@ -232,8 +232,8 @@ if ($needs_binmode) { start_multipart_form end_multipart_form isindex tmpFileName uploadInfo URL_ENCODED MULTIPART/], ':cgi'=>[qw/param upload path_info path_translated url self_url script_name cookie Dump raw_cookie request_method query_string Accept user_agent remote_host content_type - remote_addr referer server_name server_software server_port server_protocol - virtual_host remote_ident auth_type http + remote_addr referer server_name server_software server_port server_protocol virtual_port + virtual_host remote_ident auth_type http append save_parameters restore_parameters param_fetch remote_user user_name header redirect import_names put Delete Delete_all url_param cgi_error/], @@ -295,6 +295,7 @@ sub expand_tags { sub new { my($class,@initializer) = @_; my $self = {}; + bless $self,ref $class || $class || $DefaultClass; if (ref($initializer[0]) && (UNIVERSAL::isa($initializer[0],'Apache') @@ -322,9 +323,20 @@ sub new { return $self; } -# We provide a DESTROY method so that the autoloader -# doesn't bother trying to find it. -sub DESTROY { } +# We provide a DESTROY method so that we can ensure that +# temporary files are closed (via Fh->DESTROY) before they +# are unlinked (via CGITempFile->DESTROY) because it is not +# possible to unlink an open file on Win32. We explicitly +# call DESTROY on each, rather than just undefing them and +# letting Perl DESTROY them by garbage collection, in case the +# user is still holding any reference to them as well. +sub DESTROY { + my $self = shift; + foreach my $href (values %{$self->{'.tmpfiles'}}) { + $href->{hndl}->DESTROY if defined $href->{hndl}; + $href->{name}->DESTROY if defined $href->{name}; + } +} sub r { my $self = shift; @@ -333,6 +345,12 @@ sub r { $r; } +sub upload_hook { + my ($self,$hook,$data) = self_or_default(@_); + $self->{'.upload_hook'} = $hook; + $self->{'.upload_data'} = $data; +} + #### Method: param # Returns the value(s)of a named parameter. # If invoked in a list context, returns the @@ -447,12 +465,15 @@ sub init { # quietly read and discard the post my $buffer; my $max = $content_length; - while ($max > 0 && (my $bytes = read(STDIN,$buffer,$max < 10000 ? $max : 10000))) { - $max -= $bytes; + while ($max > 0 && + (my $bytes = $MOD_PERL + ? $self->r->read($buffer,$max < 10000 ? $max : 10000) + : read(STDIN,$buffer,$max < 10000 ? $max : 10000) + )) { + $self->cgi_error("413 Request entity too large"); + last METHOD; } - $self->cgi_error("413 Request entity too large"); - last METHOD; - } + } # Process multipart postings, but only if the initializer is # not defined. @@ -495,6 +516,21 @@ sub init { last METHOD; } + if (defined($fh) && ($fh ne '')) { + while (<$fh>) { + chomp; + last if /^=/; + push(@lines,$_); + } + # massage back into standard format + if ("@lines" =~ /=/) { + $query_string=join("&",@lines); + } else { + $query_string=join("+",@lines); + } + last METHOD; + } + # last chance -- treat it as a string $initializer = $$initializer if ref($initializer) eq 'SCALAR'; $query_string = $initializer; @@ -515,7 +551,7 @@ sub init { } if ($meth eq 'POST') { - $self->read_from_client(\*STDIN,\$query_string,$content_length,0) + $self->read_from_client(\$query_string,$content_length,0) if $content_length > 0; # Some people want to have their cake and eat it too! # Uncomment this line to have the contents of the query string @@ -528,7 +564,15 @@ sub init { # Check the command line and then the standard input for data. # We use the shellwords package in order to behave the way that # UN*X programmers expect. - $query_string = read_from_cmdline() if $DEBUG; + if ($DEBUG) + { + my $cmdline_ret = read_from_cmdline(); + $query_string = $cmdline_ret->{'query_string'}; + if (defined($cmdline_ret->{'subpath'})) + { + $self->path_info($cmdline_ret->{'subpath'}); + } + } } # YL: Begin Change for XML handler 10/19/2001 @@ -655,6 +699,7 @@ sub all_parameters { # put a filehandle into binary mode (DOS) sub binmode { + return unless defined($_[1]) && defined fileno($_[1]); CORE::binmode($_[1]); } @@ -823,18 +868,19 @@ END_OF_FUNC 'new_MultipartBuffer' => <<'END_OF_FUNC', # Create a new multipart buffer sub new_MultipartBuffer { - my($self,$boundary,$length,$filehandle) = @_; - return MultipartBuffer->new($self,$boundary,$length,$filehandle); + my($self,$boundary,$length) = @_; + return MultipartBuffer->new($self,$boundary,$length); } END_OF_FUNC 'read_from_client' => <<'END_OF_FUNC', # Read data from a file handle sub read_from_client { - my($self, $fh, $buff, $len, $offset) = @_; + my($self, $buff, $len, $offset) = @_; local $^W=0; # prevent a warning - return undef unless defined($fh); - return read($fh, $$buff, $len, $offset); + return $MOD_PERL + ? $self->r->read($$buff, $len, $offset) + : read(\*STDIN, $$buff, $len, $offset); } END_OF_FUNC @@ -1300,7 +1346,7 @@ sub header { my($self,@p) = self_or_default(@_); my(@header); - return undef if $self->{'.header_printed'}++ and $HEADERS_ONCE; + return "" if $self->{'.header_printed'}++ and $HEADERS_ONCE; my($type,$status,$cookie,$target,$expires,$nph,$charset,$attachment,$p3p,@other) = rearrange([['TYPE','CONTENT_TYPE','CONTENT-TYPE'], @@ -1530,7 +1576,7 @@ sub _style { : qq() ) if $src; } - if ($verbatim) { + if ($verbatim) { push(@result, ""); } push(@result,style({'type'=>$type},"$cdata_start\n$code\n$cdata_end")) if $code; @@ -1639,6 +1685,7 @@ sub startform { $method = lc($method) || 'post'; $enctype = $enctype || &URL_ENCODED; unless (defined $action) { + $action = $self->escapeHTML($self->url(-absolute=>1,-path=>1)); if (length($ENV{QUERY_STRING})>0) { $action .= "?".$self->escapeHTML($ENV{QUERY_STRING},1); @@ -2509,7 +2556,7 @@ sub url { $url .= server_name(); my $port = $self->server_port; $url .= ":" . $port - unless (lc($protocol) eq 'http' && $port == 80) + unless (lc($protocol) eq 'http' && $port == 80) || (lc($protocol) eq 'https' && $port == 443); } return $url if $base; @@ -2850,6 +2897,21 @@ sub server_software { } END_OF_FUNC +#### Method: virtual_port +# Return the server port, taking virtual hosts into account +#### +'virtual_port' => <<'END_OF_FUNC', +sub virtual_port { + my($self) = self_or_default(@_); + my $vh = $self->http('host'); + if ($vh) { + return ($vh =~ /:(\d+)$/)[0] || '80'; + } else { + return $self->server_port(); + } +} +END_OF_FUNC + #### Method: server_port # Return the tcp/ip port the server is running on #### @@ -3062,11 +3124,12 @@ END_OF_FUNC sub read_from_cmdline { my($input,@words); my($query_string); + my($subpath); if ($DEBUG && @ARGV) { @words = @ARGV; } elsif ($DEBUG > 1) { require "shellwords.pl"; - print STDERR "(offline mode: enter name=value pairs on standard input)\n"; + print STDERR "(offline mode: enter name=value pairs on standard input; press ^D or ^Z when done)\n"; chomp(@lines = ); # remove newlines $input = join(" ",@lines); @words = &shellwords($input); @@ -3081,7 +3144,12 @@ sub read_from_cmdline { } else { $query_string = join('+',@words); } - return $query_string; + if ($query_string =~ /^(.*?)\?(.*)$/) + { + $query_string = $2; + $subpath = $1; + } + return { 'query_string' => $query_string, 'subpath' => $subpath }; } END_OF_FUNC @@ -3095,8 +3163,8 @@ END_OF_FUNC ##### 'read_multipart' => <<'END_OF_FUNC', sub read_multipart { - my($self,$boundary,$length,$filehandle) = @_; - my($buffer) = $self->new_MultipartBuffer($boundary,$length,$filehandle); + my($self,$boundary,$length) = @_; + my($buffer) = $self->new_MultipartBuffer($boundary,$length); return unless $buffer; my(%header,$body); my $filenumber = 0; @@ -3156,10 +3224,11 @@ sub read_multipart { $seqno += int rand(100); } die "CGI open of tmpfile: $!\n" unless defined $filehandle; - $CGI::DefaultClass->binmode($filehandle) if $CGI::needs_binmode; + $CGI::DefaultClass->binmode($filehandle) if $CGI::needs_binmode + && defined fileno($filehandle); # if this is an multipart/mixed attachment, save the header - # together with the body for lateron parsing with an external + # together with the body for later parsing with an external # MIME parser module if ( $multipart ) { foreach ( keys %header ) { @@ -3170,9 +3239,15 @@ sub read_multipart { my ($data); local($\) = ''; - while (defined($data = $buffer->read)) { + my $totalbytes; + while (defined($data = $buffer->read)) { + if (defined $self->{'.upload_hook'}) + { + $totalbytes += length($data); + &{$self->{'.upload_hook'}}($filename ,$data, $totalbytes, $self->{'.upload_data'}); + } print $filehandle $data; - } + } # back up to beginning of file seek($filehandle,0,0); @@ -3187,6 +3262,7 @@ sub read_multipart { # Save some information about the uploaded file where we can get # at it later. $self->{'.tmpfiles'}->{fileno($filehandle)}= { + hndl => $filehandle, name => $tmpfile, info => {%header}, }; @@ -3337,6 +3413,8 @@ END_OF_AUTOLOAD ######################## MultipartBuffer #################### package MultipartBuffer; +use constant DEBUG => 0; + # how many bytes to read at a time. We use # a 4K buffer by default. $INITIAL_FILLUNIT = 1024 * 4; @@ -3359,17 +3437,9 @@ $AUTOLOADED_ROUTINES=<<'END_OF_AUTOLOAD'; 'new' => <<'END_OF_FUNC', sub new { - my($package,$interface,$boundary,$length,$filehandle) = @_; + my($package,$interface,$boundary,$length) = @_; $FILLUNIT = $INITIAL_FILLUNIT; - my $IN; - if ($filehandle) { - my($package) = caller; - # force into caller's package if necessary - $IN = $filehandle=~/[':]/ ? $filehandle : "$package\:\:$filehandle"; - } - $IN = "main::STDIN" unless $IN; - - $CGI::DefaultClass->binmode($IN) if $CGI::needs_binmode; + $CGI::DefaultClass->binmode($IN); # if $CGI::needs_binmode; # just do it always # If the user types garbage into the file upload field, # then Netscape passes NOTHING to the server (not good). @@ -3392,7 +3462,7 @@ sub new { } else { # otherwise we find it ourselves my($old); ($old,$/) = ($/,$CRLF); # read a CRLF-delimited line - $boundary = <$IN>; # BUG: This won't work correctly under mod_perl + $boundary = ; # BUG: This won't work correctly under mod_perl $length -= length($boundary); chomp($boundary); # remove the CRLF $/ = $old; # restore old line separator @@ -3401,7 +3471,6 @@ sub new { my $self = {LENGTH=>$length, BOUNDARY=>$boundary, - IN=>$IN, INTERFACE=>$interface, BUFFER=>'', }; @@ -3415,7 +3484,7 @@ sub new { unless ($boundary_read) { while ($self->read(0)) { } } - die "Malformed multipart POST\n" if $self->eof; + die "Malformed multipart POST: data truncated\n" if $self->eof; return $retval; } @@ -3428,7 +3497,7 @@ sub readHeader { my($ok) = 0; my($bad) = 0; - local($CRLF) = "\015\012" if $CGI::OS eq 'VMS'; + local($CRLF) = "\015\012" if $CGI::OS eq 'VMS' || $CGI::EBCDIC; do { $self->fillBuffer($FILLUNIT); @@ -3440,10 +3509,18 @@ sub readHeader { } until $ok || $bad; return () if $bad; + #EBCDIC NOTE: translate header into EBCDIC, but watch out for continuation lines! + my($header) = substr($self->{BUFFER},0,$end+2); substr($self->{BUFFER},0,$end+4) = ''; my %return; + if ($CGI::EBCDIC) { + warn "untranslated header=$header\n" if DEBUG; + $header = CGI::Util::ascii2ebcdic($header); + warn "translated header=$header\n" if DEBUG; + } + # See RFC 2045 Appendix A and RFC 822 sections 3.4.8 # (Folding Long Header Fields), 3.4.3 (Comments) # and 3.4.5 (Quoted-Strings). @@ -3466,9 +3543,18 @@ sub readBody { my($self) = @_; my($data); my($returnval)=''; + + #EBCDIC NOTE: want to translate returnval into EBCDIC HERE + while (defined($data = $self->read)) { $returnval .= $data; } + + if ($CGI::EBCDIC) { + warn "untranslated body=$returnval\n" if DEBUG; + $returnval = CGI::Util::ascii2ebcdic($returnval); + warn "translated body=$returnval\n" if DEBUG; + } return $returnval; } END_OF_FUNC @@ -3481,30 +3567,38 @@ sub read { my($self,$bytes) = @_; # default number of bytes to read - $bytes = $bytes || $FILLUNIT; + $bytes = $bytes || $FILLUNIT; # Fill up our internal buffer in such a way that the boundary # is never split between reads. $self->fillBuffer($bytes); + my $boundary_start = $CGI::EBCDIC ? CGI::Util::ebcdic2ascii($self->{BOUNDARY}) : $self->{BOUNDARY}; + my $boundary_end = $CGI::EBCDIC ? CGI::Util::ebcdic2ascii($self->{BOUNDARY}.'--') : $self->{BOUNDARY}.'--'; + # Find the boundary in the buffer (it may not be there). - my $start = index($self->{BUFFER},$self->{BOUNDARY}); + my $start = index($self->{BUFFER},$boundary_start); + + warn "boundary=$self->{BOUNDARY} length=$self->{LENGTH} start=$start\n" if DEBUG; # protect against malformed multipart POST operations die "Malformed multipart POST\n" unless ($start >= 0) || ($self->{LENGTH} > 0); + + #EBCDIC NOTE: want to translate boundary search into ASCII here. + # If the boundary begins the data, then skip past it # and return undef. if ($start == 0) { # clear us out completely if we've hit the last boundary. - if (index($self->{BUFFER},"$self->{BOUNDARY}--")==0) { + if (index($self->{BUFFER},$boundary_end)==0) { $self->{BUFFER}=''; $self->{LENGTH}=0; return undef; } # just remove the boundary. - substr($self->{BUFFER},0,length($self->{BOUNDARY}))=''; + substr($self->{BUFFER},0,length($boundary_start))=''; $self->{BUFFER} =~ s/^\012\015?//; return undef; } @@ -3516,7 +3610,7 @@ sub read { # leave enough bytes in the buffer to allow us to read # the boundary. Thanks to Kevin Hendrick for finding # this one. - $bytesToReturn = $bytes - (length($self->{BOUNDARY})+1); + $bytesToReturn = $bytes - (length($boundary_start)+1); } my $returnval=substr($self->{BUFFER},0,$bytesToReturn); @@ -3541,11 +3635,11 @@ sub fillBuffer { my($bytesToRead) = $bytes - $bufferLength + $boundaryLength + 2; $bytesToRead = $self->{LENGTH} if $self->{LENGTH} < $bytesToRead; - # Try to read some data. We may hang here if the browser is screwed up. - my $bytesRead = $self->{INTERFACE}->read_from_client($self->{IN}, - \$self->{BUFFER}, + # Try to read some data. We may hang here if the browser is screwed up. + my $bytesRead = $self->{INTERFACE}->read_from_client(\$self->{BUFFER}, $bytesToRead, $bufferLength); + warn "bytesToRead=$bytesToRead, bufferLength=$bufferLength, buffer=$self->{BUFFER}\n" if DEBUG; $self->{BUFFER} = '' unless defined $self->{BUFFER}; # An apparent bug in the Apache server causes the read() @@ -4634,11 +4728,8 @@ The redirect() function redirects the browser to a different URL. If you use redirection like this, you should B print out a header as well. -One hint I can offer is that relative links may not work correctly -when you generate a redirection to another document on your site. -This is due to a well-intentioned optimization that some servers use. -The solution to this is to use the full URL (including the http: part) -of the document you are redirecting to. +You should always use full URLs (including the http: or ftp: part) in +redirection requests. Relative URLs will not work correctly. You can also use named arguments: @@ -5544,6 +5635,29 @@ Example: You are free to create a custom HTML page to complain about the error, if you wish. +You can set up a callback that will be called whenever a file upload +is being read during the form processing. This is much like the +UPLOAD_HOOK facility available in Apache::Request, with the exception +that the first argument to the callback is an Apache::Upload object, +here it's the remote filename. + + $q = CGI->new(); + $q->upload_hook(\&hook,$data); + + sub hook + { + my ($filename, $buffer, $bytes_read, $data) = @_; + print "Read $bytes_read bytes of $filename\n"; + } + +If using the function-oriented interface, call the CGI::upload_hook() +method before calling param() or any other CGI functions: + + CGI::upload_hook(\&hook,$data); + +This method is not exported by default. You will have to import it +explicitly if you wish to use it without the CGI:: prefix. + If you are using CGI.pm on a Windows platform and find that binary files get slightly larger when uploaded but that text files remain the same, then you have forgotten to activate binary mode on the output @@ -6393,8 +6507,8 @@ side-by-side frames. CGI.pm has limited support for HTML3's cascading style sheets (css). To incorporate a stylesheet into your document, pass the start_html() method a B<-style> parameter. The value of this -parameter may be a scalar, in which case it is incorporated directly -into a