js/src/tests/ecma_3/RegExp/regress-85721.js

   1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* ***** BEGIN LICENSE BLOCK *****
   3  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
   4  *
   5  * The contents of this file are subject to the Mozilla Public License Version
   6  * 1.1 (the "License"); you may not use this file except in compliance with
   7  * the License. You may obtain a copy of the License at
   8  * http://www.mozilla.org/MPL/
   9  *
  10  * Software distributed under the License is distributed on an "AS IS" basis,
  11  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  12  * for the specific language governing rights and limitations under the
  13  * License.
  14  *
  15  * The Original Code is JavaScript Engine testing utilities.
  16  *
  17  * The Initial Developer of the Original Code is
  18  * Netscape Communications Corp.
  19  * Portions created by the Initial Developer are Copyright (C) 2002
  20  * the Initial Developer. All Rights Reserved.
  21  *
  22  * Contributor(s):
  23  *   rogerl@netscape.com, pschwartau@netscape.com
  24  *
  25  * Alternatively, the contents of this file may be used under the terms of
  26  * either the GNU General Public License Version 2 or later (the "GPL"), or
  27  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  28  * in which case the provisions of the GPL or the LGPL are applicable instead
  29  * of those above. If you wish to allow use of your version of this file only
  30  * under the terms of either the GPL or the LGPL, and not to allow others to
  31  * use your version of this file under the terms of the MPL, indicate your
  32  * decision by deleting the provisions above and replace them with the notice
  33  * and other provisions required by the GPL or the LGPL. If you do not delete
  34  * the provisions above, a recipient may use your version of this file under
  35  * the terms of any one of the MPL, the GPL or the LGPL.
  36  *
  37  * ***** END LICENSE BLOCK ***** */
  38
  39 /*
  40  *
  41  * Date:    14 Feb 2002
  42  * SUMMARY: Performance: Regexp performance degraded from 4.7
  43  * See http://bugzilla.mozilla.org/show_bug.cgi?id=85721
  44  *
  45  * Adjust this testcase if necessary. The FAST constant defines
  46  * an upper bound in milliseconds for any execution to take.
  47  *
  48  */
  49 //-----------------------------------------------------------------------------
  50 var BUGNUMBER = 85721;
  51 var summary = 'Performance: execution of regular expression';
  52 var FAST = 100; // execution should be 100 ms or less to pass the test
  53 var MSG_FAST = 'Execution took less than ' + FAST + ' ms';
  54 var MSG_SLOW = 'Execution took ';
  55 var MSG_MS = ' ms';
  56 var str = '';
  57 var re = '';
  58 var status = '';
  59 var actual = '';
  60 var expect= '';
  61
  62 printBugNumber(BUGNUMBER);
  63 printStatus (summary);
  64
  65
  66 function elapsedTime(startTime)
  67 {
  68   return new Date() - startTime;
  69 }
  70
  71
  72 function isThisFast(ms)
  73 {
  74   if (ms <= FAST)
  75     return MSG_FAST;
  76   return MSG_SLOW + ms + MSG_MS;
  77 }
  78
  79
  80
  81 /*
  82  * The first regexp. We'll test for performance (Section 1) and accuracy (Section 2).
  83  */
  84 str='<sql:connection id="conn1"> <sql:url>www.m.com</sql:url> <sql:driver>drive.class</sql:driver>\n<sql:userId>foo</sql:userId> <sql:password>goo</sql:password> </sql:connection>';
  85 re = /<sql:connection id="([^\r\n]*?)">\s*<sql:url>\s*([^\r\n]*?)\s*<\/sql:url>\s*<sql:driver>\s*([^\r\n]*?)\s*<\/sql:driver>\s*(\s*<sql:userId>\s*([^\r\n]*?)\s*<\/sql:userId>\s*)?\s*(\s*<sql:password>\s*([^\r\n]*?)\s*<\/sql:password>\s*)?\s*<\/sql:connection>/;
  86 expect = Array("<sql:connection id=\"conn1\"> <sql:url>www.m.com</sql:url> <sql:driver>drive.class</sql:driver>\n<sql:userId>foo</sql:userId> <sql:password>goo</sql:password> </sql:connection>","conn1","www.m.com","drive.class","<sql:userId>foo</sql:userId> ","foo","<sql:password>goo</sql:password> ","goo");
  87
  88 /*
  89  *  Check performance -
  90  */
  91 status = inSection(1);
  92 var start = new Date();
  93 var result = re.exec(str);
  94 actual = elapsedTime(start);
  95 reportCompare(isThisFast(FAST), isThisFast(actual), status);
  96
  97 /*
  98  *  Check accuracy -
  99  */
 100 status = inSection(2);
 101 testRegExp([status], [re], [str], [result], [expect]);
 102
 103
 104
 105 /*
 106  * The second regexp (HUGE!). We'll test for performance (Section 3) and accuracy (Section 4).
 107  * It comes from the O'Reilly book "Mastering Regular Expressions" by Jeffrey Friedl, Appendix B
 108  */
 109
 110 //# Some things for avoiding backslashitis later on.
 111 $esc        = '\\\\';
 112 $Period      = '\.';
 113 $space      = '\040';              $tab         = '\t';
 114 $OpenBR     = '\\[';               $CloseBR     = '\\]';
 115 $OpenParen  = '\\(';               $CloseParen  = '\\)';
 116 $NonASCII   = '\x80-\xff';         $ctrl        = '\000-\037';
 117 $CRlist     = '\n\015';  //# note: this should really be only \015.
 118 // Items 19, 20, 21
 119 $qtext = '[^' + $esc + $NonASCII + $CRlist + '\"]';                                               // # for within "..."
 120 $dtext = '[^' + $esc + $NonASCII + $CRlist + $OpenBR + $CloseBR + ']';    // # for within [...]
 121 $quoted_pair = $esc + '[^' + $NonASCII + ']';                                                     // # an escaped character
 122
 123 //##############################################################################
 124 //# Items 22 and 23, comment.
 125 //# Impossible to do properly with a regex, I make do by allowing at most one level of nesting.
 126 $ctext   =  '[^' + $esc + $NonASCII + $CRlist + '()]';
 127
 128 //# $Cnested matches one non-nested comment.
 129 //# It is unrolled, with normal of $ctext, special of $quoted_pair.
 130 $Cnested =
 131   $OpenParen +                                 // #  (
 132   $ctext + '*' +                            // #     normal*
 133   '(?:' + $quoted_pair + $ctext + '*)*' +   // #     (special normal*)*
 134   $CloseParen;                                 // #                       )
 135
 136
 137 //# $comment allows one level of nested parentheses
 138 //# It is unrolled, with normal of $ctext, special of ($quoted_pair|$Cnested)
 139 $comment =
 140   $OpenParen +                                           // #  (
 141   $ctext + '*' +                                     // #     normal*
 142   '(?:' +                                            // #       (
 143   '(?:' + $quoted_pair + '|' + $Cnested + ')' +   // #         special
 144   $ctext + '*' +                                 // #         normal*
 145   ')*' +                                             // #            )*
 146   $CloseParen;                                           // #                )
 147
 148
 149 //##############################################################################
 150 //# $X is optional whitespace/comments.
 151 $X =
 152   '[' + $space + $tab + ']*' +                                         // # Nab whitespace.
 153   '(?:' + $comment + '[' + $space + $tab + ']*)*';    // # If comment found, allow more spaces.
 154
 155
 156 //# Item 10: atom
 157 $atom_char   = '[^(' + $space + '<>\@,;:\".' + $esc + $OpenBR + $CloseBR + $ctrl + $NonASCII + ']';
 158 $atom =
 159   $atom_char + '+' +            // # some number of atom characters...
 160   '(?!' + $atom_char + ')';     // # ..not followed by something that could be part of an atom
 161
 162 // # Item 11: doublequoted string, unrolled.
 163 $quoted_str =
 164   '\"' +                                         // # "
 165   $qtext + '*' +                              // #   normal
 166   '(?:' + $quoted_pair + $qtext + '*)*' +     // #   ( special normal* )*
 167   '\"';                                          // # "
 168
 169 //# Item 7: word is an atom or quoted string
 170 $word =
 171   '(?:' +
 172   $atom +                // # Atom
 173   '|' +                  //     #  or
 174   $quoted_str +          // # Quoted string
 175   ')'
 176
 177 //# Item 12: domain-ref is just an atom
 178   $domain_ref  = $atom;
 179
 180 //# Item 13: domain-literal is like a quoted string, but [...] instead of  "..."
 181 $domain_lit  =
 182   $OpenBR +                                                                          // # [
 183   '(?:' + $dtext + '|' + $quoted_pair + ')*' +     // #    stuff
 184   $CloseBR;                                        // #           ]
 185
 186 // # Item 9: sub-domain is a domain-ref or domain-literal
 187 $sub_domain  =
 188   '(?:' +
 189   $domain_ref +
 190   '|' +
 191   $domain_lit +
 192   ')' +
 193   $X;                 // # optional trailing comments
 194
 195 // # Item 6: domain is a list of subdomains separated by dots.
 196 $domain =
 197   $sub_domain +
 198   '(?:' +
 199   $Period + $X + $sub_domain +
 200   ')*';
 201
 202 //# Item 8: a route. A bunch of "@ $domain" separated by commas, followed by a colon.
 203 $route =
 204   '\@' + $X + $domain +
 205   '(?:,' + $X + '\@' + $X + $domain + ')*' +  // # additional domains
 206   ':' +
 207   $X;                                   // # optional trailing comments
 208
 209 //# Item 6: local-part is a bunch of $word separated by periods
 210 $local_part =
 211   $word + $X
 212   '(?:' +
 213   $Period + $X + $word + $X +           // # additional words
 214   ')*';
 215
 216 // # Item 2: addr-spec is local@domain
 217 $addr_spec  =
 218   $local_part + '\@' + $X + $domain;
 219
 220 //# Item 4: route-addr is <route? addr-spec>
 221 $route_addr =
 222   '<' + $X +                     // # <
 223   '(?:' + $route + ')?' +     // #       optional route
 224   $addr_spec +                // #       address spec
 225   '>';                           // #                 >
 226
 227 //# Item 3: phrase........
 228 $phrase_ctrl = '\000-\010\012-\037'; // # like ctrl, but without tab
 229
 230 //# Like atom-char, but without listing space, and uses phrase_ctrl.
 231 //# Since the class is negated, this matches the same as atom-char plus space and tab
 232 $phrase_char =
 233   '[^()<>\@,;:\".' + $esc + $OpenBR + $CloseBR + $NonASCII + $phrase_ctrl + ']';
 234
 235 // # We've worked it so that $word, $comment, and $quoted_str to not consume trailing $X
 236 // # because we take care of it manually.
 237 $phrase =
 238   $word +                                                  // # leading word
 239   $phrase_char + '*' +                                     // # "normal" atoms and/or spaces
 240   '(?:' +
 241   '(?:' + $comment + '|' + $quoted_str + ')' +          // # "special" comment or quoted string
 242   $phrase_char + '*' +                                  // #  more "normal"
 243   ')*';
 244
 245 // ## Item #1: mailbox is an addr_spec or a phrase/route_addr
 246 $mailbox =
 247   $X +                                // # optional leading comment
 248   '(?:' +
 249   $phrase + $route_addr +     // # name and address
 250   '|' +                       //     #  or
 251   $addr_spec +                // # address
 252   ')';
 253
 254
 255 //###########################################################################
 256
 257
 258 re = new RegExp($mailbox, "g");
 259 str = 'Jeffy<"That Tall Guy"@ora.com (this address is no longer active)>';
 260 expect = Array('Jeffy<"That Tall Guy"@ora.com (this address is no longer active)>');
 261
 262 /*
 263  *  Check performance -
 264  */
 265 status = inSection(3);
 266 var start = new Date();
 267 var result = re.exec(str);
 268 actual = elapsedTime(start);
 269 reportCompare(isThisFast(FAST), isThisFast(actual), status);
 270
 271 /*
 272  *  Check accuracy -
 273  */
 274 status = inSection(4);
 275 testRegExp([status], [re], [str], [result], [expect]);