From 9c007264caa0e1aed57010dc2950fe35f9d8347e Mon Sep 17 00:00:00 2001 From: Jarkko Hietaniemi Date: Wed, 13 Jan 1999 17:24:59 +0000 Subject: [PATCH] From: Hans Mulder Optimize common sort routines. Thread started by the message From: Hans Mulder Sender: owner-perl5-porters@perl.org To: perl5-porters@perl.org Subject: [Patch for 5.00554] From the Todo list: Optimize sort by { $a <=> $b Message-Id: <9901092156.AA03831@icgned.icgroup.nl> and the patch from the message From: Hans Mulder To: jhi@iki.fi Cc: perl5-porters@perl.org Subject: Re: [Patch for 5.00554] From the Todo list: Optimize sort by { $a <=> $b } Date: Wed, 13 Jan 1999 17:39:35 +0100 Message-Id: <9901131639.AA17419@icgned.icgroup.nl> p4raw-id: //depot/cfgperl@2595 --- Todo | 1 - op.c | 63 ++++++++++++++++++++++++++++++++++++++++- op.h | 4 +++ pp_ctl.c | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++----- t/op/sort.t | 42 +++++++++++++++++++++++++++- 5 files changed, 193 insertions(+), 10 deletions(-) diff --git a/Todo b/Todo index a4cecbf..2f20ed7 100644 --- a/Todo +++ b/Todo @@ -41,7 +41,6 @@ Optimizations Cache hash value? (Not a win, according to Guido) Optimize away @_ where possible "one pass" global destruction - Optimize sort by { $a <=> $b } Rewrite regexp parser for better integrated optimization LRU cache of regexp: foreach $pat (@pats) { foo() if /$pat/ } diff --git a/op.c b/op.c index 58f26e1..901995a 100644 --- a/op.c +++ b/op.c @@ -51,6 +51,7 @@ static PADOFFSET pad_findlex _((char* name, PADOFFSET newoff, U32 seq, CV* startcv, I32 cx_ix, I32 saweval)); static OP *newDEFSVOP _((void)); static OP *new_logop _((I32 type, I32 flags, OP **firstp, OP **otherp)); +static void simplify_sort(OP *o); #endif STATIC char* @@ -5048,7 +5049,9 @@ ck_sort(OP *o) o->op_private |= OPpLOCALE; #endif - if (o->op_flags & OPf_STACKED) { + if (o->op_flags & OPf_STACKED) + simplify_sort(o); + if (o->op_flags & OPf_STACKED) { /* may have been cleared */ OP *kid = cLISTOPo->op_first->op_sibling; /* get past pushmark */ OP *k; kid = kUNOP->op_first; /* get past rv2gv */ @@ -5089,6 +5092,64 @@ ck_sort(OP *o) return o; } +static void +simplify_sort(OP *o) +{ + register OP *kid = cLISTOPo->op_first->op_sibling; /* get past pushmark */ + OP *k; + int reversed; + if (!(o->op_flags & OPf_STACKED)) + return; + kid = kUNOP->op_first; /* get past rv2gv */ + if (kid->op_type != OP_SCOPE) + return; + kid = kLISTOP->op_last; /* get past scope */ + switch(kid->op_type) { + case OP_NCMP: + case OP_I_NCMP: + case OP_SCMP: + break; + default: + return; + } + k = kid; /* remember this node*/ + if (kBINOP->op_first->op_type != OP_RV2SV) + return; + kid = kBINOP->op_first; /* get past cmp */ + if (kUNOP->op_first->op_type != OP_GV) + return; + kid = kUNOP->op_first; /* get past rv2sv */ + if (GvSTASH(kGVOP->op_gv) != PL_curstash) + return; + if (strEQ(GvNAME(kGVOP->op_gv), "a")) + reversed = 0; + else if(strEQ(GvNAME(kGVOP->op_gv), "b")) + reversed = 1; + else + return; + kid = k; /* back to cmp */ + if (kBINOP->op_last->op_type != OP_RV2SV) + return; + kid = kBINOP->op_last; /* down to 2nd arg */ + if (kUNOP->op_first->op_type != OP_GV) + return; + kid = kUNOP->op_first; /* get past rv2sv */ + if (GvSTASH(kGVOP->op_gv) != PL_curstash + || ( reversed + ? strNE(GvNAME(kGVOP->op_gv), "a") + : strNE(GvNAME(kGVOP->op_gv), "b"))) + return; + o->op_flags &= ~(OPf_STACKED | OPf_SPECIAL); + if (reversed) + o->op_private |= OPpSORT_REVERSE; + if (k->op_type == OP_NCMP) + o->op_private |= OPpSORT_NUMERIC; + if (k->op_type == OP_I_NCMP) + o->op_private |= OPpSORT_NUMERIC | OPpSORT_INTEGER; + op_free(cLISTOPo->op_first->op_sibling); /* delete comparison block */ + cLISTOPo->op_first->op_sibling = cLISTOPo->op_last; + cLISTOPo->op_children = 1; +} OP * ck_split(OP *o) diff --git a/op.h b/op.h index 31f018d..8a9f81d 100644 --- a/op.h +++ b/op.h @@ -146,6 +146,10 @@ typedef U32 PADOFFSET; /* Private for OP_SORT, OP_PRTF, OP_SPRINTF, string cmp'n, and case changers */ #define OPpLOCALE 64 /* Use locale */ +/* Private for OP_SORT */ +#define OPpSORT_NUMERIC 1 /* Optimized away { $a <=> $b } */ +#define OPpSORT_INTEGER 2 /* Ditto while under "use integer" */ +#define OPpSORT_REVERSE 4 /* Descending sort */ /* Private for OP_THREADSV */ #define OPpDONE_SVREF 64 /* Been through newSVREF once */ diff --git a/pp_ctl.c b/pp_ctl.c index 59c571d..3263b34 100644 --- a/pp_ctl.c +++ b/pp_ctl.c @@ -41,6 +41,10 @@ static void save_lines _((AV *array, SV *sv)); static I32 sortcv _((SV *a, SV *b)); static void qsortsv _((SV **array, size_t num_elts, I32 (*fun)(SV *a, SV *b))); static OP *doeval _((int gimme, OP** startop)); +static I32 sv_ncmp _((SV *a, SV *b)); +static I32 sv_i_ncmp _((SV *a, SV *b)); +static I32 amagic_ncmp _((SV *a, SV *b)); +static I32 amagic_i_ncmp _((SV *a, SV *b)); I32 amagic_cmp _((SV *str1, SV *str2)); I32 amagic_cmp_locale _((SV *str1, SV *str2)); #endif @@ -753,6 +757,20 @@ PP(pp_mapwhile) } } +STATIC I32 +sv_ncmp (SV *a, SV *b) +{ + double nv1 = SvNV(a); + double nv2 = SvNV(b); + return nv1 < nv2 ? -1 : nv1 > nv2 ? 1 : 0; +} +STATIC I32 +sv_i_ncmp (SV *a, SV *b) +{ + IV iv1 = SvIV(a); + IV iv2 = SvIV(b); + return iv1 < iv2 ? -1 : iv1 > iv2 ? 1 : 0; +} #define tryCALL_AMAGICbin(left,right,meth,svp) STMT_START { \ *svp = Nullsv; \ if (PL_amagic_generation) { \ @@ -764,6 +782,50 @@ PP(pp_mapwhile) } \ } STMT_END +STATIC I32 +amagic_ncmp(register SV *a, register SV *b) +{ + SV *tmpsv; + tryCALL_AMAGICbin(a,b,ncmp,&tmpsv); + if (tmpsv) { + double d; + + if (SvIOK(tmpsv)) { + I32 i = SvIVX(tmpsv); + if (i > 0) + return 1; + return i? -1 : 0; + } + d = SvNV(tmpsv); + if (d > 0) + return 1; + return d? -1 : 0; + } + return sv_ncmp(a, b); +} + +STATIC I32 +amagic_i_ncmp(register SV *a, register SV *b) +{ + SV *tmpsv; + tryCALL_AMAGICbin(a,b,ncmp,&tmpsv); + if (tmpsv) { + double d; + + if (SvIOK(tmpsv)) { + I32 i = SvIVX(tmpsv); + if (i > 0) + return 1; + return i? -1 : 0; + } + d = SvNV(tmpsv); + if (d > 0) + return 1; + return d? -1 : 0; + } + return sv_i_ncmp(a, b); +} + I32 amagic_cmp(register SV *str1, register SV *str2) { @@ -925,13 +987,30 @@ PP(pp_sort) if (max > 1) { MEXTEND(SP, 20); /* Can't afford stack realloc on signal. */ qsortsv(ORIGMARK+1, max, - (PL_op->op_private & OPpLOCALE) - ? ( overloading - ? FUNC_NAME_TO_PTR(amagic_cmp_locale) - : FUNC_NAME_TO_PTR(sv_cmp_locale)) - : ( overloading - ? FUNC_NAME_TO_PTR(amagic_cmp) - : FUNC_NAME_TO_PTR(sv_cmp) )); + (PL_op->op_private & OPpSORT_NUMERIC) + ? ( (PL_op->op_private & OPpSORT_INTEGER) + ? ( overloading + ? FUNC_NAME_TO_PTR(amagic_i_ncmp) + : FUNC_NAME_TO_PTR(sv_i_ncmp)) + : ( overloading + ? FUNC_NAME_TO_PTR(amagic_ncmp) + : FUNC_NAME_TO_PTR(sv_ncmp))) + : ( (PL_op->op_private & OPpLOCALE) + ? ( overloading + ? FUNC_NAME_TO_PTR(amagic_cmp_locale) + : FUNC_NAME_TO_PTR(sv_cmp_locale)) + : ( overloading + ? FUNC_NAME_TO_PTR(amagic_cmp) + : FUNC_NAME_TO_PTR(sv_cmp) ))); + if (PL_op->op_private & OPpSORT_REVERSE) { + SV **p = ORIGMARK+1; + SV **q = ORIGMARK+max; + while (p < q) { + SV *tmp = *p; + *p++ = *q; + *q-- = tmp; + } + } } } LEAVE; diff --git a/t/op/sort.t b/t/op/sort.t index fdb4e34..4de5cce 100755 --- a/t/op/sort.t +++ b/t/op/sort.t @@ -1,6 +1,10 @@ #!./perl -print "1..29\n"; +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; +} +print "1..37\n"; # XXX known to leak scalars $ENV{PERL_DESTRUCT_LEVEL} = 0 unless $ENV{PERL_DESTRUCT_LEVEL} > 3; @@ -157,3 +161,39 @@ print $@ ? "not ok 21\n# $@" : "ok 21\n"; print ("@b" eq '4 3 2 1' ? "ok 29\n" : "not ok 29 |@b|\n"); } +## exercise sort builtins... ($a <=> $b already tested) +@a = ( 5, 19, 1996, 255, 90 ); +@b = sort { $b <=> $a } @a; +print ("@b" eq '1996 255 90 19 5' ? "ok 30\n" : "not ok 30\n"); +print "# x = '@b'\n"; +$x = join('', sort { $a cmp $b } @harry); +$expected = $upperfirst ? 'AbelCaincatdogx' : 'catdogxAbelCain'; +print ($x eq $expected ? "ok 31\n" : "not ok 31\n"); +print "# x = '$x'; expected = '$expected'\n"; +$x = join('', sort { $b cmp $a } @harry); +$expected = $upperfirst ? 'xdogcatCainAbel' : 'CainAbelxdogcat'; +print ($x eq $expected ? "ok 32\n" : "not ok 32\n"); +print "# x = '$x'; expected = '$expected'\n"; +{ + use integer; + @b = sort { $a <=> $b } @a; + print ("@b" eq '5 19 90 255 1996' ? "ok 33\n" : "not ok 33\n"); + print "# x = '@b'\n"; + @b = sort { $b <=> $a } @a; + print ("@b" eq '1996 255 90 19 5' ? "ok 34\n" : "not ok 34\n"); + print "# x = '@b'\n"; + $x = join('', sort { $a cmp $b } @harry); + $expected = $upperfirst ? 'AbelCaincatdogx' : 'catdogxAbelCain'; + print ($x eq $expected ? "ok 35\n" : "not ok 35\n"); + print "# x = '$x'; expected = '$expected'\n"; + $x = join('', sort { $b cmp $a } @harry); + $expected = $upperfirst ? 'xdogcatCainAbel' : 'CainAbelxdogcat'; + print ($x eq $expected ? "ok 36\n" : "not ok 36\n"); + print "# x = '$x'; expected = '$expected'\n"; +} +# test sorting in non-main package +package Foo; +@a = ( 5, 19, 1996, 255, 90 ); +@b = sort { $b <=> $a } @a; +print ("@b" eq '1996 255 90 19 5' ? "ok 37\n" : "not ok 37\n"); +print "# x = '@b'\n"; -- 2.7.4