From e8c6a474e88610b73e62a19256dc8706b42f42b9 Mon Sep 17 00:00:00 2001 From: Yves Orton Date: Sun, 11 May 2014 12:37:33 +0200 Subject: [PATCH] Implement "max waste" thresholds to avoid problems with COW and deliberately overallocated pvs COW does not play nicely with "preallocate" algorithms. More specifically code like sv_gets() wants to preallocate a large buffer into $_ for performance reasons. Prior to COW this was just fine. When someone assigned $_ to a less volatile variable only the used portion of the buffer was copied, and the extended buffer would be reused by sv_gets() and all was well. With COW however this process is foiled. The preallocated buffer get shared, and then when $_ is updated the buffer is dropped from $_, leaving the other SV holding ownership of the overallocated buffer, and causing sv_gets() to allocate a new buffer entirely. This process would then repeat consuming time and lots of memory. This patch introduces a "wastage" check to COW. When decided if we should COW a string we look at the ratio and difference of SvCUR(sv) and SvLEN(sv), which represent the "actual string length" and the "allocated string length". When the difference exceeds a hard threshold, or when the ration exceeds a designated factor then we do not COW. This means that strings with large overallocations are not COWed. Exactly how this works out in practice, where SvGROW() *always* overallocates, is an open question. See: https://rt.perl.org/Ticket/Display.html?id=121796 This patch also slightly tweaks SvGROW() not to do roundup on the first allocation of the pv. Odds are good that the initial request realy does want exactly what they expected. (SvGROW contrary to what the name suggests is used for bother *extended* the size of a pv, and initializing it the first time.) --- sv.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++++++------------- sv.h | 6 ----- 2 files changed, 71 insertions(+), 23 deletions(-) diff --git a/sv.c b/sv.c index 395431a..93fcdac 100644 --- a/sv.c +++ b/sv.c @@ -46,6 +46,69 @@ char *gconvert(double, int, int, char *); #endif +#ifdef PERL_NEW_COPY_ON_WRITE +# ifndef SV_COW_THRESHOLD +# define SV_COW_THRESHOLD 0 /* COW iff len > K */ +# endif +# ifndef SV_COWBUF_THRESHOLD +# define SV_COWBUF_THRESHOLD 1250 /* COW iff len > K */ +# endif +# ifndef SV_COW_MAX_WASTE_THRESHOLD +# define SV_COW_MAX_WASTE_THRESHOLD 80 /* COW iff (len - cur) < K */ +# endif +# ifndef SV_COWBUF_WASTE_THRESHOLD +# define SV_COWBUF_WASTE_THRESHOLD 80 /* COW iff (len - cur) < K */ +# endif +# ifndef SV_COW_MAX_WASTE_FACTOR_THRESHOLD +# define SV_COW_MAX_WASTE_FACTOR_THRESHOLD 2 /* COW iff len < (cur * K) */ +# endif +# ifndef SV_COWBUF_WASTE_FACTOR_THRESHOLD +# define SV_COWBUF_WASTE_FACTOR_THRESHOLD 2 /* COW iff len < (cur * K) */ +# endif +#endif +/* Work around compiler warnings about unsigned >= THRESHOLD when thres- + hold is 0. */ +#if SV_COW_THRESHOLD +# define GE_COW_THRESHOLD(cur) ((cur) >= SV_COW_THRESHOLD) +#else +# define GE_COW_THRESHOLD(cur) 1 +#endif +#if SV_COWBUF_THRESHOLD +# define GE_COWBUF_THRESHOLD(cur) ((cur) >= SV_COWBUF_THRESHOLD) +#else +# define GE_COWBUF_THRESHOLD(cur) 1 +#endif +#if SV_COW_MAX_WASTE_THRESHOLD +# define GE_COW_MAX_WASTE_THRESHOLD(cur,len) (((len)-(cur)) < SV_COW_MAX_WASTE_THRESHOLD) +#else +# define GE_COW_MAX_WASTE_THRESHOLD(cur,len) 1 +#endif +#if SV_COWBUF_WASTE_THRESHOLD +# define GE_COWBUF_WASTE_THRESHOLD(cur,len) (((len)-(cur)) < SV_COWBUF_WASTE_THRESHOLD) +#else +# define GE_COWBUF_WASTE_THRESHOLD(cur,len) 1 +#endif +#if SV_COW_MAX_WASTE_FACTOR_THRESHOLD +# define GE_COW_MAX_WASTE_FACTOR_THRESHOLD(cur,len) ((len) < SV_COW_MAX_WASTE_FACTOR_THRESHOLD * (cur)) +#else +# define GE_COW_MAX_WASTE_FACTOR_THRESHOLD(cur,len) 1 +#endif +#if SV_COWBUF_WASTE_FACTOR_THRESHOLD +# define GE_COWBUF_WASTE_FACTOR_THRESHOLD(cur,len) ((len) < SV_COWBUF_WASTE_FACTOR_THRESHOLD * (cur)) +#else +# define GE_COWBUF_WASTE_FACTOR_THRESHOLD(cur,len) 1 +#endif + +#define CHECK_COW_THRESHOLD(cur,len) (\ + GE_COW_THRESHOLD((cur)) && \ + GE_COW_MAX_WASTE_THRESHOLD((cur),(len)) && \ + GE_COW_MAX_WASTE_FACTOR_THRESHOLD((cur),(len)) \ +) +#define CHECK_COWBUF_THRESHOLD(cur,len) (\ + GE_COWBUF_THRESHOLD((cur)) && \ + GE_COWBUF_WASTE_THRESHOLD((cur),(len)) && \ + GE_COWBUF_WASTE_FACTOR_THRESHOLD((cur),(len)) \ +) /* void Gconvert: on Linux at least, gcvt (which Gconvert gets deffed to), * has a mandatory return value, even though that value is just the same * as the buf arg */ @@ -1524,7 +1587,8 @@ Perl_sv_grow(pTHX_ SV *const sv, STRLEN newlen) if (newlen < minlen) newlen = minlen; #ifndef Perl_safesysmalloc_size - newlen = PERL_STRLEN_ROUNDUP(newlen); + if (SvLEN(sv)) + newlen = PERL_STRLEN_ROUNDUP(newlen); #endif if (SvLEN(sv) && s) { s = (char*)saferealloc(s, newlen); @@ -3987,18 +4051,8 @@ S_glob_assign_ref(pTHX_ SV *const dstr, SV *const sstr) return; } -/* Work around compiler warnings about unsigned >= THRESHOLD when thres- - hold is 0. */ -#if SV_COW_THRESHOLD -# define GE_COW_THRESHOLD(len) ((len) >= SV_COW_THRESHOLD) -#else -# define GE_COW_THRESHOLD(len) 1 -#endif -#if SV_COWBUF_THRESHOLD -# define GE_COWBUF_THRESHOLD(len) ((len) >= SV_COWBUF_THRESHOLD) -#else -# define GE_COWBUF_THRESHOLD(len) 1 -#endif + + #ifdef PERL_DEBUG_READONLY_COW # include @@ -4366,7 +4420,7 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, SV* sstr, const I32 flags) || ((sflags & (SVs_PADTMP|SVf_READONLY|SVf_IsCOW)) == SVs_PADTMP /* whose buffer is worth stealing */ - && GE_COWBUF_THRESHOLD(cur) + && CHECK_COWBUF_THRESHOLD(cur,len) ) ) && !(sflags & SVf_OOK) && /* and not involved in OOK hack? */ @@ -4400,14 +4454,14 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, SV* sstr, const I32 flags) #elif defined(PERL_NEW_COPY_ON_WRITE) (sflags & SVf_IsCOW ? (!len || - ( (GE_COWBUF_THRESHOLD(cur) || SvLEN(dstr) < cur+1) + ( (CHECK_COWBUF_THRESHOLD(cur,len) || SvLEN(dstr) < cur+1) /* If this is a regular (non-hek) COW, only so many COW "copies" are possible. */ && CowREFCNT(sstr) != SV_COW_REFCNT_MAX )) : ( (sflags & CAN_COW_MASK) == CAN_COW_FLAGS && !(SvFLAGS(dstr) & SVf_BREAK) - && GE_COW_THRESHOLD(cur) && cur+1 < len - && (GE_COWBUF_THRESHOLD(cur) || SvLEN(dstr) < cur+1) + && CHECK_COW_THRESHOLD(cur,len) && cur+1 < len + && (CHECK_COWBUF_THRESHOLD(cur,len) || SvLEN(dstr) < cur+1) )) #else sflags & SVf_IsCOW diff --git a/sv.h b/sv.h index 8760ec4..f5e2827 100644 --- a/sv.h +++ b/sv.h @@ -1909,12 +1909,6 @@ mg.c:1024: warning: left-hand operand of comma expression has no effect /* Note: To allow 256 COW "copies", a refcnt of 0 means 1. */ # define CowREFCNT(sv) (*(U8 *)(SvPVX(sv)+SvLEN(sv)-1)) # define SV_COW_REFCNT_MAX ((1 << sizeof(U8)*8) - 1) -# ifndef SV_COW_THRESHOLD -# define SV_COW_THRESHOLD 0 /* min string length for cow */ -# endif -# ifndef SV_COWBUF_THRESHOLD -# define SV_COWBUF_THRESHOLD 1250 /* min string length for cow */ -# endif /* over existing buffer */ # define CAN_COW_MASK (SVf_POK|SVf_ROK|SVp_POK|SVf_FAKE| \ SVf_OOK|SVf_BREAK|SVf_READONLY) # endif -- 2.7.4