From: Luo Xionghu Date: Tue, 16 Sep 2014 03:24:48 +0000 (+0800) Subject: improve the build performance of vector type built-in function. X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=565d1eb00d9a5219c2848b3674e40ac07cb48b89;p=contrib%2Fbeignet.git improve the build performance of vector type built-in function. this patch was lost during the libocl merge. resubmit it to improve the vector function performance. please refer to e2db890596eea0a6eb741e11e576a38952f1ed1e for detail. Signed-off-by: Luo Xionghu Reviewed-by: Zhigang Gong --- diff --git a/backend/src/libocl/script/gen_vector.py b/backend/src/libocl/script/gen_vector.py index a91dfcf..de28552 100755 --- a/backend/src/libocl/script/gen_vector.py +++ b/backend/src/libocl/script/gen_vector.py @@ -289,9 +289,42 @@ class builtinProto(): formatStr += ';' self.append(formatStr) return formatStr - formatStr = self.append(formatStr, '{{return ({0}{1})('.format(vtype[0], vtype[1])) - self.indent = len(formatStr) - for j in range(0, vtype[1]): + if self.functionName != 'select' and ptypeSeqs[0] == ptypeSeqs[self.paramCount-1] and ptype[1] > 4: + formatStr += '\n{ \n union{' + formatStr = self.append(formatStr, ' {0} va[{1}];'.format(vtype[0], vtype[1])) + formatStr = self.append(formatStr, ' {0}{1} vv{2};'.format(vtype[0], vtype[1], vtype[1])) + formatStr += '\n }uret;' + formatStr += '\n union{' + formatStr = self.append(formatStr, ' {0} pa[{1}];'.format(ptype[0], ptype[1])) + formatStr = self.append(formatStr, ' {0}{1} pv{2};'.format(ptype[0], ptype[1], ptype[1])) + formatStr += '\n }' + for n in range(0, self.paramCount): + formatStr += 'usrc{0}'.format(n) + if n+1 != self.paramCount: + formatStr +=', ' + formatStr += ';' + + for n in range(0, self.paramCount): + formatStr = self.append(formatStr, ' usrc{0}.pv{1} = param{2};'.format(n, ptype[1], n)) + formatStr = self.append(formatStr, ' for(int i =0; i < {0}; i++)'.format(ptype[1])) + formatStr += '\n uret.va[i] = ' + if self.prefix == 'relational' and self.functionName != 'bitselect' and self.functionName != 'select': + formatStr += '-' + formatStr += '{0}('.format(self.functionName) + + for n in range(0, self.paramCount): + formatStr += 'usrc{0}.pa[i]'.format(n) + if n+1 != self.paramCount: + formatStr +=', ' + formatStr += ');' + formatStr = self.append(formatStr, ' return uret.vv{0};'.format(vtype[1])) + formatStr += '\n}' + formatStr = self.append(formatStr) + return formatStr + else: + formatStr = self.append(formatStr, '{{return ({0}{1})('.format(vtype[0], vtype[1])) + self.indent = len(formatStr) + for j in range(0, vtype[1]): if (j != 0): formatStr += ',' if (j + 1) % 2 == 0: @@ -326,10 +359,10 @@ class builtinProto(): formatStr += ')' - formatStr += '); }\n' - self.append(formatStr) + formatStr += '); }\n' + self.append(formatStr) - return formatStr + return formatStr def output(self): for line in self.outputStr: