--- /dev/null
+#!/usr/bin/python3
+# Copyright (C) 2021 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+#
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <https://www.gnu.org/licenses/>.
+
+"""Benchmark program generator script
+
+This script takes a function name as input and generates a program using
+an libmvec input file located in the sysdeps/x86_64/fpu directory. The
+name of the input file should be of the form libmvec-foo-inputs where
+'foo' is the name of the function.
+"""
+
+from __future__ import print_function
+import sys
+import os
+import itertools
+import re
+
+# Macro definitions for functions that take no arguments. For functions
+# that take arguments, the STRUCT_TEMPLATE, ARGS_TEMPLATE and
+# VARIANTS_TEMPLATE are used instead.
+DEFINES_TEMPLATE = '''
+#define CALL_BENCH_FUNC(v, i) %(func)s ()
+#define NUM_VARIANTS (1)
+#define NUM_SAMPLES(v) (1)
+#define VARIANT(v) FUNCNAME "()"
+'''
+
+# Structures to store arguments for the function call. A function may
+# have its inputs partitioned to represent distinct performance
+# characteristics or distinct flavors of the function. Each such
+# variant is represented by the _VARIANT structure. The ARGS structure
+# represents a single set of arguments.
+BENCH_VEC_TEMPLATE = '''
+#define CALL_BENCH_FUNC(v, i) (__extension__ ({ \\
+ %(defs)s mx0 = %(func)s (%(func_args)s); \\
+ mx0; }))
+'''
+
+BENCH_SCALAR_TEMPLATE = '''
+#define CALL_BENCH_FUNC(v, i) %(func)s (%(func_args)s)
+'''
+
+STRUCT_TEMPLATE = '''struct args
+{
+%(args)s
+ double timing;
+};
+
+struct _variants
+{
+ const char *name;
+ int count;
+ struct args *in;
+};
+'''
+
+# The actual input arguments.
+ARGS_TEMPLATE = '''struct args in%(argnum)d[%(num_args)d] = {
+%(args)s
+};
+'''
+
+# The actual variants, along with macros defined to access the variants.
+VARIANTS_TEMPLATE = '''struct _variants variants[%(num_variants)d] = {
+%(variants)s
+};
+
+#define NUM_VARIANTS %(num_variants)d
+#define NUM_SAMPLES(i) (variants[i].count)
+#define VARIANT(i) (variants[i].name)
+'''
+
+# Epilogue for the generated source file.
+EPILOGUE = '''
+#define BENCH_FUNC(i, j) ({%(getret)s CALL_BENCH_FUNC (i, j);})
+#define FUNCNAME "%(func)s"
+#include <bench-libmvec-skeleton.c>'''
+
+
+def gen_source(func_types, directives, all_vals):
+ """Generate source for the function
+
+ Generate the C source for the function from the values and
+ directives.
+
+ Args:
+ func: The function name
+ directives: A dictionary of directives applicable to this function
+ all_vals: A dictionary input values
+ """
+ # The includes go in first.
+ for header in directives['includes']:
+ print('#include <%s>' % header)
+
+ for header in directives['include-sources']:
+ print('#include "%s"' % header)
+
+ argtype_vtable = {
+ 2: '128',
+ 4: '256',
+ 8: '512'
+ }
+ prefix_vtable = {
+ 2: 'b',
+ 4: 'c',
+ 8: 'e'
+ }
+
+ # Get all the function properties
+ funcname_argtype = ''
+ float_flag = False
+ if func_types[1] == 'float':
+ float_flag = True
+ avx_flag = False
+ if func_types[3] == 'avx2':
+ avx_flag = True
+ funcname_stride = int(func_types[2][4:])
+ funcname_origin = func_types[-1]
+ if float_flag:
+ funcname_origin = funcname_origin[:-1]
+
+ if funcname_stride == 1:
+ # Prepare for scalar functions file generation
+ funcname_prefix = ''
+ funcname_prefix_1 = ''
+ funcname_argtype = 'double'
+ if float_flag:
+ funcname_argtype = 'float'
+ else:
+ # Prepare for libmvec functions file generation
+ funcname_prefix_1 = len(directives['args']) * 'v' + '_'
+ aligned_stride = funcname_stride
+ if float_flag:
+ aligned_stride /= 2
+ funcname_prefix = '_ZGV'
+ if (avx_flag and (aligned_stride == 4)):
+ funcname_prefix += 'd'
+ else:
+ funcname_prefix += prefix_vtable[aligned_stride]
+ funcname_prefix = funcname_prefix + 'N' + func_types[2][4:]
+ funcname_argtype = '__m' + argtype_vtable[aligned_stride]
+ if not float_flag:
+ funcname_argtype += 'd'
+
+ # Include x86intrin.h for vector functions
+ if not funcname_stride == 1:
+ print('#include <x86intrin.h>')
+ if (avx_flag and (aligned_stride == 4)):
+ # For bench-float-vlen8-avx2* and bench-double-vlen4-avx2*
+ print('#define REQUIRE_AVX2')
+ elif aligned_stride == 8:
+ # For bench-float-vlen16* and bench-double-vlen8*
+ print('#define REQUIRE_AVX512F')
+ elif aligned_stride == 4:
+ # For bench-float-vlen8* and bench-double-vlen4* without avx2
+ print('#define REQUIRE_AVX')
+ else:
+ print('#define FUNCTYPE %s' % funcname_argtype)
+
+ print('#define STRIDE %d ' % funcname_stride)
+
+ funcname = funcname_prefix + funcname_prefix_1 + funcname_origin
+ if float_flag:
+ funcname += 'f'
+
+ funcname_rettype = funcname_argtype
+ if directives['ret'] == '':
+ funcname_rettype = 'void'
+
+ funcname_inputtype = []
+ for arg, i in zip(directives['args'], itertools.count()):
+ funcname_inputtype.append(funcname_argtype)
+ if arg[0] == '<' and arg[-1] == '>':
+ pos = arg.rfind('*')
+ if pos == -1:
+ die('Output argument must be a pointer type')
+ funcname_inputtype[i] += ' *'
+
+ if not funcname_stride == 1:
+ if len(directives['args']) == 2:
+ print('extern %s %s (%s, %s);' % (funcname_rettype, funcname, funcname_inputtype[0], funcname_inputtype[1]))
+ elif len(directives['args']) == 3:
+ print('extern %s %s (%s, %s, %s);' % (funcname_rettype, funcname, funcname_inputtype[0], funcname_inputtype[1], funcname_inputtype[2]))
+ else:
+ print('extern %s %s (%s);' % (funcname_rettype, funcname, funcname_inputtype[0]))
+
+ # Print macros. This branches out to a separate routine if
+ # the function takes arguments.
+ if not directives['args']:
+ print(DEFINES_TEMPLATE % {'funcname': funcname})
+ outargs = []
+ else:
+ outargs = _print_arg_data(funcname, float_flag, funcname_argtype, funcname_stride, directives, all_vals)
+
+ # Print the output variable definitions if necessary.
+ for out in outargs:
+ print(out)
+
+ # If we have a return value from the function, make sure it is
+ # assigned to prevent the compiler from optimizing out the
+ # call.
+ getret = ''
+
+ if directives['ret']:
+ if funcname_argtype != '':
+ print('static %s volatile ret;' % funcname_argtype)
+ getret = 'ret ='
+ else:
+ print('static %s volatile ret;' % directives['ret'])
+ getret = 'ret ='
+
+ # Test initialization.
+ if directives['init']:
+ print('#define BENCH_INIT %s' % directives['init'])
+
+ print(EPILOGUE % {'getret': getret, 'func': funcname})
+
+
+def _print_arg_data(func, float_flag, funcname_argtype, funcname_stride, directives, all_vals):
+ """Print argument data
+
+ This is a helper function for gen_source that prints structure and
+ values for arguments and their variants and returns output arguments
+ if any are found.
+
+ Args:
+ func: Function name
+ float_flag: True if function is float type
+ funcname_argtype: Type for vector variants
+ funcname_stride: Vector Length
+ directives: A dictionary of directives applicable to this function
+ all_vals: A dictionary input values
+
+ Returns:
+ Returns a list of definitions for function arguments that act as
+ output parameters.
+ """
+ # First, all of the definitions. We process writing of
+ # CALL_BENCH_FUNC, struct args and also the output arguments
+ # together in a single traversal of the arguments list.
+ func_args = []
+ _func_args = []
+ arg_struct = []
+ outargs = []
+ # Conversion function for each type
+ vtable = {
+ '__m128d': '_mm_loadu_pd',
+ '__m256d': '_mm256_loadu_pd',
+ '__m512d': '_mm512_loadu_pd',
+ '__m128': '_mm_loadu_ps',
+ '__m256': '_mm256_loadu_ps',
+ '__m512': '_mm512_loadu_ps',
+ 'double': '',
+ 'float': ''
+ }
+
+ # For double max_vlen=8, for float max_vlen=16.
+ if float_flag == True:
+ max_vlen = 16
+ else:
+ max_vlen = 8
+
+ for arg, i in zip(directives['args'], itertools.count()):
+ if arg[0] == '<' and arg[-1] == '>':
+ outargs.append('static %s out%d __attribute__((used));' % (funcname_argtype, i))
+ func_args.append('&out%d' % i)
+ _func_args.append('&out%d' % i)
+ else:
+ arg_struct.append(' %s arg%d[STRIDE];' % (arg, i))
+ func_args.append('%s (variants[v].in[i].arg%d)' %
+ (vtable[funcname_argtype], i))
+ _func_args.append('variants[v].in[i].arg%d[0]' % i)
+
+ if funcname_stride == 1:
+ print(BENCH_SCALAR_TEMPLATE % {'func': func,
+ 'func_args': ', '.join(_func_args)})
+ elif directives['ret'] == '':
+ print(BENCH_SCALAR_TEMPLATE % {'func': func,
+ 'func_args': ', '.join(func_args)})
+ else:
+ print(BENCH_VEC_TEMPLATE % {'func': func, 'func_args': ', '.join(func_args),
+ 'defs': funcname_argtype})
+ print(STRUCT_TEMPLATE % {'args': '\n'.join(arg_struct)})
+
+ # Now print the values.
+ variants = []
+ for (k, _vals), i in zip(all_vals.items(), itertools.count()):
+ vals = []
+ temp_vals = []
+ j = 0
+ temp_j = 0
+ result_v = ['', '', '']
+ for _v in _vals:
+ nums = _v.split(',')
+ for l in range(0, len(nums)):
+ result_v[l] = result_v[l] + nums[l].strip() + ','
+ j += 1
+ temp_j += 1
+
+ if temp_j == funcname_stride:
+ final_result = ''
+ for l in range(0, len(nums)):
+ final_result = final_result + '{' + result_v[l][:-1] + '},'
+ temp_vals.append(final_result[:-1])
+ temp_j = 0
+ result_v = ['', '', '']
+
+ # Make sure amount of test data is multiple of max_vlen
+ # to keep data size same for all vector length.
+ if j == max_vlen:
+ vals.extend(temp_vals)
+ temp_vals = []
+ j = 0
+
+ out = [' {%s, 0},' % v for v in vals]
+
+ # Members for the variants structure list that we will
+ # print later.
+ variants.append(' {"%s", %d, in%d},' % (k, len(vals), i))
+ print(ARGS_TEMPLATE % {'argnum': i, 'num_args': len(vals),
+ 'args': '\n'.join(out)})
+
+ # Print the variants and the last set of macros.
+ print(VARIANTS_TEMPLATE % {'num_variants': len(all_vals),
+ 'variants': '\n'.join(variants)})
+ return outargs
+
+
+def _process_directive(d_name, d_val, func_args):
+ """Process a directive.
+
+ Evaluate the directive name and value passed and return the
+ processed value. This is a helper function for parse_file.
+
+ Args:
+ d_name: Name of the directive
+ d_val: The string value to process
+
+ Returns:
+ The processed value, which may be the string as it is or an object
+ that describes the directive.
+ """
+ # Process the directive values if necessary. name and ret don't
+ # need any processing.
+ if d_name.startswith('include'):
+ d_val = d_val.split(',')
+ elif d_name == 'args':
+ d_val = d_val.split(':')
+ # Check if args type match
+ if not d_val[0] == func_args:
+ die("Args mismatch, should be %s, but get %s" % (d_val[0], func_args))
+
+ # Return the values.
+ return d_val
+
+
+def parse_file(func_types):
+ """Parse an input file
+
+ Given a function name, open and parse an input file for the function
+ and get the necessary parameters for the generated code and the list
+ of inputs.
+
+ Args:
+ func: The function name
+
+ Returns:
+ A tuple of two elements, one a dictionary of directives and the
+ other a dictionary of all input values.
+ """
+ all_vals = {}
+ # Valid directives.
+ directives = {
+ 'name': '',
+ 'args': [],
+ 'includes': [],
+ 'include-sources': [],
+ 'ret': '',
+ 'init': ''
+ }
+
+ func = func_types[-1]
+ try:
+ with open('../sysdeps/x86_64/fpu/libmvec-%s-inputs' % func) as f:
+ for line in f:
+ # Look for directives and parse it if found.
+ if line.startswith('##'):
+ try:
+ d_name, d_val = line[2:].split(':', 1)
+ d_name = d_name.strip()
+ d_val = d_val.strip()
+ directives[d_name] = _process_directive(d_name, d_val, func_types[1])
+ except (IndexError, KeyError):
+ die('Invalid directive: %s' % line[2:])
+
+ # Skip blank lines and comments.
+ line = line.split('#', 1)[0].rstrip()
+ if not line:
+ continue
+
+ # Otherwise, we're an input. Add to the appropriate
+ # input set.
+ cur_name = directives['name']
+ all_vals.setdefault(cur_name, [])
+ all_vals[cur_name].append(line)
+ except IOError as ex:
+ die("Failed to open input file (%s): %s" % (ex.filename, ex.strerror))
+
+ return directives, all_vals
+
+
+def die(msg):
+ """Exit with an error
+
+ Prints an error message to the standard error stream and exits with
+ a non-zero status.
+
+ Args:
+ msg: The error message to print to standard error
+ """
+ print('%s\n' % msg, file=sys.stderr)
+ sys.exit(os.EX_DATAERR)
+
+
+def main(args):
+ """Main function
+
+ Use the first command line argument as function name and parse its
+ input file to generate C source that calls the function repeatedly
+ for the input.
+
+ Args:
+ args: The command line arguments with the program name dropped
+
+ Returns:
+ os.EX_USAGE on error and os.EX_OK on success.
+ """
+ if len(args) != 1:
+ print('Usage: %s <function>' % sys.argv[0])
+ return os.EX_USAGE
+
+ func_types = args[0].split('-')
+ directives, all_vals = parse_file(func_types)
+ gen_source(func_types, directives, all_vals)
+ return os.EX_OK
+
+
+if __name__ == '__main__':
+ sys.exit(main(sys.argv[1:]))