Detect Intel MIC architecture and set some defaults at run time instead of build time.
author Andrey Churbanov <Andrey.Churbanov@intel.com>
Fri, 20 Feb 2015 18:14:43 +0000 (18:14 +0000)
committer Andrey Churbanov <Andrey.Churbanov@intel.com>
Fri, 20 Feb 2015 18:14:43 +0000 (18:14 +0000)
llvm-svn: 230033

openmp/runtime/src/kmp.h
openmp/runtime/src/kmp_global.c
openmp/runtime/src/kmp_runtime.c
openmp/runtime/src/kmp_settings.c

index 8603fa7..0b86346 100644 (file)
@@ -392,6 +392,16 @@ enum clock_function_type {
 };
 #endif /* KMP_OS_LINUX */
 
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
+enum mic_type {   /* result of the run-time check in __kmp_check_mic_type() */
+    non_mic,      /* initial value; also chosen when no signature below matches */
+    mic1,         /* never selected by the check ("We don't support mic1 at the moment") */
+    mic2,         /* CPUID leaf 1: (eax & 0xff0) == 0xB10 */
+    mic3,         /* CPUID leaf 1: (eax & 0xf0ff0) == 0x50670 */
+    dummy         /* NOTE(review): purpose not evident from this change -- confirm */
+};
+#endif
+
 /* ------------------------------------------------------------------------ */
 /* -- fast reduction stuff ------------------------------------------------ */
 
@@ -976,12 +986,12 @@ extern unsigned int __kmp_place_core_offset;
 #endif
 
 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
-struct kmp_cpuid {
+typedef struct kmp_cpuid {   /* holds the four registers of a CPUID query */
     kmp_uint32  eax;
     kmp_uint32  ebx;
     kmp_uint32  ecx;
     kmp_uint32  edx;
-};
+} kmp_cpuid_t;               /* alias so users can omit the struct keyword */
 extern void __kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p );
 # if KMP_ARCH_X86
   extern void __kmp_x86_pause( void );
@@ -2618,6 +2628,10 @@ extern enum clock_function_type __kmp_clock_function;
 extern int __kmp_clock_function_param;
 # endif /* KMP_OS_LINUX */
 
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
+extern enum mic_type __kmp_mic_type;
+#endif
+
 # ifdef USE_LOAD_BALANCE
 extern double      __kmp_load_balance_interval;   /* Interval for the load balance algorithm */
 # endif /* USE_LOAD_BALANCE */
index 6075701..f3d17c8 100644 (file)
@@ -212,6 +212,10 @@ enum clock_function_type __kmp_clock_function;
 int __kmp_clock_function_param;
 #endif /* KMP_OS_LINUX */
 
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
+enum mic_type __kmp_mic_type = non_mic;
+#endif
+
 #if KMP_AFFINITY_SUPPORTED
 
 # if KMP_GROUP_AFFINITY
index 5b46122..55b58ce 100644 (file)
@@ -5863,6 +5863,28 @@ __kmp_unregister_library( void ) {
 // End of Library registration stuff.
 // -------------------------------------------------------------------------------------------------
 
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
+
+// Query CPUID leaf 1 and set __kmp_mic_type from the family/model signature in EAX.
+static void __kmp_check_mic_type( void )
+{
+    kmp_cpuid_t cpuid_state = {0};
+    kmp_cpuid_t * cs_p = &cpuid_state;
+    // Go through the runtime's own cpuid helper instead of GCC-style inline asm:
+    // this code is also built for Windows, where that asm syntax may be unavailable.
+    __kmp_x86_cpuid( 1, 0, cs_p );
+    // We don't support mic1 at the moment
+    if( (cs_p->eax & 0xff0) == 0xB10 ) {              // family 0xB, model 1
+        __kmp_mic_type = mic2;
+    } else if( (cs_p->eax & 0xf0ff0) == 0x50670 ) {   // family 6, ext. model 5, model 7
+        __kmp_mic_type = mic3;
+    } else {
+        __kmp_mic_type = non_mic;
+    }
+}
+
+#endif /* KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) */
+
 static void
 __kmp_do_serial_initialize( void )
 {
@@ -5927,6 +5949,10 @@ __kmp_do_serial_initialize( void )
 
     __kmp_runtime_initialize();
 
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
+    __kmp_check_mic_type();
+#endif
+
     // Some global variable initialization moved here from kmp_env_initialize()
 #ifdef KMP_DEBUG
     kmp_diag = 0;
@@ -5983,17 +6009,21 @@ __kmp_do_serial_initialize( void )
         #undef kmp_reduction_barrier_release_bb
         #undef kmp_reduction_barrier_gather_bb
     #endif // KMP_FAST_REDUCTION_BARRIER
-    #if KMP_MIC
-    // AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
-    __kmp_barrier_gather_branch_bits [ bs_plain_barrier ] = 3;  // plane gather
-    __kmp_barrier_release_branch_bits[ bs_forkjoin_barrier ] = 1;  // forkjoin release
-    __kmp_barrier_gather_pattern [ bs_forkjoin_barrier ] = bp_hierarchical_bar;
-    __kmp_barrier_release_pattern[ bs_forkjoin_barrier ] = bp_hierarchical_bar;
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
+    if( __kmp_mic_type != non_mic ) {
+        // AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
+        __kmp_barrier_gather_branch_bits [ bs_plain_barrier ] = 3;  // plane gather
+        __kmp_barrier_release_branch_bits[ bs_forkjoin_barrier ] = 1;  // forkjoin release
+        __kmp_barrier_gather_pattern [ bs_forkjoin_barrier ] = bp_hierarchical_bar;
+        __kmp_barrier_release_pattern[ bs_forkjoin_barrier ] = bp_hierarchical_bar;
+    }
 #if KMP_FAST_REDUCTION_BARRIER
-    __kmp_barrier_gather_pattern [ bs_reduction_barrier ] = bp_hierarchical_bar;
-    __kmp_barrier_release_pattern[ bs_reduction_barrier ] = bp_hierarchical_bar;
+    if( __kmp_mic_type != non_mic ) {
+        __kmp_barrier_gather_pattern [ bs_reduction_barrier ] = bp_hierarchical_bar;
+        __kmp_barrier_release_pattern[ bs_reduction_barrier ] = bp_hierarchical_bar;
+    }
+#endif
 #endif
-    #endif
 
     // From KMP_CHECKS initialization
 #ifdef KMP_DEBUG
@@ -7001,6 +7031,8 @@ __kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid,
 
     int team_size;
 
+    int teamsize_cutoff = 4;
+
     KMP_DEBUG_ASSERT( loc );    // it would be nice to test ( loc != 0 )
     KMP_DEBUG_ASSERT( lck );    // it would be nice to test ( lck != 0 )
 
@@ -7023,13 +7055,13 @@ __kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid,
         #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64
 
             #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
-                #if KMP_MIC
-                    #define REDUCTION_TEAMSIZE_CUTOFF 8
-                #else // KMP_MIC
-                    #define REDUCTION_TEAMSIZE_CUTOFF 4
-                #endif // KMP_MIC
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
+                if( __kmp_mic_type != non_mic ) {
+                    teamsize_cutoff = 8;
+                }
+#endif
                 if( tree_available ) {
-                    if( team_size <= REDUCTION_TEAMSIZE_CUTOFF ) {
+                    if( team_size <= teamsize_cutoff ) {
                         if ( atomic_available ) {
                             retval = atomic_reduce_block;
                         }
index 0ca6bba..0394afd 100644 (file)
@@ -2314,17 +2314,20 @@ __kmp_parse_affinity_env( char const * name, char const * value,
             }; // if
 
             if ( __kmp_affinity_gran == affinity_gran_default ) {
-# if KMP_MIC
-                if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
-                    KMP_WARNING( AffGranUsing, "KMP_AFFINITY", "fine" );
-                }
-                __kmp_affinity_gran = affinity_gran_fine;
-# else
-                if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
-                    KMP_WARNING( AffGranUsing, "KMP_AFFINITY", "core" );
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
+                if( __kmp_mic_type != non_mic ) {
+                    if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
+                        KMP_WARNING( AffGranUsing, "KMP_AFFINITY", "fine" );
+                    }
+                    __kmp_affinity_gran = affinity_gran_fine;
+                } else
+#endif
+                {
+                    if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
+                        KMP_WARNING( AffGranUsing, "KMP_AFFINITY", "core" );
+                    }
+                    __kmp_affinity_gran = affinity_gran_core;
                 }
-                __kmp_affinity_gran = affinity_gran_core;
-# endif /* KMP_MIC */
             }
         } break;
         case affinity_scatter:
@@ -3030,11 +3033,11 @@ __kmp_stg_parse_proc_bind( char const * name, char const * value, void * data )
             // OMP_PROC_BIND => granularity=core,scatter elsewhere
             //
             __kmp_affinity_type = affinity_scatter;
-#  if KMP_MIC
-            __kmp_affinity_gran = affinity_gran_fine;
-#  else
-            __kmp_affinity_gran = affinity_gran_core;
-#  endif /* KMP_MIC */
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) /* __kmp_mic_type is declared only here */
+            __kmp_affinity_gran = ( __kmp_mic_type != non_mic ) ? affinity_gran_fine : affinity_gran_core;
+#else  /* no MIC detection on this target: keep the non-MIC default */
+            __kmp_affinity_gran = affinity_gran_core;
+#endif
     }
     else {
         __kmp_affinity_type = affinity_none;
@@ -5228,25 +5231,36 @@ __kmp_env_initialize( char const * string ) {
                 else
 # endif /* OMP_40_ENABLED */
                 if ( __kmp_affinity_type == affinity_default ) {
-# if KMP_MIC
-                    __kmp_affinity_type = affinity_scatter;
-#  if OMP_40_ENABLED
-                    __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel;
-#  endif
-# else
-                    __kmp_affinity_type = affinity_none;
-#  if OMP_40_ENABLED
-                    __kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
-#  endif
-# endif
+#if OMP_40_ENABLED
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
+                    if( __kmp_mic_type != non_mic ) {
+                        __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel;
+                    } else
+#endif
+                    {
+                        __kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
+                    }
+#endif /* OMP_40_ENABLED */
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
+                    if( __kmp_mic_type != non_mic ) {
+                        __kmp_affinity_type = affinity_scatter;
+                    } else
+#endif
+                    {
+                        __kmp_affinity_type = affinity_none;
+                    }
+
                 }
                 if ( ( __kmp_affinity_gran == affinity_gran_default )
                   &&  ( __kmp_affinity_gran_levels < 0 ) ) {
-# if KMP_MIC
-                    __kmp_affinity_gran = affinity_gran_fine;
-# else
-                    __kmp_affinity_gran = affinity_gran_core;
-# endif
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
+                    if( __kmp_mic_type != non_mic ) {
+                        __kmp_affinity_gran = affinity_gran_fine;
+                    } else
+#endif
+                    {
+                        __kmp_affinity_gran = affinity_gran_core;
+                    }
                 }
                 if ( __kmp_affinity_top_method == affinity_top_method_default ) {
                     __kmp_affinity_top_method = affinity_top_method_all;