Runtime detection of available processor cores.
author Attila Nagy <attilanagy@google.com>
Fri, 25 Mar 2011 10:53:03 +0000 (12:53 +0200)
committer Attila Nagy <attilanagy@google.com>
Thu, 31 Mar 2011 07:23:01 +0000 (10:23 +0300)
Detect the number of available cores and limit the thread allocation
accordingly. On the decoder side, limit the number of threads to the
maximum number of token partitions.

Core detection works on Windows and on POSIX platforms that define
_SC_NPROCESSORS_ONLN or _SC_NPROC_ONLN.

Change-Id: I76cbe37c18d3b8035e508b7a1795577674efc078

build/make/configure.sh
configure
vp8/common/generic/systemdependent.c
vp8/common/onyxc_int.h
vp8/decoder/threading.c
vp8/encoder/ethreading.c
vp8/encoder/onyx_int.h

index a48fd9f..cdd7b9c 100755 (executable)
@@ -979,6 +979,9 @@ EOF
         esac
     fi
 
+    # for sysconf(3) and friends.
+    check_header unistd.h
+
     # glibc needs these
     if enabled linux; then
         add_cflags -D_LARGEFILE_SOURCE
index f92ffc5..c6805b0 100755 (executable)
--- a/configure
+++ b/configure
@@ -211,6 +211,7 @@ HAVE_LIST="
     alt_tree_layout
     pthread_h
     sys_mman_h
+    unistd_h
 "
 CONFIG_LIST="
     external_build
index 5c64647..4131d3c 100644 (file)
 #include "vp8/common/idct.h"
 #include "vp8/common/onyxc_int.h"
 
+#if CONFIG_MULTITHREAD
+#if HAVE_UNISTD_H
+#include <unistd.h>
+#elif defined(_WIN32)
+#include <windows.h>
+typedef void (WINAPI *PGNSI)(LPSYSTEM_INFO);
+#endif
+#endif
+
 extern void vp8_arch_x86_common_init(VP8_COMMON *ctx);
 extern void vp8_arch_arm_common_init(VP8_COMMON *ctx);
 
+#if CONFIG_MULTITHREAD
+static int get_cpu_count()
+{
+    int core_count = 16;
+
+#if HAVE_UNISTD_H
+#if defined(_SC_NPROCESSORS_ONLN)
+    core_count = sysconf(_SC_NPROCESSORS_ONLN);
+#elif defined(_SC_NPROC_ONLN)
+    core_count = sysconf(_SC_NPROC_ONLN);
+#endif
+#elif defined(_WIN32)
+    {
+        PGNSI pGNSI;
+        SYSTEM_INFO sysinfo;
+
+        /* Call GetNativeSystemInfo if supported or
+         * GetSystemInfo otherwise. */
+
+        pGNSI = (PGNSI) GetProcAddress(
+                GetModuleHandle(TEXT("kernel32.dll")), "GetNativeSystemInfo");
+        if (pGNSI != NULL)
+            pGNSI(&sysinfo);
+        else
+            GetSystemInfo(&sysinfo);
+
+        core_count = sysinfo.dwNumberOfProcessors;
+    }
+#else
+    /* other platforms */
+#endif
+
+    return core_count > 0 ? core_count : 1;
+}
+#endif
+
 void vp8_machine_specific_config(VP8_COMMON *ctx)
 {
 #if CONFIG_RUNTIME_CPU_DETECT
@@ -82,4 +127,7 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
     vp8_arch_arm_common_init(ctx);
 #endif
 
+#if CONFIG_MULTITHREAD
+    ctx->processor_core_count = get_cpu_count();
+#endif /* CONFIG_MULTITHREAD */
 }
index c8c2277..60737e4 100644 (file)
@@ -196,6 +196,9 @@ typedef struct VP8Common
 #if CONFIG_RUNTIME_CPU_DETECT
     VP8_COMMON_RTCD rtcd;
 #endif
+#if CONFIG_MULTITHREAD
+    int processor_core_count;
+#endif
     struct postproc_state  postproc_state;
 } VP8_COMMON;
 
index 3d9d428..4cc7f53 100644 (file)
@@ -429,12 +429,18 @@ void vp8_decoder_create_threads(VP8D_COMP *pbi)
 
     pbi->b_multithreaded_rd = 0;
     pbi->allocated_decoding_thread_count = 0;
-    core_count = (pbi->max_threads > 16) ? 16 : pbi->max_threads;
+
+    /* limit decoding threads to the max number of token partitions */
+    core_count = (pbi->max_threads > 8) ? 8 : pbi->max_threads;
+
+    /* limit decoding threads to the available cores */
+    if (core_count > pbi->common.processor_core_count)
+        core_count = pbi->common.processor_core_count;
 
     if (core_count > 1)
     {
         pbi->b_multithreaded_rd = 1;
-        pbi->decoding_thread_count = core_count -1;
+        pbi->decoding_thread_count = core_count - 1;
 
         CHECK_MEM_ERROR(pbi->h_decoding_thread, vpx_malloc(sizeof(pthread_t) * pbi->decoding_thread_count));
         CHECK_MEM_ERROR(pbi->h_event_start_decoding, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count));
index 71da103..fa6a943 100644 (file)
@@ -459,15 +459,15 @@ void vp8cx_create_encoder_threads(VP8_COMP *cpi)
 
     cpi->b_multi_threaded = 0;
     cpi->encoding_thread_count = 0;
-    cpi->processor_core_count = 32; //vp8_get_proc_core_count();
 
-    if (cpi->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1)
+    if (cm->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1)
     {
         int ithread;
         int th_count = cpi->oxcf.multi_threaded - 1;
 
-        if (cpi->oxcf.multi_threaded > cpi->processor_core_count)
-            th_count = cpi->processor_core_count - 1;
+        /* don't allocate more threads than cores available */
+        if (cpi->oxcf.multi_threaded > cm->processor_core_count)
+            th_count = cm->processor_core_count - 1;
 
         /* we have th_count + 1 (main) threads processing one row each */
         /* no point to have more threads than the sync range allows */
index 0e53f68..7282ef8 100644 (file)
@@ -594,7 +594,6 @@ typedef struct
     // multithread data
     int * mt_current_mb_col;
     int mt_sync_range;
-    int processor_core_count;
     int b_multi_threaded;
     int encoding_thread_count;