Choose the proper number of threads per block according to the CUDA architecture.
author Liang-Chi Hsieh <viirya@gmail.com>
Wed, 29 Jan 2014 10:39:16 +0000 (18:39 +0800)
committer Liang-Chi Hsieh <viirya@gmail.com>
Wed, 29 Jan 2014 10:39:16 +0000 (18:39 +0800)
include/caffe/common.hpp

index e7c5abe..7fd7ea6 100644 (file)
@@ -49,7 +49,12 @@ using boost::shared_ptr;
 
 
 // We will use 1024 threads per block, which requires cuda sm_2x or above.
-const int CAFFE_CUDA_NUM_THREADS = 1024;
+#if __CUDA_ARCH__ >= 200
+    const int CAFFE_CUDA_NUM_THREADS = 1024;
+#else
+    const int CAFFE_CUDA_NUM_THREADS = 512;
+#endif
+
 
 
 inline int CAFFE_GET_BLOCKS(const int N) {