/usr/include/nntrainer/common_properties.h
/usr/include/nntrainer/base_properties.h
/usr/include/nntrainer/node_exporter.h
+/usr/include/nntrainer/profiler.h
+/usr/include/nntrainer/nntr_threads.h
# tensor headers
/usr/include/nntrainer/tensor.h
/usr/include/nntrainer/tensor_wrap_specs.h
$(NNTRAINER_ROOT)/nntrainer/utils/profiler.cpp \
$(NNTRAINER_ROOT)/nntrainer/utils/node_exporter.cpp \
$(NNTRAINER_ROOT)/nntrainer/utils/base_properties.cpp \
+ $(NNTRAINER_ROOT)/nntrainer/utils/nntr_threads.cpp \
$(NNTRAINER_ROOT)/nntrainer/compiler/ini_interpreter.cpp \
$(NNTRAINER_ROOT)/nntrainer/compiler/flatten_realizer.cpp \
$(NNTRAINER_ROOT)/nntrainer/compiler/activation_realizer.cpp \
option('tizen-version-major', type: 'integer', min : 4, max : 9999, value: 9999) # 9999 means "not Tizen"
option('tizen-version-minor', type: 'integer', min : 0, max : 9999, value: 0)
option('openblas-num-threads', type: 'integer', min : 0, max : 9999, value: 0)
-#This is for the multi-threading in nntrainer. ( multi-threading along batch direction )
-option('nntr-num-threads', type: 'integer', min : 0, max : 9999, value: 2)
+#This is for the multi-threading in nntrainer
+option('nntr-num-threads', type: 'integer', min : 0, max : 9999, value: 1)
# test related option
option('reduce-tolerance', type: 'boolean', value: true)
namespace nntrainer {
-/**
- * @brief User data props
- *
- */
-class PropsUserData final : public Property<void *> {
-public:
- static constexpr const char *key = "user_data";
- PropsUserData(void *user_data) { set(user_data); }
- using prop_tag = ptr_prop_tag;
-};
-
FuncDataProducer::FuncDataProducer(datagen_cb datagen_cb, void *user_data_) :
cb(datagen_cb),
- user_data_prop(new PropsUserData(user_data_)) {}
+ user_data_prop(new props::PropsUserData(user_data_)) {}
FuncDataProducer::~FuncDataProducer() {}
#ifndef __FUNC_DATA_PRODUCER_H__
#define __FUNC_DATA_PRODUCER_H__
+#include <common_properties.h>
#include <data_producer.h>
-
#include <dataset.h>
#include <memory>
namespace nntrainer {
-class PropsUserData;
class Exporter;
using datagen_cb = ml::train::datagen_cb;
private:
datagen_cb cb;
- std::unique_ptr<PropsUserData> user_data_prop;
+ std::unique_ptr<props::PropsUserData> user_data_prop;
};
} // namespace nntrainer
WeightDecay::WeightDecay(float value) : BasicRegularizerConstant(value) {}
BiasDecay::BiasDecay(float value) : BasicRegularizerConstant(value) {}
+PropsUserData::PropsUserData(void *user_data) { set(user_data); }
+
bool BasicRegularizerConstant::isValid(const float &value) const {
return value >= 0.0f;
}
using prop_tag = uint_prop_tag; /**< property type */
};
+/**
+ * @brief User data props
+ *
+ * Wraps an opaque user-supplied void pointer as a property so it can be
+ * carried alongside other props (e.g. by FuncDataProducer and ParallelBatch).
+ */
+class PropsUserData final : public Property<void *> {
+public:
+  /**
+   * @brief Construct with the pointer to store (defined out-of-line)
+   */
+  PropsUserData(void *user_data);
+  static constexpr const char *key = "user_data"; /**< unique property key */
+  using prop_tag = ptr_prop_tag; /**< property is handled as a raw pointer */
+};
+
} // namespace props
} // namespace nntrainer
#include <conv2d_layer.h>
#include <layer_context.h>
#include <lazy_tensor.h>
+#include <nntr_threads.h>
#include <nntrainer_error.h>
#include <nntrainer_log.h>
#include <node_exporter.h>
/// filter_kernel^T X derivaitive -> column matrix
/// col2im(column matrix) to reconstruct the original image
- unsigned int num_threads = NNTR_NUM_THREADS;
-
- if (num_threads > derivative.batch())
- num_threads = 1;
-
- if (num_threads > 1) {
+ if (NNTR_NUM_THREADS > 1) {
auto dowork = [&](size_t s, size_t e, void *user_data) {
+ Tensor result =
+ Tensor(calcCol2ImOutputDim(derivative.getDim(), filter_dim));
+
for (size_t b = s; b < e; ++b) {
- Tensor result =
- Tensor(calcCol2ImOutputDim(derivative.getDim(), filter_dim));
Tensor deriv_sub = derivative.getBatchSlice(b, 1);
Tensor in_deriv_sub = input_derivative.getBatchSlice(b, 1);
deriv_sub.reshape(
}
};
- size_t start = 0;
- size_t end = derivative.batch();
- size_t chunk = (end - start + (num_threads - 1)) / num_threads;
-
- std::vector<std::thread> workers;
-
- for (unsigned int i = 0; i < num_threads; ++i) {
- size_t s = start + i * chunk;
- size_t e = s + chunk;
- if (e > end)
- e = end;
- workers.push_back(std::thread(dowork, s, e, nullptr));
- }
+ auto workers = ParallelBatch(dowork, derivative.batch(), nullptr);
- for (unsigned int i = 0; i < num_threads; ++i)
- workers[i].join();
+ workers.run();
} else {
{filter_size, derivative.width() * derivative.height()});
filter_kernel.dot(deriv_sub, col2im_result, true, false);
- col2im(col2im_result, filter_dim, padding, stride, dilation, in_deriv_sub);
+ col2im(col2im_result, filter_dim, padding, stride, dilation,
+ in_deriv_sub);
}
}
'profiler.cpp',
'ini_wrapper.cpp',
'node_exporter.cpp',
- 'base_properties.cpp'
+ 'base_properties.cpp',
+ 'nntr_threads.cpp'
]
util_headers = [
'base_properties.h',
'node_exporter.h',
'util_func.h',
+ 'profiler.h',
+ 'nntr_threads.h'
]
foreach s : util_sources
--- /dev/null
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2022 Jijoong Moon <jijoong.moon@samsung.com>
+ *
+ * @file nntr_threads.cpp
+ * @date 07 July 2022
+ * @brief Thread Management for NNTrainer
+ * @see https://github.com/nnstreamer/nntrainer
+ * @author Jijoong Moon <jijoong.moon@samsung.com>
+ * @bug No known bugs except for NYI items
+ */
+
+#include <nntr_threads.h>
+
+namespace nntrainer {
+
+/// Clamp the worker count into [1, batch]: the build option behind
+/// NNTR_NUM_THREADS allows 0, and run() divides by num_workers, so a zero
+/// value would be a division by zero; more workers than batches is wasteful.
+ParallelBatch::ParallelBatch(threaded_cb threaded_cb, unsigned int batch_size,
+                             void *user_data_) :
+  cb(threaded_cb),
+  batch(batch_size),
+  num_workers((NNTR_NUM_THREADS == 0 || NNTR_NUM_THREADS > batch)
+                ? 1
+                : NNTR_NUM_THREADS),
+  user_data_prop(new props::PropsUserData(user_data_)) {}
+
+ParallelBatch::~ParallelBatch() {}
+
+/**
+ * @brief Split [0, batch) into num_workers contiguous chunks, invoke the
+ *        callback as cb(start, end, user_data) on each chunk in its own
+ *        std::thread, and join all threads before returning.
+ */
+void ParallelBatch::run() {
+
+  unsigned int start = 0;
+  unsigned int end = batch;
+
+  // Ceiling division so the chunks cover every index in [start, end).
+  unsigned int chunk = (end - start + (num_workers - 1)) / num_workers;
+
+  for (unsigned int i = 0; i < num_workers; ++i) {
+    unsigned int s = start + i * chunk;
+    unsigned int e = s + chunk;
+    if (e > end)
+      e = end;
+    // A trailing worker may get s >= e; its loop body simply never runs.
+    workers.push_back(std::thread(cb, s, e, user_data_prop->get()));
+  }
+
+  for (unsigned int i = 0; i < num_workers; ++i)
+    workers[i].join();
+}
+
+} // namespace nntrainer
--- /dev/null
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2022 Jijoong Moon <jijoong.moon@samsung.com>
+ *
+ * @file nntr_threads.h
+ * @date 07 July 2022
+ * @brief Thread Management for NNTrainer
+ * @see https://github.com/nnstreamer/nntrainer
+ * @author Jijoong Moon <jijoong.moon@samsung.com>
+ * @bug No known bugs except for NYI items
+ */
+#ifndef __NNTR_THREADS_H__
+#define __NNTR_THREADS_H__
+
+#include <string>
+#include <thread>
+#include <vector>
+
+#include <common_properties.h>
+#include <nntrainer_error.h>
+#include <util_func.h>
+
+/** Plain-function signature of the per-chunk worker callback */
+typedef void (*loop_cb)(unsigned int start, unsigned int end, void *user_data);
+
+/** std::function wrapper of loop_cb so capturing lambdas can be passed */
+typedef std::function<std::remove_pointer<loop_cb>::type> threaded_cb;
+
+namespace nntrainer {
+
+/**
+ * @brief ParallelBatch class to parallelize along batch direction
+ *
+ */
+class ParallelBatch {
+public:
+  /**
+   * @brief Construct a new ParallelBatch object
+   *
+   * @param threaded_cb callback invoked as cb(start, end, user_data) for one
+   *        contiguous range of batch indices
+   * @param batch total number of batch elements to split across threads
+   * @param user_data_ opaque pointer handed back to the callback
+   */
+  ParallelBatch(threaded_cb threaded_cb, unsigned int batch, void *user_data_);
+
+  /**
+   * @brief Destroy the ParallelBatch object
+   *
+   */
+  ~ParallelBatch();
+
+  /**
+   * @brief Run the workers
+   *
+   * @note spawns one thread per chunk and joins them all before returning.
+   *       NOTE(review): `workers` is never cleared, so this looks single-use
+   *       per instance — confirm before calling run() twice.
+   */
+  void run();
+
+private:
+  threaded_cb cb;                   /**< per-chunk worker callback */
+  unsigned int batch;               /**< total batch size to split */
+  unsigned int num_workers;         /**< number of threads to spawn */
+  std::vector<std::thread> workers; /**< spawned worker threads */
+  std::unique_ptr<props::PropsUserData> user_data_prop; /**< wraps user_data */
+};
+
+} // namespace nntrainer
+#endif // __NNTR_THREADS_H__
%{_includedir}/nntrainer/common_properties.h
%{_includedir}/nntrainer/base_properties.h
%{_includedir}/nntrainer/node_exporter.h
+%{_includedir}/nntrainer/nntr_threads.h
+%{_includedir}/nntrainer/profiler.h
# tensor headers
%{_includedir}/nntrainer/tensor.h
%{_includedir}/nntrainer/tensor_wrap_specs.h