status = scheduler_->submitRequest (req);
if (status >= 0)
sync.wait ();
-
- /* remove the internal callback */
- req->setCallback (nullptr);
} break;
case NPU_INFER_NON_BLOCKING:
if (req->getCallback () == nullptr) {
input_buffers input = {0};
output_buffers output = {0};
- input.num_buffers = 1;
- input.bufs[0].type = BUFFER_DMABUF;
- input.bufs[0].dmabuf = KERNEL_INPUT_SEG; /* indicator of kernel input */
+ input.num_buffers = model->getInputTensorNum ();
+ for (uint32_t i = 0; i < input.num_buffers; i++) {
+ input.bufs[i].type = BUFFER_DMABUF;
+ input.bufs[i].dmabuf = -1 * (int) (i + 1); /* indicator of kernel input */
+ }
- output.num_buffers = 1;
- output.bufs[0].type = BUFFER_DMABUF;
- output.bufs[0].dmabuf = KERNEL_OUTPUT_SEG; /* indicator of kernel output */
+ output.num_buffers = model->getOutputTensorNum ();
+ for (uint32_t i = 0; i < output.num_buffers; i++) {
+ output.bufs[i].type = BUFFER_DMABUF;
+ output.bufs[i].dmabuf = -1 * (int) (i + 1 + MAX_TENSORS); /* indicator of kernel output */
+ }
SegmentTable *segt = dynamic_cast<SegmentTable *> (req->getInferData ());
if (segt != nullptr) {
if (!req->isStopped () && callback != nullptr)
callback ();
- /** the request instance is also deleted here */
- if (!req->isPreserved ())
+ if (!req->isPreserved ()) {
+ /* the request instance is also deleted here */
removeRequest (req);
+ } else if (req->getInferMode () == NPU_INFER_BLOCKING) {
+ /* remove the internal callback for next submissions */
+ req->setCallback (nullptr);
+ }
}
/**
* an inference. Note that each entry in the segment table has a 32-bit slot.
*/
int dbuf_fd = hwmem->getDmabuf ();
- if (dbuf_fd != KERNEL_INPUT_SEG && dbuf_fd != KERNEL_OUTPUT_SEG) {
+ if (dbuf_fd >= 0) {
reinterpret_cast<int32_t *> (getData ())[slot] = dbuf_fd;
reinterpret_cast<uint32_t *> (getData () + getSize () / 2)[slot] = hwmem->getOffset ();
if (hwmem->getOffset () % SEGMENT_ALIGN != 0)
logwarn (TAG, "Segment is not aligned..?\n");
} else {
+ /* this is an external segment to be mapped in kernel */
reinterpret_cast<int32_t *> (getData ())[slot] = dbuf_fd;
reinterpret_cast<uint32_t *> (getData () + getSize () / 2)[slot] = hwmem->getSize ();
}
#include <vector>
-#define KERNEL_INPUT_SEG (-1)
-#define KERNEL_OUTPUT_SEG (-2)
-
/** @brief segment table class derived from HWmem */
class SegmentTable : public HWmem {
public: