--- /dev/null
+#include "caffe2/operators/dense_vector_to_id_list_op.h"
+
+namespace caffe2 {
+namespace {
+REGISTER_CPU_OPERATOR(DenseVectorToIdList, DenseVectorToIdListOp<CPUContext>);
+// Registration above: CPU kernel for the op name. Below: its schema (arity, doc text, blob names).
+OPERATOR_SCHEMA(DenseVectorToIdList)
+ .NumInputs(1) // single input: the dense "values" blob
+ .NumOutputs(2) // two outputs: "out_lengths" and "out_values"
+ .SetDoc(R"DOC(
+DenseVectorToIdList: Convert a blob with dense feature into a ID_LIST.
+
+An ID_LIST is a list of IDs (may be ints, often longs) that represents a single
+feature. As described in https://caffe2.ai/docs/sparse-operations.html, a batch
+of ID_LIST examples is represented as a pair of lengths and values where the
+`lengths` (int32) segment the `values` or ids (int32/int64) into examples.
+
+Input is a single blob where the first dimension is the batch size and the
+second dimension is the length of dense vectors. This operator produces a
+ID_LIST where out_values are the indices of non-zero entries
+and out_lengths are the number of non-zeros entries in each row.
+
+)DOC")
+ .Input(0, "values", "A data blob of dense vectors")
+ .Output(0, "out_lengths", "Lengths of the sparse feature")
+ .Output(1, "out_values", "Values of the sparse feature");
+NO_GRADIENT(DenseVectorToIdList); // this operator is declared without a gradient
+} // namespace
+} // namespace caffe2
--- /dev/null
+#ifndef CAFFE2_OPERATORS_DENSE_VECTOR_TO_ID_LIST_OP_H_
+#define CAFFE2_OPERATORS_DENSE_VECTOR_TO_ID_LIST_OP_H_
+
+#include <set>
+#include <vector>
+#include "caffe2/core/context.h"
+#include "caffe2/core/operator.h"
+
+namespace caffe2 {
+
+// Converts a 2-D batch of dense vectors into an ID_LIST (lengths + values).
+//
+// Input(0):  2-D tensor of shape (batch_size, col_num); only float is accepted.
+// Output(0): int32 tensor of size batch_size; entry i is the number of
+//            non-zero columns found in row i.
+// Output(1): tensor holding, row after row, the column indices of the
+//            non-zero entries.
+//
+// An entry counts as non-zero when it rounds (half away from zero) to a
+// non-zero integer, i.e. when its magnitude is at least 0.5.
+template <class Context>
+class DenseVectorToIdListOp : public Operator<Context> {
+ public:
+  USE_OPERATOR_CONTEXT_FUNCTIONS;
+  USE_SIMPLE_CTOR_DTOR(DenseVectorToIdListOp)
+
+  // Runs the conversion for input element type T and output index type M.
+  // Throws (via CAFFE_ENFORCE) if the input is not 2-D. Always returns true
+  // otherwise.
+  template <typename T, typename M>
+  bool DoRunWithType() {
+    auto& input = Input(0);
+    // Validate the shape before touching the data or reading size(1).
+    CAFFE_ENFORCE_EQ(input.dim(), 2, "Sample should be 2-D");
+    const auto* input_data = input.template data<T>();
+
+    const int64_t batch_size = input.size(0);
+    const int64_t col_num = input.size(1);
+
+    auto* out_lengths = Output(0);
+    out_lengths->Resize(batch_size);
+    auto* out_lengths_data = out_lengths->template mutable_data<int32_t>();
+
+    // Worst case every entry is non-zero; the tensor is shrunk to the number
+    // of ids actually written once the scan is done.
+    auto* out_values = Output(1);
+    out_values->Resize(batch_size * col_num);
+    auto* out_values_data = out_values->template mutable_data<M>();
+
+    // 64-bit counters: batch_size * col_num may exceed the range of int.
+    int64_t v_pos = 0;
+    for (int64_t i = 0; i < batch_size; ++i) {
+      const T* row = input_data + i * col_num;
+      int32_t length = 0;
+      for (int64_t j = 0; j < col_num; ++j) {
+        // Magnitude test instead of `(int)(x + 0.5) != 0`: the cast truncates
+        // toward zero, so values such as -1.0 were reported as zero, and it is
+        // undefined for NaN or values outside the range of int. For
+        // non-negative in-range inputs the two tests agree.
+        if (row[j] >= 0.5 || row[j] <= -0.5) {
+          out_values_data[v_pos++] = static_cast<M>(j);
+          ++length;
+        }
+      }
+      out_lengths_data[i] = length;
+    }
+    out_values->Resize(v_pos);
+    return true;
+  }
+
+  // Dispatches on the input dtype; anything other than float is rejected.
+  bool RunOnDevice() override {
+    if (Input(0).template IsType<float>()) {
+      return DoRunWithType<float, int>();
+    } else {
+      CAFFE_THROW(
+          "DenseVectorToIdList operator only supports 32-bit float, but",
+          " input was of type ",
+          Input(0).dtype().name());
+    }
+  }
+};
+
+} // namespace caffe2
+
+#endif // CAFFE2_OPERATORS_DENSE_VECTOR_TO_ID_LIST_OP_H_
--- /dev/null
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from caffe2.python import core
+from hypothesis import given
+import caffe2.python.hypothesis_test_util as hu
+
+import hypothesis.extra.numpy as hnp
+import hypothesis.strategies as st
+import numpy as np
+
+
+@st.composite
+def id_list_batch(draw):
+ batch_size = draw(st.integers(2, 2))
+ values_dtype = np.float32
+ inputs = []
+ sample_size = draw(st.integers(5, 10))
+ for _ in range(batch_size):
+ values = draw(hnp.arrays(values_dtype, sample_size, st.integers(0, 1)))
+ inputs += [values]
+ return [np.array(inputs)]
+
+
+def dense_vector_to_id_list_ref(*arg):
+ arg = arg[0]
+ batch_size = len(arg)
+ assert batch_size > 0
+ out_length = []
+ out_values = []
+ for row in arg:
+ length = 0
+ for idx, entry in enumerate(row):
+ if entry != 0:
+ out_values += [idx]
+ length += 1
+ out_length += [length]
+ return (out_length, out_values)
+
+
+class TestDenseVectorToIdList(hu.HypothesisTestCase):
+ def test_dense_vector_to_id_list_ref(self):
+ # Verify that the reference implementation is correct!
+ dense_input = np.array(
+ [[1, 0, 0, 1, 0, 0, 0, 1],
+ [1, 0, 1, 0, 0, 0, 0, 1],
+ [0, 1, 0, 0, 0, 1, 0, 1]],
+ dtype=np.float32)
+ sparse_lengths, sparse_values = dense_vector_to_id_list_ref(dense_input)
+ expected_lengths = np.array([3, 3, 3], dtype=np.int32)
+ expected_values = np.array([0, 3, 7, 0, 2, 7, 1, 5, 7], dtype=np.int64)
+
+ np.testing.assert_array_equal(sparse_lengths, expected_lengths)
+ np.testing.assert_array_equal(sparse_values, expected_values)
+
+ @given(inputs=id_list_batch(), **hu.gcs_cpu_only)
+ def test_dense_vector_to_id_list_op(self, inputs, gc, dc):
+ op = core.CreateOperator(
+ "DenseVectorToIdList",
+ ["values"],
+ ["out_lengths", "out_values"]
+ )
+ self.assertDeviceChecks(dc, op, inputs, [0])
+ self.assertReferenceChecks(gc, op, inputs, dense_vector_to_id_list_ref)