nfp: bpf: implement memory bulk copy for length bigger than 32-bytes
author Jiong Wang <jiong.wang@netronome.com>
Fri, 1 Dec 2017 05:32:59 +0000 (21:32 -0800)
committer Daniel Borkmann <daniel@iogearbox.net>
Fri, 1 Dec 2017 19:59:20 +0000 (20:59 +0100)
When the gathered copy length is bigger than 32-bytes and within 128-bytes
(the maximum length a single CPP Pull/Push request can finish), the
read/write strategies are changed to:

  * Read.
      - use direct reference mode when length is within 32-bytes.
      - use indirect mode when length is bigger than 32-bytes.

  * Write.
      - length <= 8-bytes
        use write8 (direct_ref).
      - length <= 32-bytes and 4-bytes aligned
        use write32 (direct_ref).
      - length <= 32-bytes but not 4-bytes aligned
        use write8 (indirect_ref).
      - length > 32-bytes and 4-bytes aligned
        use write32 (indirect_ref).
      - length > 32-bytes and not 4-bytes aligned and <= 40-bytes
        use write32 (direct_ref) to finish the first 32-bytes.
        use write8 (direct_ref) to finish all remaining hanging part.
      - length > 40-bytes and not 4-bytes aligned
        use write32 (indirect_ref) to finish those 4-byte aligned parts.
        use write8 (direct_ref) to finish all remaining hanging part.

Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
drivers/net/ethernet/netronome/nfp/bpf/jit.c

index 138568c..1b98ef2 100644 (file)
@@ -544,16 +544,18 @@ static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
        unsigned int i;
        u8 xfer_num;
 
-       if (WARN_ON_ONCE(len > 32))
-               return -EOPNOTSUPP;
-
        off = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
        src_base = reg_a(meta->insn.src_reg * 2);
        xfer_num = round_up(len, 4) / 4;
 
+       /* Setup PREV_ALU fields to override memory read length. */
+       if (len > 32)
+               wrp_immed(nfp_prog, reg_none(),
+                         CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
+
        /* Memory read from source addr into transfer-in registers. */
-       emit_cmd(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0, src_base, off,
-                xfer_num - 1, true);
+       emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0, src_base,
+                    off, xfer_num - 1, true, len > 32);
 
        /* Move from transfer-in to transfer-out. */
        for (i = 0; i < xfer_num; i++)
@@ -566,18 +568,54 @@ static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
                emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
                         reg_a(meta->paired_st->dst_reg * 2), off, len - 1,
                         true);
-       } else if (IS_ALIGNED(len, 4)) {
+       } else if (len <= 32 && IS_ALIGNED(len, 4)) {
                /* Use single direct_ref write32. */
                emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
                         reg_a(meta->paired_st->dst_reg * 2), off, xfer_num - 1,
                         true);
-       } else {
+       } else if (len <= 32) {
                /* Use single indirect_ref write8. */
                wrp_immed(nfp_prog, reg_none(),
                          CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, len - 1));
                emit_cmd_indir(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
                               reg_a(meta->paired_st->dst_reg * 2), off,
                               len - 1, true);
+       } else if (IS_ALIGNED(len, 4)) {
+               /* Use single indirect_ref write32. */
+               wrp_immed(nfp_prog, reg_none(),
+                         CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
+               emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
+                              reg_a(meta->paired_st->dst_reg * 2), off,
+                              xfer_num - 1, true);
+       } else if (len <= 40) {
+               /* Use one direct_ref write32 to write the first 32-bytes, then
+                * another direct_ref write8 to write the remaining bytes.
+                */
+               emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
+                        reg_a(meta->paired_st->dst_reg * 2), off, 7,
+                        true);
+
+               off = re_load_imm_any(nfp_prog, meta->paired_st->off + 32,
+                                     imm_b(nfp_prog));
+               emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 8,
+                        reg_a(meta->paired_st->dst_reg * 2), off, len - 33,
+                        true);
+       } else {
+               /* Use one indirect_ref write32 to write 4-bytes aligned length,
+                * then another direct_ref write8 to write the remaining bytes.
+                */
+               u8 new_off;
+
+               wrp_immed(nfp_prog, reg_none(),
+                         CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 2));
+               emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
+                              reg_a(meta->paired_st->dst_reg * 2), off,
+                              xfer_num - 2, true);
+               new_off = meta->paired_st->off + (xfer_num - 1) * 4;
+               off = re_load_imm_any(nfp_prog, new_off, imm_b(nfp_prog));
+               emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b,
+                        xfer_num - 1, reg_a(meta->paired_st->dst_reg * 2), off,
+                        (len & 0x3) - 1, true);
        }
 
        /* TODO: The following extra load is to make sure data flow be identical