From: Roland Dreier Date: Fri, 25 Jan 2008 22:15:34 +0000 (-0800) Subject: IB/mlx4: Micro-optimize mlx4_ib_poll_one() X-Git-Tag: upstream/snapshot3+hdmi~28415^2~47 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=b3226184af6c13c9d5d38f13f0ef8e03f718bbf7;p=platform%2Fadaptation%2Frenesas_rcar%2Frenesas_kernel.git IB/mlx4: Micro-optimize mlx4_ib_poll_one() Rather than byte-swapping cqe->g_mlpath_rqpn each time we extract a field from it, byte-swap it once into a temporary variable. This results in smaller, better code -- eg, on 32-bit x86: add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-5 (-5) function old new delta mlx4_ib_poll_cq 1188 1183 -5 Signed-off-by: Roland Dreier --- diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 9d32c49..7950aa6 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -313,6 +313,7 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, struct mlx4_ib_srq *srq; int is_send; int is_error; + u32 g_mlpath_rqpn; u16 wqe_ctr; cqe = next_cqe_sw(cq); @@ -426,10 +427,10 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, wc->slid = be16_to_cpu(cqe->rlid); wc->sl = cqe->sl >> 4; - wc->src_qp = be32_to_cpu(cqe->g_mlpath_rqpn) & 0xffffff; - wc->dlid_path_bits = (be32_to_cpu(cqe->g_mlpath_rqpn) >> 24) & 0x7f; - wc->wc_flags |= be32_to_cpu(cqe->g_mlpath_rqpn) & 0x80000000 ? - IB_WC_GRH : 0; + g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn); + wc->src_qp = g_mlpath_rqpn & 0xffffff; + wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f; + wc->wc_flags |= g_mlpath_rqpn & 0x80000000 ? IB_WC_GRH : 0; wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f; }