Merge tag 'v4.18' into rdma.git for-next
author Jason Gunthorpe <jgg@mellanox.com>
Thu, 16 Aug 2018 19:08:18 +0000 (13:08 -0600)
committer Jason Gunthorpe <jgg@mellanox.com>
Thu, 16 Aug 2018 19:12:00 +0000 (13:12 -0600)
Resolve merge conflicts from the -rc cycle against the rdma.git tree:

Conflicts:
 drivers/infiniband/core/uverbs_cmd.c
  - New ifs added to ib_uverbs_ex_create_flow in -rc and for-next
  - Merge removal of file->ucontext in for-next with new code in -rc
 drivers/infiniband/core/uverbs_main.c
  - for-next removed code from ib_uverbs_write() that was modified
    in for-rc

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
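
The conflict resolution described above can be reproduced with ordinary git
commands. A minimal sketch follows (branch names and the exact resolution
steps are assumptions for illustration, not recorded in this commit):

    git checkout for-next
    git merge v4.18                 # bring the -rc cycle work into rdma.git
    # git stops on the two conflicted files listed above; edit them so the
    # -rc fixes and the for-next rework coexist, then stage and commit:
    git add drivers/infiniband/core/uverbs_cmd.c \
            drivers/infiniband/core/uverbs_main.c
    git commit                      # records this merge with the message above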
18 files changed:
MAINTAINERS
drivers/infiniband/core/uverbs_cmd.c
drivers/infiniband/core/verbs.c
drivers/infiniband/hw/hfi1/rc.c
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/srq.c
drivers/infiniband/hw/qedr/verbs.c
drivers/net/ethernet/mellanox/mlx5/core/cmd.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
drivers/nvme/host/rdma.c
fs/cifs/smbdirect.c
include/linux/mlx5/driver.h
include/linux/mlx5/mlx5_ifc.h
include/rdma/ib_verbs.h
net/rds/ib_frmr.c
net/smc/smc_tx.c

diff --combined MAINTAINERS
@@@ -581,7 -581,7 +581,7 @@@ W: https://www.infradead.org/~dhowells/
  
  AGPGART DRIVER
  M:    David Airlie <airlied@linux.ie>
- T:    git git://people.freedesktop.org/~airlied/linux (part of drm maint)
+ T:    git git://anongit.freedesktop.org/drm/drm
  S:    Maintained
  F:    drivers/char/agp/
  F:    include/linux/agp*
@@@ -2523,7 -2523,7 +2523,7 @@@ S:      Supporte
  F:    drivers/scsi/esas2r
  
  ATUSB IEEE 802.15.4 RADIO DRIVER
- M:    Stefan Schmidt <stefan@osg.samsung.com>
+ M:    Stefan Schmidt <stefan@datenfreihafen.org>
  L:    linux-wpan@vger.kernel.org
  S:    Maintained
  F:    drivers/net/ieee802154/atusb.c
@@@ -2971,9 -2971,13 +2971,13 @@@ N:    bcm585
  N:    bcm586*
  N:    bcm88312
  N:    hr2
- F:    arch/arm64/boot/dts/broadcom/ns2*
+ N:    stingray
+ F:    arch/arm64/boot/dts/broadcom/northstar2/*
+ F:    arch/arm64/boot/dts/broadcom/stingray/*
  F:    drivers/clk/bcm/clk-ns*
+ F:    drivers/clk/bcm/clk-sr*
  F:    drivers/pinctrl/bcm/pinctrl-ns*
+ F:    include/dt-bindings/clock/bcm-sr*
  
  BROADCOM KONA GPIO DRIVER
  M:    Ray Jui <rjui@broadcom.com>
@@@ -3506,6 -3510,7 +3510,6 @@@ F:      drivers/net/ethernet/cisco/enic
  
  CISCO VIC LOW LATENCY NIC DRIVER
  M:    Christian Benvenuti <benve@cisco.com>
 -M:    Dave Goodell <dgoodell@cisco.com>
  S:    Supported
  F:    drivers/infiniband/hw/usnic/
  
@@@ -4359,12 -4364,7 +4363,7 @@@ L:     iommu@lists.linux-foundation.or
  T:    git git://git.infradead.org/users/hch/dma-mapping.git
  W:    http://git.infradead.org/users/hch/dma-mapping.git
  S:    Supported
- F:    lib/dma-debug.c
- F:    lib/dma-direct.c
- F:    lib/dma-noncoherent.c
- F:    lib/dma-virt.c
- F:    drivers/base/dma-mapping.c
- F:    drivers/base/dma-coherent.c
+ F:    kernel/dma/
  F:    include/asm-generic/dma-mapping.h
  F:    include/linux/dma-direct.h
  F:    include/linux/dma-mapping.h
@@@ -4460,6 -4460,7 +4459,7 @@@ F:      Documentation/blockdev/drbd
  
  DRIVER CORE, KOBJECTS, DEBUGFS AND SYSFS
  M:    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ R:    "Rafael J. Wysocki" <rafael@kernel.org>
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git
  S:    Supported
  F:    Documentation/kobject.txt
@@@ -4630,7 -4631,7 +4630,7 @@@ F:      include/uapi/drm/vmwgfx_drm.
  DRM DRIVERS
  M:    David Airlie <airlied@linux.ie>
  L:    dri-devel@lists.freedesktop.org
- T:    git git://people.freedesktop.org/~airlied/linux
+ T:    git git://anongit.freedesktop.org/drm/drm
  B:    https://bugs.freedesktop.org/
  C:    irc://chat.freenode.net/dri-devel
  S:    Maintained
@@@ -5443,6 -5444,7 +5443,7 @@@ F:      drivers/iommu/exynos-iommu.
  
  EZchip NPS platform support
  M:    Vineet Gupta <vgupta@synopsys.com>
+ M:    Ofer Levi <oferle@mellanox.com>
  S:    Supported
  F:    arch/arc/plat-eznps
  F:    arch/arc/boot/dts/eznps.dts
@@@ -5673,7 -5675,7 +5674,7 @@@ F:      drivers/crypto/caam
  F:    Documentation/devicetree/bindings/crypto/fsl-sec4.txt
  
  FREESCALE DIU FRAMEBUFFER DRIVER
- M:    Timur Tabi <timur@tabi.org>
+ M:    Timur Tabi <timur@kernel.org>
  L:    linux-fbdev@vger.kernel.org
  S:    Maintained
  F:    drivers/video/fbdev/fsl-diu-fb.*
@@@ -5773,7 -5775,7 +5774,7 @@@ S:      Maintaine
  F:    drivers/net/wan/fsl_ucc_hdlc*
  
  FREESCALE QUICC ENGINE UCC UART DRIVER
- M:    Timur Tabi <timur@tabi.org>
+ M:    Timur Tabi <timur@kernel.org>
  L:    linuxppc-dev@lists.ozlabs.org
  S:    Maintained
  F:    drivers/tty/serial/ucc_uart.c
@@@ -5789,7 -5791,6 +5790,6 @@@ F:      include/linux/fsl
  
  FREESCALE SOC FS_ENET DRIVER
  M:    Pantelis Antoniou <pantelis.antoniou@gmail.com>
- M:    Vitaly Bordug <vbordug@ru.mvista.com>
  L:    linuxppc-dev@lists.ozlabs.org
  L:    netdev@vger.kernel.org
  S:    Maintained
@@@ -5797,7 -5798,7 +5797,7 @@@ F:      drivers/net/ethernet/freescale/fs_en
  F:    include/linux/fs_enet_pd.h
  
  FREESCALE SOC SOUND DRIVERS
- M:    Timur Tabi <timur@tabi.org>
+ M:    Timur Tabi <timur@kernel.org>
  M:    Nicolin Chen <nicoleotsuka@gmail.com>
  M:    Xiubo Li <Xiubo.Lee@gmail.com>
  R:    Fabio Estevam <fabio.estevam@nxp.com>
@@@ -5929,7 -5930,7 +5929,7 @@@ F:      Documentation/dev-tools/gcov.rs
  
  GDB KERNEL DEBUGGING HELPER SCRIPTS
  M:    Jan Kiszka <jan.kiszka@siemens.com>
- M:    Kieran Bingham <kieran@bingham.xyz>
+ M:    Kieran Bingham <kbingham@kernel.org>
  S:    Supported
  F:    scripts/gdb/
  
@@@ -6908,7 -6909,7 +6908,7 @@@ F:      drivers/clk/clk-versaclock5.
  
  IEEE 802.15.4 SUBSYSTEM
  M:    Alexander Aring <alex.aring@gmail.com>
- M:    Stefan Schmidt <stefan@osg.samsung.com>
+ M:    Stefan Schmidt <stefan@datenfreihafen.org>
  L:    linux-wpan@vger.kernel.org
  W:    http://wpan.cakelab.org/
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/sschmidt/wpan.git
@@@ -7095,6 -7096,7 +7095,7 @@@ F:      include/uapi/linux/input.
  F:    include/uapi/linux/input-event-codes.h
  F:    include/linux/input/
  F:    Documentation/devicetree/bindings/input/
+ F:    Documentation/devicetree/bindings/serio/
  F:    Documentation/input/
  
  INPUT MULTITOUCH (MT) PROTOCOL
@@@ -7561,8 -7563,9 +7562,8 @@@ S:      Maintaine
  F:    drivers/firmware/iscsi_ibft*
  
  ISCSI EXTENSIONS FOR RDMA (ISER) INITIATOR
 -M:    Or Gerlitz <ogerlitz@mellanox.com>
  M:    Sagi Grimberg <sagi@grimberg.me>
 -M:    Roi Dayan <roid@mellanox.com>
 +M:    Max Gurtovoy <maxg@mellanox.com>
  L:    linux-rdma@vger.kernel.org
  S:    Supported
  W:    http://www.openfabrics.org
@@@ -7983,7 -7986,7 +7984,7 @@@ F:      lib/test_kmod.
  F:    tools/testing/selftests/kmod/
  
  KPROBES
- M:    Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com>
+ M:    Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
  M:    Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
  M:    "David S. Miller" <davem@davemloft.net>
  M:    Masami Hiramatsu <mhiramat@kernel.org>
@@@ -8627,7 -8630,7 +8628,7 @@@ MARVELL MWIFIEX WIRELESS DRIVE
  M:    Amitkumar Karwar <amitkarwar@gmail.com>
  M:    Nishant Sarmukadam <nishants@marvell.com>
  M:    Ganapathi Bhat <gbhat@marvell.com>
- M:    Xinming Hu <huxm@marvell.com>
+ M:    Xinming Hu <huxinming820@gmail.com>
  L:    linux-wireless@vger.kernel.org
  S:    Maintained
  F:    drivers/net/wireless/marvell/mwifiex/
@@@ -9073,7 -9076,7 +9074,7 @@@ S:      Maintaine
  F:    drivers/usb/mtu3/
  
  MEGACHIPS STDPXXXX-GE-B850V3-FW LVDS/DP++ BRIDGES
- M:    Peter Senna Tschudin <peter.senna@collabora.com>
+ M:    Peter Senna Tschudin <peter.senna@gmail.com>
  M:    Martin Donnelly <martin.donnelly@ge.com>
  M:    Martyn Welch <martyn.welch@collabora.co.uk>
  S:    Maintained
@@@ -9754,6 -9757,11 +9755,11 @@@ L:    linux-scsi@vger.kernel.or
  S:    Maintained
  F:    drivers/scsi/NCR_D700.*
  
+ NCSI LIBRARY:
+ M:    Samuel Mendoza-Jonas <sam@mendozajonas.com>
+ S:    Maintained
+ F:    net/ncsi/
  NCT6775 HARDWARE MONITOR DRIVER
  M:    Guenter Roeck <linux@roeck-us.net>
  L:    linux-hwmon@vger.kernel.org
@@@ -9880,6 -9888,7 +9886,7 @@@ M:      Andrew Lunn <andrew@lunn.ch
  M:    Vivien Didelot <vivien.didelot@savoirfairelinux.com>
  M:    Florian Fainelli <f.fainelli@gmail.com>
  S:    Maintained
+ F:    Documentation/devicetree/bindings/net/dsa/
  F:    net/dsa/
  F:    include/net/dsa.h
  F:    include/linux/dsa/
@@@ -10206,11 -10215,13 +10213,13 @@@ F:        sound/soc/codecs/sgtl5000
  
  NXP TDA998X DRM DRIVER
  M:    Russell King <linux@armlinux.org.uk>
- S:    Supported
+ S:    Maintained
  T:    git git://git.armlinux.org.uk/~rmk/linux-arm.git drm-tda998x-devel
  T:    git git://git.armlinux.org.uk/~rmk/linux-arm.git drm-tda998x-fixes
  F:    drivers/gpu/drm/i2c/tda998x_drv.c
  F:    include/drm/i2c/tda998x.h
+ F:    include/dt-bindings/display/tda998x.h
+ K:    "nxp,tda998x"
  
  NXP TFA9879 DRIVER
  M:    Peter Rosin <peda@axentia.se>
@@@ -11474,6 -11485,15 +11483,15 @@@ W: http://wireless.kernel.org/en/users/
  S:    Obsolete
  F:    drivers/net/wireless/intersil/prism54/
  
+ PROC FILESYSTEM
+ R:    Alexey Dobriyan <adobriyan@gmail.com>
+ L:    linux-kernel@vger.kernel.org
+ L:    linux-fsdevel@vger.kernel.org
+ S:    Maintained
+ F:    fs/proc/
+ F:    include/linux/proc_fs.h
+ F:    tools/testing/selftests/proc/
  PROC SYSCTL
  M:    "Luis R. Rodriguez" <mcgrof@kernel.org>
  M:    Kees Cook <keescook@chromium.org>
@@@ -11806,9 -11826,9 +11824,9 @@@ F:  Documentation/devicetree/bindings/o
  F:  drivers/cpufreq/qcom-cpufreq-kryo.c
  
  QUALCOMM EMAC GIGABIT ETHERNET DRIVER
- M:    Timur Tabi <timur@codeaurora.org>
+ M:    Timur Tabi <timur@kernel.org>
  L:    netdev@vger.kernel.org
- S:    Supported
+ S:    Maintained
  F:    drivers/net/ethernet/qualcomm/emac/
  
  QUALCOMM HEXAGON ARCHITECTURE
@@@ -11819,7 -11839,7 +11837,7 @@@ S:   Supporte
  F:    arch/hexagon/
  
  QUALCOMM HIDMA DRIVER
- M:    Sinan Kaya <okaya@codeaurora.org>
+ M:    Sinan Kaya <okaya@kernel.org>
  L:    linux-arm-kernel@lists.infradead.org
  L:    linux-arm-msm@vger.kernel.org
  L:    dmaengine@vger.kernel.org
@@@ -12619,21 -12639,15 +12637,21 @@@ S:        Maintaine
  F:    drivers/scsi/sr*
  
  SCSI RDMA PROTOCOL (SRP) INITIATOR
 -M:    Bart Van Assche <bart.vanassche@sandisk.com>
 +M:    Bart Van Assche <bvanassche@acm.org>
  L:    linux-rdma@vger.kernel.org
  S:    Supported
 -W:    http://www.openfabrics.org
  Q:    http://patchwork.kernel.org/project/linux-rdma/list/
 -T:    git git://git.kernel.org/pub/scm/linux/kernel/git/dad/srp-initiator.git
  F:    drivers/infiniband/ulp/srp/
  F:    include/scsi/srp.h
  
 +SCSI RDMA PROTOCOL (SRP) TARGET
 +M:    Bart Van Assche <bvanassche@acm.org>
 +L:    linux-rdma@vger.kernel.org
 +L:    target-devel@vger.kernel.org
 +S:    Supported
 +Q:    http://patchwork.kernel.org/project/linux-rdma/list/
 +F:    drivers/infiniband/ulp/srpt/
 +
  SCSI SG DRIVER
  M:    Doug Gilbert <dgilbert@interlog.com>
  L:    linux-scsi@vger.kernel.org
@@@ -13652,7 -13666,7 +13670,7 @@@ M:   Konrad Rzeszutek Wilk <konrad.wilk@o
  L:    iommu@lists.linux-foundation.org
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/konrad/swiotlb.git
  S:    Supported
- F:    lib/swiotlb.c
+ F:    kernel/dma/swiotlb.c
  F:    arch/*/kernel/pci-swiotlb.c
  F:    include/linux/swiotlb.h
  
@@@ -15576,9 -15590,17 +15594,17 @@@ M: x86@kernel.or
  L:    linux-kernel@vger.kernel.org
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/core
  S:    Maintained
+ F:    Documentation/devicetree/bindings/x86/
  F:    Documentation/x86/
  F:    arch/x86/
  
+ X86 ENTRY CODE
+ M:    Andy Lutomirski <luto@kernel.org>
+ L:    linux-kernel@vger.kernel.org
+ T:    git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/asm
+ S:    Maintained
+ F:    arch/x86/entry/
  X86 MCE INFRASTRUCTURE
  M:    Tony Luck <tony.luck@intel.com>
  M:    Borislav Petkov <bp@alien8.de>
@@@ -15601,7 -15623,7 +15627,7 @@@ F:   drivers/platform/x86
  F:    drivers/platform/olpc/
  
  X86 VDSO
- M:    Andy Lutomirski <luto@amacapital.net>
+ M:    Andy Lutomirski <luto@kernel.org>
  L:    linux-kernel@vger.kernel.org
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/vdso
  S:    Maintained
diff --combined drivers/infiniband/core/uverbs_cmd.c
  #include "core_priv.h"
  
  static struct ib_uverbs_completion_event_file *
 -ib_uverbs_lookup_comp_file(int fd, struct ib_ucontext *context)
 +_ib_uverbs_lookup_comp_file(s32 fd, struct ib_uverbs_file *ufile)
  {
 -      struct ib_uobject *uobj = uobj_get_read(UVERBS_OBJECT_COMP_CHANNEL,
 -                                              fd, context);
 -      struct ib_uobject_file *uobj_file;
 +      struct ib_uobject *uobj = ufd_get_read(UVERBS_OBJECT_COMP_CHANNEL,
 +                                             fd, ufile);
  
        if (IS_ERR(uobj))
                return (void *)uobj;
        uverbs_uobject_get(uobj);
        uobj_put_read(uobj);
  
 -      uobj_file = container_of(uobj, struct ib_uobject_file, uobj);
 -      return container_of(uobj_file, struct ib_uverbs_completion_event_file,
 -                          uobj_file);
 +      return container_of(uobj, struct ib_uverbs_completion_event_file,
 +                          uobj);
  }
 +#define ib_uverbs_lookup_comp_file(_fd, _ufile)                                \
 +      _ib_uverbs_lookup_comp_file((_fd)*typecheck(s32, _fd), _ufile)
  
  ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
 -                            struct ib_device *ib_dev,
                              const char __user *buf,
                              int in_len, int out_len)
  {
@@@ -75,7 -76,6 +75,7 @@@
        struct ib_ucontext               *ucontext;
        struct file                      *filp;
        struct ib_rdmacg_object          cg_obj;
 +      struct ib_device *ib_dev;
        int ret;
  
        if (out_len < sizeof resp)
        if (copy_from_user(&cmd, buf, sizeof cmd))
                return -EFAULT;
  
 -      mutex_lock(&file->mutex);
 +      mutex_lock(&file->ucontext_lock);
 +      ib_dev = srcu_dereference(file->device->ib_dev,
 +                                &file->device->disassociate_srcu);
 +      if (!ib_dev) {
 +              ret = -EIO;
 +              goto err;
 +      }
  
        if (file->ucontext) {
                ret = -EINVAL;
        ucontext->cg_obj = cg_obj;
        /* ufile is required when some objects are released */
        ucontext->ufile = file;
 -      uverbs_initialize_ucontext(ucontext);
  
        rcu_read_lock();
        ucontext->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
        rcu_read_unlock();
        ucontext->closing = 0;
 +      ucontext->cleanup_retryable = false;
  
  #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
        ucontext->umem_tree = RB_ROOT_CACHED;
                goto err_file;
        }
  
 -      file->ucontext = ucontext;
 -
        fd_install(resp.async_fd, filp);
  
 -      mutex_unlock(&file->mutex);
 +      /*
 +       * Make sure that ib_uverbs_get_ucontext() sees the pointer update
 +       * only after all writes to setup the ucontext have completed
 +       */
 +      smp_store_release(&file->ucontext, ucontext);
 +
 +      mutex_unlock(&file->ucontext_lock);
  
        return in_len;
  
@@@ -179,16 -169,15 +179,16 @@@ err_alloc
        ib_rdmacg_uncharge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE);
  
  err:
 -      mutex_unlock(&file->mutex);
 +      mutex_unlock(&file->ucontext_lock);
        return ret;
  }
  
 -static void copy_query_dev_fields(struct ib_uverbs_file *file,
 -                                struct ib_device *ib_dev,
 +static void copy_query_dev_fields(struct ib_ucontext *ucontext,
                                  struct ib_uverbs_query_device_resp *resp,
                                  struct ib_device_attr *attr)
  {
 +      struct ib_device *ib_dev = ucontext->device;
 +
        resp->fw_ver            = attr->fw_ver;
        resp->node_guid         = ib_dev->node_guid;
        resp->sys_image_guid    = attr->sys_image_guid;
        resp->max_qp            = attr->max_qp;
        resp->max_qp_wr         = attr->max_qp_wr;
        resp->device_cap_flags  = lower_32_bits(attr->device_cap_flags);
 -      resp->max_sge           = attr->max_sge;
 +      resp->max_sge           = min(attr->max_send_sge, attr->max_recv_sge);
        resp->max_sge_rd        = attr->max_sge_rd;
        resp->max_cq            = attr->max_cq;
        resp->max_cqe           = attr->max_cqe;
  }
  
  ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
 -                             struct ib_device *ib_dev,
                               const char __user *buf,
                               int in_len, int out_len)
  {
        struct ib_uverbs_query_device      cmd;
        struct ib_uverbs_query_device_resp resp;
 +      struct ib_ucontext *ucontext;
 +
 +      ucontext = ib_uverbs_get_ucontext(file);
 +      if (IS_ERR(ucontext))
 +              return PTR_ERR(ucontext);
  
        if (out_len < sizeof resp)
                return -ENOSPC;
                return -EFAULT;
  
        memset(&resp, 0, sizeof resp);
 -      copy_query_dev_fields(file, ib_dev, &resp, &ib_dev->attrs);
 +      copy_query_dev_fields(ucontext, &resp, &ucontext->device->attrs);
  
        if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
                return -EFAULT;
        return in_len;
  }
  
 +/*
 + * ib_uverbs_query_port_resp.port_cap_flags started out as just a copy of the
 + * PortInfo CapabilityMask, but was extended with unique bits.
 + */
 +static u32 make_port_cap_flags(const struct ib_port_attr *attr)
 +{
 +      u32 res;
 +
 +      /* All IBA CapabilityMask bits are passed through here, except bit 26,
 +       * which is overridden with IP_BASED_GIDS. This is due to a historical
 +       * mistake in the implementation of IP_BASED_GIDS. Otherwise all other
 +       * bits match the IBA definition across all kernel versions.
 +       */
 +      res = attr->port_cap_flags & ~(u32)IB_UVERBS_PCF_IP_BASED_GIDS;
 +
 +      if (attr->ip_gids)
 +              res |= IB_UVERBS_PCF_IP_BASED_GIDS;
 +
 +      return res;
 +}
 +
  ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
 -                           struct ib_device *ib_dev,
                             const char __user *buf,
                             int in_len, int out_len)
  {
        struct ib_uverbs_query_port_resp resp;
        struct ib_port_attr              attr;
        int                              ret;
 +      struct ib_ucontext *ucontext;
 +      struct ib_device *ib_dev;
 +
 +      ucontext = ib_uverbs_get_ucontext(file);
 +      if (IS_ERR(ucontext))
 +              return PTR_ERR(ucontext);
 +      ib_dev = ucontext->device;
  
        if (out_len < sizeof resp)
                return -ENOSPC;
        resp.max_mtu         = attr.max_mtu;
        resp.active_mtu      = attr.active_mtu;
        resp.gid_tbl_len     = attr.gid_tbl_len;
 -      resp.port_cap_flags  = attr.port_cap_flags;
 +      resp.port_cap_flags  = make_port_cap_flags(&attr);
        resp.max_msg_sz      = attr.max_msg_sz;
        resp.bad_pkey_cntr   = attr.bad_pkey_cntr;
        resp.qkey_viol_cntr  = attr.qkey_viol_cntr;
        resp.pkey_tbl_len    = attr.pkey_tbl_len;
  
 +      if (rdma_is_grh_required(ib_dev, cmd.port_num))
 +              resp.flags |= IB_UVERBS_QPF_GRH_REQUIRED;
 +
        if (rdma_cap_opa_ah(ib_dev, cmd.port_num)) {
                resp.lid     = OPA_TO_IB_UCAST_LID(attr.lid);
                resp.sm_lid  = OPA_TO_IB_UCAST_LID(attr.sm_lid);
  }
  
  ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
 -                         struct ib_device *ib_dev,
                           const char __user *buf,
                           int in_len, int out_len)
  {
        struct ib_uobject             *uobj;
        struct ib_pd                  *pd;
        int                            ret;
 +      struct ib_device *ib_dev;
  
        if (out_len < sizeof resp)
                return -ENOSPC;
                     in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
                     out_len - sizeof(resp));
  
 -      uobj  = uobj_alloc(UVERBS_OBJECT_PD, file->ucontext);
 +      uobj = uobj_alloc(UVERBS_OBJECT_PD, file, &ib_dev);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);
  
 -      pd = ib_dev->alloc_pd(ib_dev, file->ucontext, &udata);
 +      pd = ib_dev->alloc_pd(ib_dev, uobj->context, &udata);
        if (IS_ERR(pd)) {
                ret = PTR_ERR(pd);
                goto err;
                goto err_copy;
        }
  
 -      uobj_alloc_commit(uobj);
 -
 -      return in_len;
 +      return uobj_alloc_commit(uobj, in_len);
  
  err_copy:
        ib_dealloc_pd(pd);
@@@ -404,16 -361,25 +404,16 @@@ err
  }
  
  ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
 -                           struct ib_device *ib_dev,
                             const char __user *buf,
                             int in_len, int out_len)
  {
        struct ib_uverbs_dealloc_pd cmd;
 -      struct ib_uobject          *uobj;
 -      int                         ret;
  
        if (copy_from_user(&cmd, buf, sizeof cmd))
                return -EFAULT;
  
 -      uobj  = uobj_get_write(UVERBS_OBJECT_PD, cmd.pd_handle,
 -                             file->ucontext);
 -      if (IS_ERR(uobj))
 -              return PTR_ERR(uobj);
 -
 -      ret = uobj_remove_commit(uobj);
 -
 -      return ret ?: in_len;
 +      return uobj_perform_destroy(UVERBS_OBJECT_PD, cmd.pd_handle, file,
 +                                  in_len);
  }
  
  struct xrcd_table_entry {
@@@ -502,6 -468,7 +502,6 @@@ static void xrcd_table_delete(struct ib
  }
  
  ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
 -                          struct ib_device *ib_dev,
                            const char __user *buf, int in_len,
                            int out_len)
  {
        struct inode                   *inode = NULL;
        int                             ret = 0;
        int                             new_xrcd = 0;
 +      struct ib_device *ib_dev;
  
        if (out_len < sizeof resp)
                return -ENOSPC;
                }
        }
  
 -      obj  = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD,
 -                                                  file->ucontext);
 +      obj = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD, file,
 +                                                 &ib_dev);
        if (IS_ERR(obj)) {
                ret = PTR_ERR(obj);
                goto err_tree_mutex_unlock;
        }
  
        if (!xrcd) {
 -              xrcd = ib_dev->alloc_xrcd(ib_dev, file->ucontext, &udata);
 +              xrcd = ib_dev->alloc_xrcd(ib_dev, obj->uobject.context, &udata);
                if (IS_ERR(xrcd)) {
                        ret = PTR_ERR(xrcd);
                        goto err;
  
        mutex_unlock(&file->device->xrcd_tree_mutex);
  
 -      uobj_alloc_commit(&obj->uobject);
 -
 -      return in_len;
 +      return uobj_alloc_commit(&obj->uobject, in_len);
  
  err_copy:
        if (inode) {
@@@ -623,25 -591,32 +623,25 @@@ err_tree_mutex_unlock
  }
  
  ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
 -                           struct ib_device *ib_dev,
                             const char __user *buf, int in_len,
                             int out_len)
  {
        struct ib_uverbs_close_xrcd cmd;
 -      struct ib_uobject           *uobj;
 -      int                         ret = 0;
  
        if (copy_from_user(&cmd, buf, sizeof cmd))
                return -EFAULT;
  
 -      uobj  = uobj_get_write(UVERBS_OBJECT_XRCD, cmd.xrcd_handle,
 -                             file->ucontext);
 -      if (IS_ERR(uobj))
 -              return PTR_ERR(uobj);
 -
 -      ret = uobj_remove_commit(uobj);
 -      return ret ?: in_len;
 +      return uobj_perform_destroy(UVERBS_OBJECT_XRCD, cmd.xrcd_handle, file,
 +                                  in_len);
  }
  
 -int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
 +int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject,
                           struct ib_xrcd *xrcd,
                           enum rdma_remove_reason why)
  {
        struct inode *inode;
        int ret;
 +      struct ib_uverbs_device *dev = uobject->context->ufile->device;
  
        inode = xrcd->inode;
        if (inode && !atomic_dec_and_test(&xrcd->usecnt))
  
        ret = ib_dealloc_xrcd(xrcd);
  
 -      if (why == RDMA_REMOVE_DESTROY && ret)
 +      if (ib_is_destroy_retryable(ret, why, uobject)) {
                atomic_inc(&xrcd->usecnt);
 -      else if (inode)
 +              return ret;
 +      }
 +
 +      if (inode)
                xrcd_table_delete(dev, inode);
  
        return ret;
  }
  
  ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
 -                       struct ib_device *ib_dev,
                         const char __user *buf, int in_len,
                         int out_len)
  {
        struct ib_pd                *pd;
        struct ib_mr                *mr;
        int                          ret;
 +      struct ib_device *ib_dev;
  
        if (out_len < sizeof resp)
                return -ENOSPC;
        if (ret)
                return ret;
  
 -      uobj  = uobj_alloc(UVERBS_OBJECT_MR, file->ucontext);
 +      uobj = uobj_alloc(UVERBS_OBJECT_MR, file, &ib_dev);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);
  
 -      pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
 +      pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
        if (!pd) {
                ret = -EINVAL;
                goto err_free;
  
        uobj_put_obj_read(pd);
  
 -      uobj_alloc_commit(uobj);
 -
 -      return in_len;
 +      return uobj_alloc_commit(uobj, in_len);
  
  err_copy:
        ib_dereg_mr(mr);
@@@ -753,6 -727,7 +753,6 @@@ err_free
  }
  
  ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
 -                         struct ib_device *ib_dev,
                           const char __user *buf, int in_len,
                           int out_len)
  {
             (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)))
                        return -EINVAL;
  
 -      uobj  = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle,
 -                             file->ucontext);
 +      uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, file);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);
  
        }
  
        if (cmd.flags & IB_MR_REREG_PD) {
 -              pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
 +              pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle,
 +                                     file);
                if (!pd) {
                        ret = -EINVAL;
                        goto put_uobjs;
@@@ -844,19 -819,29 +844,19 @@@ put_uobjs
  }
  
  ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
 -                         struct ib_device *ib_dev,
                           const char __user *buf, int in_len,
                           int out_len)
  {
        struct ib_uverbs_dereg_mr cmd;
 -      struct ib_uobject        *uobj;
 -      int                       ret = -EINVAL;
  
        if (copy_from_user(&cmd, buf, sizeof cmd))
                return -EFAULT;
  
 -      uobj  = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle,
 -                             file->ucontext);
 -      if (IS_ERR(uobj))
 -              return PTR_ERR(uobj);
 -
 -      ret = uobj_remove_commit(uobj);
 -
 -      return ret ?: in_len;
 +      return uobj_perform_destroy(UVERBS_OBJECT_MR, cmd.mr_handle, file,
 +                                  in_len);
  }
  
  ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
 -                         struct ib_device *ib_dev,
                           const char __user *buf, int in_len,
                           int out_len)
  {
        struct ib_mw                  *mw;
        struct ib_udata                udata;
        int                            ret;
 +      struct ib_device *ib_dev;
  
        if (out_len < sizeof(resp))
                return -ENOSPC;
        if (copy_from_user(&cmd, buf, sizeof(cmd)))
                return -EFAULT;
  
 -      uobj  = uobj_alloc(UVERBS_OBJECT_MW, file->ucontext);
 +      uobj = uobj_alloc(UVERBS_OBJECT_MW, file, &ib_dev);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);
  
 -      pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
 +      pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
        if (!pd) {
                ret = -EINVAL;
                goto err_free;
        }
  
        uobj_put_obj_read(pd);
 -      uobj_alloc_commit(uobj);
 -
 -      return in_len;
 +      return uobj_alloc_commit(uobj, in_len);
  
  err_copy:
        uverbs_dealloc_mw(mw);
@@@ -925,19 -911,28 +925,19 @@@ err_free
  }
  
  ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
 -                           struct ib_device *ib_dev,
                             const char __user *buf, int in_len,
                             int out_len)
  {
        struct ib_uverbs_dealloc_mw cmd;
 -      struct ib_uobject          *uobj;
 -      int                         ret = -EINVAL;
  
        if (copy_from_user(&cmd, buf, sizeof(cmd)))
                return -EFAULT;
  
 -      uobj  = uobj_get_write(UVERBS_OBJECT_MW, cmd.mw_handle,
 -                             file->ucontext);
 -      if (IS_ERR(uobj))
 -              return PTR_ERR(uobj);
 -
 -      ret = uobj_remove_commit(uobj);
 -      return ret ?: in_len;
 +      return uobj_perform_destroy(UVERBS_OBJECT_MW, cmd.mw_handle, file,
 +                                  in_len);
  }
  
  ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
 -                                    struct ib_device *ib_dev,
                                      const char __user *buf, int in_len,
                                      int out_len)
  {
        struct ib_uverbs_create_comp_channel_resp  resp;
        struct ib_uobject                         *uobj;
        struct ib_uverbs_completion_event_file    *ev_file;
 +      struct ib_device *ib_dev;
  
        if (out_len < sizeof resp)
                return -ENOSPC;
        if (copy_from_user(&cmd, buf, sizeof cmd))
                return -EFAULT;
  
 -      uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, file->ucontext);
 +      uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, file, &ib_dev);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);
  
        resp.fd = uobj->id;
  
        ev_file = container_of(uobj, struct ib_uverbs_completion_event_file,
 -                             uobj_file.uobj);
 +                             uobj);
        ib_uverbs_init_event_queue(&ev_file->ev_queue);
  
        if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
                return -EFAULT;
        }
  
 -      uobj_alloc_commit(uobj);
 -      return in_len;
 +      return uobj_alloc_commit(uobj, in_len);
  }
  
  static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
 -                                      struct ib_device *ib_dev,
                                       struct ib_udata *ucore,
                                       struct ib_udata *uhw,
                                       struct ib_uverbs_ex_create_cq *cmd,
        int                             ret;
        struct ib_uverbs_ex_create_cq_resp resp;
        struct ib_cq_init_attr attr = {};
 -
 -      if (!ib_dev->create_cq)
 -              return ERR_PTR(-EOPNOTSUPP);
 +      struct ib_device *ib_dev;
  
        if (cmd->comp_vector >= file->device->num_comp_vectors)
                return ERR_PTR(-EINVAL);
  
 -      obj  = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ,
 -                                                file->ucontext);
 +      obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ, file,
 +                                               &ib_dev);
        if (IS_ERR(obj))
                return obj;
  
 +      if (!ib_dev->create_cq) {
 +              ret = -EOPNOTSUPP;
 +              goto err;
 +      }
 +
        if (cmd->comp_channel >= 0) {
 -              ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel,
 -                                                   file->ucontext);
 +              ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel, file);
                if (IS_ERR(ev_file)) {
                        ret = PTR_ERR(ev_file);
                        goto err;
        }
  
        obj->uobject.user_handle = cmd->user_handle;
 -      obj->uverbs_file           = file;
        obj->comp_events_reported  = 0;
        obj->async_events_reported = 0;
        INIT_LIST_HEAD(&obj->comp_list);
        if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags))
                attr.flags = cmd->flags;
  
 -      cq = ib_dev->create_cq(ib_dev, &attr, file->ucontext, uhw);
 +      cq = ib_dev->create_cq(ib_dev, &attr, obj->uobject.context, uhw);
        if (IS_ERR(cq)) {
                ret = PTR_ERR(cq);
                goto err_file;
        if (ret)
                goto err_cb;
  
 -      uobj_alloc_commit(&obj->uobject);
 +      ret = uobj_alloc_commit(&obj->uobject, 0);
 +      if (ret)
 +              return ERR_PTR(ret);
        return obj;
  
  err_cb:
@@@ -1082,6 -1075,7 +1082,6 @@@ static int ib_uverbs_create_cq_cb(struc
  }
  
  ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
 -                          struct ib_device *ib_dev,
                            const char __user *buf, int in_len,
                            int out_len)
  {
        cmd_ex.comp_vector = cmd.comp_vector;
        cmd_ex.comp_channel = cmd.comp_channel;
  
 -      obj = create_cq(file, ib_dev, &ucore, &uhw, &cmd_ex,
 +      obj = create_cq(file, &ucore, &uhw, &cmd_ex,
                        offsetof(typeof(cmd_ex), comp_channel) +
                        sizeof(cmd.comp_channel), ib_uverbs_create_cq_cb,
                        NULL);
@@@ -1135,6 -1129,7 +1135,6 @@@ static int ib_uverbs_ex_create_cq_cb(st
  }
  
  int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file,
 -                       struct ib_device *ib_dev,
                           struct ib_udata *ucore,
                           struct ib_udata *uhw)
  {
                             sizeof(resp.response_length)))
                return -ENOSPC;
  
 -      obj = create_cq(file, ib_dev, ucore, uhw, &cmd,
 +      obj = create_cq(file, ucore, uhw, &cmd,
                        min(ucore->inlen, sizeof(cmd)),
                        ib_uverbs_ex_create_cq_cb, NULL);
  
  }
  
  ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
 -                          struct ib_device *ib_dev,
                            const char __user *buf, int in_len,
                            int out_len)
  {
                   in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
                   out_len - sizeof(resp));
  
 -      cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
 +      cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
        if (!cq)
                return -EINVAL;
  
@@@ -1235,6 -1231,7 +1235,6 @@@ static int copy_wc_to_user(struct ib_de
  }
  
  ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
 -                        struct ib_device *ib_dev,
                          const char __user *buf, int in_len,
                          int out_len)
  {
        if (copy_from_user(&cmd, buf, sizeof cmd))
                return -EFAULT;
  
 -      cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
 +      cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
        if (!cq)
                return -EINVAL;
  
                if (!ret)
                        break;
  
 -              ret = copy_wc_to_user(ib_dev, data_ptr, &wc);
 +              ret = copy_wc_to_user(cq->device, data_ptr, &wc);
                if (ret)
                        goto out_put;
  
@@@ -1286,6 -1283,7 +1286,6 @@@ out_put
  }
  
  ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
 -                              struct ib_device *ib_dev,
                                const char __user *buf, int in_len,
                                int out_len)
  {
        if (copy_from_user(&cmd, buf, sizeof cmd))
                return -EFAULT;
  
 -      cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
 +      cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
        if (!cq)
                return -EINVAL;
  
  }
  
  ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
 -                           struct ib_device *ib_dev,
                             const char __user *buf, int in_len,
                             int out_len)
  {
        struct ib_uverbs_destroy_cq      cmd;
        struct ib_uverbs_destroy_cq_resp resp;
        struct ib_uobject               *uobj;
 -      struct ib_cq                    *cq;
        struct ib_ucq_object            *obj;
 -      int                              ret = -EINVAL;
  
        if (copy_from_user(&cmd, buf, sizeof cmd))
                return -EFAULT;
  
 -      uobj  = uobj_get_write(UVERBS_OBJECT_CQ, cmd.cq_handle,
 -                             file->ucontext);
 +      uobj = uobj_get_destroy(UVERBS_OBJECT_CQ, cmd.cq_handle, file);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);
  
 -      /*
 -       * Make sure we don't free the memory in remove_commit as we still
 -       * needs the uobject memory to create the response.
 -       */
 -      uverbs_uobject_get(uobj);
 -      cq      = uobj->object;
 -      obj     = container_of(cq->uobject, struct ib_ucq_object, uobject);
 -
 +      obj = container_of(uobj, struct ib_ucq_object, uobject);
        memset(&resp, 0, sizeof(resp));
 -
 -      ret = uobj_remove_commit(uobj);
 -      if (ret) {
 -              uverbs_uobject_put(uobj);
 -              return ret;
 -      }
 -
        resp.comp_events_reported  = obj->comp_events_reported;
        resp.async_events_reported = obj->async_events_reported;
  
 -      uverbs_uobject_put(uobj);
 +      uobj_put_destroy(uobj);
 +
        if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
                return -EFAULT;
  
@@@ -1360,13 -1375,12 +1360,13 @@@ static int create_qp(struct ib_uverbs_f
        int                             ret;
        struct ib_rwq_ind_table *ind_tbl = NULL;
        bool has_sq = true;
 +      struct ib_device *ib_dev;
  
        if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
                return -EPERM;
  
 -      obj  = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP,
 -                                                file->ucontext);
 +      obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file,
 +                                               &ib_dev);
        if (IS_ERR(obj))
                return PTR_ERR(obj);
        obj->uxrcd = NULL;
        if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) +
                      sizeof(cmd->rwq_ind_tbl_handle) &&
                      (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) {
 -              ind_tbl = uobj_get_obj_read(rwq_ind_table, UVERBS_OBJECT_RWQ_IND_TBL,
 -                                          cmd->rwq_ind_tbl_handle,
 -                                          file->ucontext);
 +              ind_tbl = uobj_get_obj_read(rwq_ind_table,
 +                                          UVERBS_OBJECT_RWQ_IND_TBL,
 +                                          cmd->rwq_ind_tbl_handle, file);
                if (!ind_tbl) {
                        ret = -EINVAL;
                        goto err_put;
  
        if (cmd->qp_type == IB_QPT_XRC_TGT) {
                xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->pd_handle,
 -                                        file->ucontext);
 +                                        file);
  
                if (IS_ERR(xrcd_uobj)) {
                        ret = -EINVAL;
                        cmd->max_recv_sge = 0;
                } else {
                        if (cmd->is_srq) {
 -                              srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd->srq_handle,
 -                                                      file->ucontext);
 +                              srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ,
 +                                                      cmd->srq_handle, file);
                                if (!srq || srq->srq_type == IB_SRQT_XRC) {
                                        ret = -EINVAL;
                                        goto err_put;
  
                        if (!ind_tbl) {
                                if (cmd->recv_cq_handle != cmd->send_cq_handle) {
 -                                      rcq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->recv_cq_handle,
 -                                                              file->ucontext);
 +                                      rcq = uobj_get_obj_read(
 +                                              cq, UVERBS_OBJECT_CQ,
 +                                              cmd->recv_cq_handle, file);
                                        if (!rcq) {
                                                ret = -EINVAL;
                                                goto err_put;
                }
  
                if (has_sq)
 -                      scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->send_cq_handle,
 -                                              file->ucontext);
 +                      scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ,
 +                                              cmd->send_cq_handle, file);
                if (!ind_tbl)
                        rcq = rcq ?: scq;
 -              pd  = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file->ucontext);
 +              pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle,
 +                                     file);
                if (!pd || (!scq && has_sq)) {
                        ret = -EINVAL;
                        goto err_put;
        if (ind_tbl)
                uobj_put_obj_read(ind_tbl);
  
 -      uobj_alloc_commit(&obj->uevent.uobject);
 -
 -      return 0;
 +      return uobj_alloc_commit(&obj->uevent.uobject, 0);
  err_cb:
        ib_destroy_qp(qp);
  
@@@ -1623,6 -1637,7 +1623,6 @@@ static int ib_uverbs_create_qp_cb(struc
  }
  
  ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
 -                          struct ib_device *ib_dev,
                            const char __user *buf, int in_len,
                            int out_len)
  {
@@@ -1683,6 -1698,7 +1683,6 @@@ static int ib_uverbs_ex_create_qp_cb(st
  }
  
  int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file,
 -                         struct ib_device *ib_dev,
                           struct ib_udata *ucore,
                           struct ib_udata *uhw)
  {
  }
  
  ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
 -                        struct ib_device *ib_dev,
                          const char __user *buf, int in_len, int out_len)
  {
        struct ib_uverbs_open_qp        cmd;
        struct ib_qp                   *qp;
        struct ib_qp_open_attr          attr;
        int ret;
 +      struct ib_device *ib_dev;
  
        if (out_len < sizeof resp)
                return -ENOSPC;
                   in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
                   out_len - sizeof(resp));
  
 -      obj  = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP,
 -                                                file->ucontext);
 +      obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file,
 +                                               &ib_dev);
        if (IS_ERR(obj))
                return PTR_ERR(obj);
  
 -      xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd.pd_handle,
 -                                file->ucontext);
 +      xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd.pd_handle, file);
        if (IS_ERR(xrcd_uobj)) {
                ret = -EINVAL;
                goto err_put;
        qp->uobject = &obj->uevent.uobject;
        uobj_put_read(xrcd_uobj);
  
 -
 -      uobj_alloc_commit(&obj->uevent.uobject);
 -
 -      return in_len;
 +      return uobj_alloc_commit(&obj->uevent.uobject, in_len);
  
  err_destroy:
        ib_destroy_qp(qp);
@@@ -1826,6 -1846,7 +1826,6 @@@ static void copy_ah_attr_to_uverbs(stru
  }
  
  ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
 -                         struct ib_device *ib_dev,
                           const char __user *buf, int in_len,
                           int out_len)
  {
                goto out;
        }
  
 -      qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
 +      qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
        if (!qp) {
                ret = -EINVAL;
                goto out;
@@@ -1947,11 -1968,11 +1947,11 @@@ static int modify_qp(struct ib_uverbs_f
        struct ib_qp *qp;
        int ret;
  
 -      attr = kmalloc(sizeof *attr, GFP_KERNEL);
 +      attr = kzalloc(sizeof(*attr), GFP_KERNEL);
        if (!attr)
                return -ENOMEM;
  
 -      qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle, file->ucontext);
 +      qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle, file);
        if (!qp) {
                ret = -EINVAL;
                goto out;
                goto release_qp;
        }
  
-       if ((cmd->base.attr_mask & IB_QP_AV) &&
-           !rdma_is_port_valid(qp->device, cmd->base.dest.port_num)) {
-               ret = -EINVAL;
-               goto release_qp;
+       if ((cmd->base.attr_mask & IB_QP_AV)) {
+               if (!rdma_is_port_valid(qp->device, cmd->base.dest.port_num)) {
+                       ret = -EINVAL;
+                       goto release_qp;
+               }
+               if (cmd->base.attr_mask & IB_QP_STATE &&
+                   cmd->base.qp_state == IB_QPS_RTR) {
+               /* We are in INIT->RTR TRANSITION (if we are not,
+                * this transition will be rejected in subsequent checks).
+                * In the INIT->RTR transition, we cannot have IB_QP_PORT set,
+                * but the IB_QP_STATE flag is required.
+                *
+                * Since kernel 3.14 (commit dbf727de7440), the uverbs driver,
+                * when IB_QP_AV is set, has required inclusion of a valid
+                * port number in the primary AV. (AVs are created and handled
+                * differently for infiniband and ethernet (RoCE) ports).
+                *
+                * Check the port number included in the primary AV against
+                * the port number in the qp struct, which was set (and saved)
+                * in the RST->INIT transition.
+                */
+                       if (cmd->base.dest.port_num != qp->real_qp->port) {
+                               ret = -EINVAL;
+                               goto release_qp;
+                       }
+               } else {
+               /* We are in SQD->SQD. (If we are not, this transition will
+                * be rejected later in the verbs layer checks).
+                * Check for both IB_QP_PORT and IB_QP_AV, these can be set
+                * together in the SQD->SQD transition.
+                *
+                * If only IP_QP_AV was set, add in IB_QP_PORT as well (the
+                * verbs layer driver does not track primary port changes
+                * resulting from path migration. Thus, in SQD, if the primary
+                * AV is modified, the primary port should also be modified).
+                *
+                * Note that in this transition, the IB_QP_STATE flag
+                * is not allowed.
+                */
+                       if (((cmd->base.attr_mask & (IB_QP_AV | IB_QP_PORT))
+                            == (IB_QP_AV | IB_QP_PORT)) &&
+                           cmd->base.port_num != cmd->base.dest.port_num) {
+                               ret = -EINVAL;
+                               goto release_qp;
+                       }
+                       if ((cmd->base.attr_mask & (IB_QP_AV | IB_QP_PORT))
+                           == IB_QP_AV) {
+                               cmd->base.attr_mask |= IB_QP_PORT;
+                               cmd->base.port_num = cmd->base.dest.port_num;
+                       }
+               }
        }
  
        if ((cmd->base.attr_mask & IB_QP_ALT_PATH) &&
            (!rdma_is_port_valid(qp->device, cmd->base.alt_port_num) ||
-           !rdma_is_port_valid(qp->device, cmd->base.alt_dest.port_num))) {
+           !rdma_is_port_valid(qp->device, cmd->base.alt_dest.port_num) ||
+           cmd->base.alt_port_num != cmd->base.alt_dest.port_num)) {
                ret = -EINVAL;
                goto release_qp;
        }
@@@ -2028,6 -2098,7 +2077,6 @@@ out
  }
  
  ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
 -                          struct ib_device *ib_dev,
                            const char __user *buf, int in_len,
                            int out_len)
  {
  }
  
  int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file,
 -                         struct ib_device *ib_dev,
                           struct ib_udata *ucore,
                           struct ib_udata *uhw)
  {
  }
  
  ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
 -                           struct ib_device *ib_dev,
                             const char __user *buf, int in_len,
                             int out_len)
  {
        struct ib_uverbs_destroy_qp_resp resp;
        struct ib_uobject               *uobj;
        struct ib_uqp_object            *obj;
 -      int                              ret = -EINVAL;
  
        if (copy_from_user(&cmd, buf, sizeof cmd))
                return -EFAULT;
  
 -      memset(&resp, 0, sizeof resp);
 -
 -      uobj  = uobj_get_write(UVERBS_OBJECT_QP, cmd.qp_handle,
 -                             file->ucontext);
 +      uobj = uobj_get_destroy(UVERBS_OBJECT_QP, cmd.qp_handle, file);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);
  
        obj = container_of(uobj, struct ib_uqp_object, uevent.uobject);
 -      /*
 -       * Make sure we don't free the memory in remove_commit as we still
 -       * needs the uobject memory to create the response.
 -       */
 -      uverbs_uobject_get(uobj);
 -
 -      ret = uobj_remove_commit(uobj);
 -      if (ret) {
 -              uverbs_uobject_put(uobj);
 -              return ret;
 -      }
 -
 +      memset(&resp, 0, sizeof(resp));
        resp.events_reported = obj->uevent.events_reported;
 -      uverbs_uobject_put(uobj);
 +
 +      uobj_put_destroy(uobj);
  
        if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
                return -EFAULT;
@@@ -2127,14 -2214,14 +2176,14 @@@ static void *alloc_wr(size_t wr_size, _
  }
  
  ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
 -                          struct ib_device *ib_dev,
                            const char __user *buf, int in_len,
                            int out_len)
  {
        struct ib_uverbs_post_send      cmd;
        struct ib_uverbs_post_send_resp resp;
        struct ib_uverbs_send_wr       *user_wr;
 -      struct ib_send_wr              *wr = NULL, *last, *next, *bad_wr;
 +      struct ib_send_wr              *wr = NULL, *last, *next;
 +      const struct ib_send_wr        *bad_wr;
        struct ib_qp                   *qp;
        int                             i, sg_ind;
        int                             is_ud;
        if (!user_wr)
                return -ENOMEM;
  
 -      qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
 +      qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
        if (!qp)
                goto out;
  
                                goto out_put;
                        }
  
 -                      ud->ah = uobj_get_obj_read(ah, UVERBS_OBJECT_AH, user_wr->wr.ud.ah,
 -                                                 file->ucontext);
 +                      ud->ah = uobj_get_obj_read(ah, UVERBS_OBJECT_AH,
 +                                                 user_wr->wr.ud.ah, file);
                        if (!ud->ah) {
                                kfree(ud);
                                ret = -EINVAL;
  }
  
  ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
 -                          struct ib_device *ib_dev,
                            const char __user *buf, int in_len,
                            int out_len)
  {
        struct ib_uverbs_post_recv      cmd;
        struct ib_uverbs_post_recv_resp resp;
 -      struct ib_recv_wr              *wr, *next, *bad_wr;
 +      struct ib_recv_wr              *wr, *next;
 +      const struct ib_recv_wr        *bad_wr;
        struct ib_qp                   *qp;
        ssize_t                         ret = -EINVAL;
  
        if (IS_ERR(wr))
                return PTR_ERR(wr);
  
 -      qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
 +      qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
        if (!qp)
                goto out;
  
  }
  
  ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
 -                              struct ib_device *ib_dev,
                                const char __user *buf, int in_len,
                                int out_len)
  {
        struct ib_uverbs_post_srq_recv      cmd;
        struct ib_uverbs_post_srq_recv_resp resp;
 -      struct ib_recv_wr                  *wr, *next, *bad_wr;
 +      struct ib_recv_wr                  *wr, *next;
 +      const struct ib_recv_wr            *bad_wr;
        struct ib_srq                      *srq;
        ssize_t                             ret = -EINVAL;
  
        if (IS_ERR(wr))
                return PTR_ERR(wr);
  
 -      srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext);
 +      srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
        if (!srq)
                goto out;
  
        resp.bad_wr = 0;
 -      ret = srq->device->post_srq_recv(srq, wr, &bad_wr);
 +      ret = srq->device->post_srq_recv ?
 +              srq->device->post_srq_recv(srq, wr, &bad_wr) : -EOPNOTSUPP;
  
        uobj_put_obj_read(srq);
  
@@@ -2506,6 -2592,7 +2555,6 @@@ out
  }
  
  ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
 -                          struct ib_device *ib_dev,
                            const char __user *buf, int in_len,
                            int out_len)
  {
        struct ib_uobject               *uobj;
        struct ib_pd                    *pd;
        struct ib_ah                    *ah;
 -      struct rdma_ah_attr             attr;
 +      struct rdma_ah_attr             attr = {};
        int ret;
        struct ib_udata                   udata;
 +      struct ib_device *ib_dev;
  
        if (out_len < sizeof resp)
                return -ENOSPC;
        if (copy_from_user(&cmd, buf, sizeof cmd))
                return -EFAULT;
  
 -      if (!rdma_is_port_valid(ib_dev, cmd.attr.port_num))
 -              return -EINVAL;
 -
        ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
                   u64_to_user_ptr(cmd.response) + sizeof(resp),
                   in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
                   out_len - sizeof(resp));
  
 -      uobj  = uobj_alloc(UVERBS_OBJECT_AH, file->ucontext);
 +      uobj = uobj_alloc(UVERBS_OBJECT_AH, file, &ib_dev);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);
  
 -      pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
 +      if (!rdma_is_port_valid(ib_dev, cmd.attr.port_num)) {
 +              ret = -EINVAL;
 +              goto err;
 +      }
 +
 +      pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
        if (!pd) {
                ret = -EINVAL;
                goto err;
        }
  
        uobj_put_obj_read(pd);
 -      uobj_alloc_commit(uobj);
 -
 -      return in_len;
 +      return uobj_alloc_commit(uobj, in_len);
  
  err_copy:
        rdma_destroy_ah(ah);
  }
  
  ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
 -                           struct ib_device *ib_dev,
                             const char __user *buf, int in_len, int out_len)
  {
        struct ib_uverbs_destroy_ah cmd;
 -      struct ib_uobject          *uobj;
 -      int                         ret;
  
        if (copy_from_user(&cmd, buf, sizeof cmd))
                return -EFAULT;
  
 -      uobj  = uobj_get_write(UVERBS_OBJECT_AH, cmd.ah_handle,
 -                             file->ucontext);
 -      if (IS_ERR(uobj))
 -              return PTR_ERR(uobj);
 -
 -      ret = uobj_remove_commit(uobj);
 -      return ret ?: in_len;
 +      return uobj_perform_destroy(UVERBS_OBJECT_AH, cmd.ah_handle, file,
 +                                  in_len);
  }
  
  ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
 -                             struct ib_device *ib_dev,
                               const char __user *buf, int in_len,
                               int out_len)
  {
        if (copy_from_user(&cmd, buf, sizeof cmd))
                return -EFAULT;
  
 -      qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
 +      qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
        if (!qp)
                return -EINVAL;
  
@@@ -2656,6 -2751,7 +2705,6 @@@ out_put
  }
  
  ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
 -                             struct ib_device *ib_dev,
                               const char __user *buf, int in_len,
                               int out_len)
  {
        if (copy_from_user(&cmd, buf, sizeof cmd))
                return -EFAULT;
  
 -      qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
 +      qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
        if (!qp)
                return -EINVAL;
  
@@@ -2714,27 -2810,29 +2763,27 @@@ static struct ib_uflow_resources *flow_
        resources = kzalloc(sizeof(*resources), GFP_KERNEL);
  
        if (!resources)
 -              goto err_res;
 +              return NULL;
 +
 +      if (!num_specs)
 +              goto out;
  
        resources->counters =
                kcalloc(num_specs, sizeof(*resources->counters), GFP_KERNEL);
 -
 -      if (!resources->counters)
 -              goto err_cnt;
 -
        resources->collection =
                kcalloc(num_specs, sizeof(*resources->collection), GFP_KERNEL);
  
 -      if (!resources->collection)
 -              goto err_collection;
 +      if (!resources->counters || !resources->collection)
 +              goto err;
  
 +out:
        resources->max = num_specs;
 -
        return resources;
  
 -err_collection:
 +err:
        kfree(resources->counters);
 -err_cnt:
        kfree(resources);
 -err_res:
 +
        return NULL;
  }
  
@@@ -2742,9 -2840,6 +2791,9 @@@ void ib_uverbs_flow_resources_free(stru
  {
        unsigned int i;
  
 +      if (!uflow_res)
 +              return;
 +
        for (i = 0; i < uflow_res->collection_num; i++)
                atomic_dec(&uflow_res->collection[i]->usecnt);
  
@@@ -2780,7 -2875,7 +2829,7 @@@ static void flow_resources_add(struct i
        uflow_res->num++;
  }
  
 -static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext,
 +static int kern_spec_to_ib_spec_action(struct ib_uverbs_file *ufile,
                                       struct ib_uverbs_flow_spec *kern_spec,
                                       union ib_flow_spec *ib_spec,
                                       struct ib_uflow_resources *uflow_res)
                ib_spec->action.act = uobj_get_obj_read(flow_action,
                                                        UVERBS_OBJECT_FLOW_ACTION,
                                                        kern_spec->action.handle,
 -                                                      ucontext);
 +                                                      ufile);
                if (!ib_spec->action.act)
                        return -EINVAL;
                ib_spec->action.size =
                        uobj_get_obj_read(counters,
                                          UVERBS_OBJECT_COUNTERS,
                                          kern_spec->flow_count.handle,
 -                                        ucontext);
 +                                        ufile);
                if (!ib_spec->flow_count.counters)
                        return -EINVAL;
                ib_spec->flow_count.size =
@@@ -2996,6 -3091,9 +3045,6 @@@ static int kern_spec_to_ib_spec_filter(
        void *kern_spec_mask;
        void *kern_spec_val;
  
 -      if (kern_spec->reserved)
 -              return -EINVAL;
 -
        kern_filter_sz = kern_spec_filter_sz(&kern_spec->hdr);
  
        kern_spec_val = (void *)kern_spec +
                                                     kern_filter_sz, ib_spec);
  }
  
 -static int kern_spec_to_ib_spec(struct ib_ucontext *ucontext,
 +static int kern_spec_to_ib_spec(struct ib_uverbs_file *ufile,
                                struct ib_uverbs_flow_spec *kern_spec,
                                union ib_flow_spec *ib_spec,
                                struct ib_uflow_resources *uflow_res)
                return -EINVAL;
  
        if (kern_spec->type >= IB_FLOW_SPEC_ACTION_TAG)
 -              return kern_spec_to_ib_spec_action(ucontext, kern_spec, ib_spec,
 +              return kern_spec_to_ib_spec_action(ufile, kern_spec, ib_spec,
                                                   uflow_res);
        else
                return kern_spec_to_ib_spec_filter(kern_spec, ib_spec);
  }
  
  int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
 -                         struct ib_device *ib_dev,
                           struct ib_udata *ucore,
                           struct ib_udata *uhw)
  {
        struct ib_wq_init_attr wq_init_attr = {};
        size_t required_cmd_sz;
        size_t required_resp_len;
 +      struct ib_device *ib_dev;
  
        required_cmd_sz = offsetof(typeof(cmd), max_sge) + sizeof(cmd.max_sge);
        required_resp_len = offsetof(typeof(resp), wqn) + sizeof(resp.wqn);
        if (cmd.comp_mask)
                return -EOPNOTSUPP;
  
 -      obj  = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ,
 -                                                file->ucontext);
 +      obj = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ, file,
 +                                               &ib_dev);
        if (IS_ERR(obj))
                return PTR_ERR(obj);
  
 -      pd  = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
 +      pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
        if (!pd) {
                err = -EINVAL;
                goto err_uobj;
        }
  
 -      cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
 +      cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
        if (!cq) {
                err = -EINVAL;
                goto err_put_pd;
  
        uobj_put_obj_read(pd);
        uobj_put_obj_read(cq);
 -      uobj_alloc_commit(&obj->uevent.uobject);
 -      return 0;
 +      return uobj_alloc_commit(&obj->uevent.uobject, 0);
  
  err_copy:
        ib_destroy_wq(wq);
@@@ -3140,6 -3239,7 +3189,6 @@@ err_uobj
  }
  
  int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file,
 -                          struct ib_device *ib_dev,
                            struct ib_udata *ucore,
                            struct ib_udata *uhw)
  {
                return -EOPNOTSUPP;
  
        resp.response_length = required_resp_len;
 -      uobj  = uobj_get_write(UVERBS_OBJECT_WQ, cmd.wq_handle,
 -                             file->ucontext);
 +      uobj = uobj_get_destroy(UVERBS_OBJECT_WQ, cmd.wq_handle, file);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);
  
        obj = container_of(uobj, struct ib_uwq_object, uevent.uobject);
 -      /*
 -       * Make sure we don't free the memory in remove_commit as we still
 -       * needs the uobject memory to create the response.
 -       */
 -      uverbs_uobject_get(uobj);
 -
 -      ret = uobj_remove_commit(uobj);
        resp.events_reported = obj->uevent.events_reported;
 -      uverbs_uobject_put(uobj);
 -      if (ret)
 -              return ret;
 +
 +      uobj_put_destroy(uobj);
  
        return ib_copy_to_udata(ucore, &resp, resp.response_length);
  }
  
  int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
 -                         struct ib_device *ib_dev,
                           struct ib_udata *ucore,
                           struct ib_udata *uhw)
  {
        if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS))
                return -EINVAL;
  
 -      wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, file->ucontext);
 +      wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, file);
        if (!wq)
                return -EINVAL;
  
@@@ -3235,6 -3345,7 +3284,6 @@@ out
  }
  
  int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
 -                                    struct ib_device *ib_dev,
                                      struct ib_udata *ucore,
                                      struct ib_udata *uhw)
  {
        u32 expected_in_size;
        size_t required_cmd_sz_header;
        size_t required_resp_len;
 +      struct ib_device *ib_dev;
  
        required_cmd_sz_header = offsetof(typeof(cmd), log_ind_tbl_size) + sizeof(cmd.log_ind_tbl_size);
        required_resp_len = offsetof(typeof(resp), ind_tbl_num) + sizeof(resp.ind_tbl_num);
  
        for (num_read_wqs = 0; num_read_wqs < num_wq_handles;
                        num_read_wqs++) {
 -              wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, wqs_handles[num_read_wqs],
 -                                     file->ucontext);
 +              wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ,
 +                                     wqs_handles[num_read_wqs], file);
                if (!wq) {
                        err = -EINVAL;
                        goto put_wqs;
                wqs[num_read_wqs] = wq;
        }
  
 -      uobj  = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, file->ucontext);
 +      uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, file, &ib_dev);
        if (IS_ERR(uobj)) {
                err = PTR_ERR(uobj);
                goto put_wqs;
        for (j = 0; j < num_read_wqs; j++)
                uobj_put_obj_read(wqs[j]);
  
 -      uobj_alloc_commit(uobj);
 -      return 0;
 +      return uobj_alloc_commit(uobj, 0);
  
  err_copy:
        ib_destroy_rwq_ind_table(rwq_ind_tbl);
@@@ -3378,10 -3489,12 +3427,10 @@@ err_free
  }
  
  int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
 -                                     struct ib_device *ib_dev,
                                       struct ib_udata *ucore,
                                       struct ib_udata *uhw)
  {
        struct ib_uverbs_ex_destroy_rwq_ind_table       cmd = {};
 -      struct ib_uobject               *uobj;
        int                     ret;
        size_t required_cmd_sz;
  
        if (cmd.comp_mask)
                return -EOPNOTSUPP;
  
 -      uobj  = uobj_get_write(UVERBS_OBJECT_RWQ_IND_TBL, cmd.ind_tbl_handle,
 -                             file->ucontext);
 -      if (IS_ERR(uobj))
 -              return PTR_ERR(uobj);
 -
 -      return uobj_remove_commit(uobj);
 +      return uobj_perform_destroy(UVERBS_OBJECT_RWQ_IND_TBL,
 +                                  cmd.ind_tbl_handle, file, 0);
  }
  
  int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
 -                           struct ib_device *ib_dev,
                             struct ib_udata *ucore,
                             struct ib_udata *uhw)
  {
        struct ib_flow_attr               *flow_attr;
        struct ib_qp                      *qp;
        struct ib_uflow_resources         *uflow_res;
+       struct ib_uverbs_flow_spec_hdr    *kern_spec;
        int err = 0;
-       void *kern_spec;
        void *ib_spec;
        int i;
 +      struct ib_device *ib_dev;
  
        if (ucore->inlen < sizeof(cmd))
                return -EINVAL;
                if (!kern_flow_attr)
                        return -ENOMEM;
  
-               memcpy(kern_flow_attr, &cmd.flow_attr, sizeof(*kern_flow_attr));
-               err = ib_copy_from_udata(kern_flow_attr + 1, ucore,
+               *kern_flow_attr = cmd.flow_attr;
+               err = ib_copy_from_udata(&kern_flow_attr->flow_specs, ucore,
                                         cmd.flow_attr.size);
                if (err)
                        goto err_free_attr;
                kern_flow_attr = &cmd.flow_attr;
        }
  
 -      uobj  = uobj_alloc(UVERBS_OBJECT_FLOW, file->ucontext);
 +      uobj = uobj_alloc(UVERBS_OBJECT_FLOW, file, &ib_dev);
        if (IS_ERR(uobj)) {
                err = PTR_ERR(uobj);
                goto err_free_attr;
        }
  
 -      qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
 +      qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
        if (!qp) {
                err = -EINVAL;
                goto err_uobj;
        }
  
+       if (qp->qp_type != IB_QPT_UD && qp->qp_type != IB_QPT_RAW_PACKET) {
+               err = -EINVAL;
+               goto err_put;
+       }
 +      if (!qp->device->create_flow) {
 +              err = -EOPNOTSUPP;
 +              goto err_put;
 +      }
 +
        flow_attr = kzalloc(struct_size(flow_attr, flows,
                                cmd.flow_attr.num_of_specs), GFP_KERNEL);
        if (!flow_attr) {
        flow_attr->flags = kern_flow_attr->flags;
        flow_attr->size = sizeof(*flow_attr);
  
-       kern_spec = kern_flow_attr + 1;
+       kern_spec = kern_flow_attr->flow_specs;
        ib_spec = flow_attr + 1;
        for (i = 0; i < flow_attr->num_of_specs &&
-            cmd.flow_attr.size > offsetof(struct ib_uverbs_flow_spec, reserved) &&
-            cmd.flow_attr.size >=
-            ((struct ib_uverbs_flow_spec *)kern_spec)->size; i++) {
-               err = kern_spec_to_ib_spec(file, kern_spec, ib_spec, uflow_res);
+                       cmd.flow_attr.size >= sizeof(*kern_spec) &&
+                       cmd.flow_attr.size >= kern_spec->size;
+            i++) {
+               err = kern_spec_to_ib_spec(
 -                              file->ucontext, (struct ib_uverbs_flow_spec *)kern_spec,
++                              file, (struct ib_uverbs_flow_spec *)kern_spec,
+                               ib_spec, uflow_res);
                if (err)
                        goto err_free;
  
                flow_attr->size +=
                        ((union ib_flow_spec *) ib_spec)->size;
-               cmd.flow_attr.size -= ((struct ib_uverbs_flow_spec *)kern_spec)->size;
-               kern_spec += ((struct ib_uverbs_flow_spec *) kern_spec)->size;
+               cmd.flow_attr.size -= kern_spec->size;
+               kern_spec = ((void *)kern_spec) + kern_spec->size;
                ib_spec += ((union ib_flow_spec *) ib_spec)->size;
        }
        if (cmd.flow_attr.size || (i != flow_attr->num_of_specs)) {
        }
        atomic_inc(&qp->usecnt);
        flow_id->qp = qp;
 +      flow_id->device = qp->device;
        flow_id->uobject = uobj;
        uobj->object = flow_id;
        uflow = container_of(uobj, typeof(*uflow), uobject);
                goto err_copy;
  
        uobj_put_obj_read(qp);
 -      uobj_alloc_commit(uobj);
        kfree(flow_attr);
        if (cmd.flow_attr.num_of_specs)
                kfree(kern_flow_attr);
 -      return 0;
 +      return uobj_alloc_commit(uobj, 0);
  err_copy:
 -      ib_destroy_flow(flow_id);
 +      if (!qp->device->destroy_flow(flow_id))
 +              atomic_dec(&qp->usecnt);
  err_free:
        ib_uverbs_flow_resources_free(uflow_res);
  err_free_flow_attr:
@@@ -3584,10 -3702,12 +3640,10 @@@ err_free_attr
  }
  
  int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
 -                            struct ib_device *ib_dev,
                              struct ib_udata *ucore,
                              struct ib_udata *uhw)
  {
        struct ib_uverbs_destroy_flow   cmd;
 -      struct ib_uobject               *uobj;
        int                             ret;
  
        if (ucore->inlen < sizeof(cmd))
        if (cmd.comp_mask)
                return -EINVAL;
  
 -      uobj  = uobj_get_write(UVERBS_OBJECT_FLOW, cmd.flow_handle,
 -                             file->ucontext);
 -      if (IS_ERR(uobj))
 -              return PTR_ERR(uobj);
 -
 -      ret = uobj_remove_commit(uobj);
 -      return ret;
 +      return uobj_perform_destroy(UVERBS_OBJECT_FLOW, cmd.flow_handle, file,
 +                                  0);
  }
  
  static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
 -                              struct ib_device *ib_dev,
                                struct ib_uverbs_create_xsrq *cmd,
                                struct ib_udata *udata)
  {
        struct ib_uobject               *uninitialized_var(xrcd_uobj);
        struct ib_srq_init_attr          attr;
        int ret;
 +      struct ib_device *ib_dev;
  
 -      obj  = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ,
 -                                                 file->ucontext);
 +      obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, file,
 +                                                &ib_dev);
        if (IS_ERR(obj))
                return PTR_ERR(obj);
  
  
        if (cmd->srq_type == IB_SRQT_XRC) {
                xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->xrcd_handle,
 -                                        file->ucontext);
 +                                        file);
                if (IS_ERR(xrcd_uobj)) {
                        ret = -EINVAL;
                        goto err;
        }
  
        if (ib_srq_has_cq(cmd->srq_type)) {
 -              attr.ext.cq  = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->cq_handle,
 -                                               file->ucontext);
 +              attr.ext.cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ,
 +                                              cmd->cq_handle, file);
                if (!attr.ext.cq) {
                        ret = -EINVAL;
                        goto err_put_xrcd;
                }
        }
  
 -      pd  = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file->ucontext);
 +      pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file);
        if (!pd) {
                ret = -EINVAL;
                goto err_put_cq;
                uobj_put_obj_read(attr.ext.cq);
  
        uobj_put_obj_read(pd);
 -      uobj_alloc_commit(&obj->uevent.uobject);
 -
 -      return 0;
 +      return uobj_alloc_commit(&obj->uevent.uobject, 0);
  
  err_copy:
        ib_destroy_srq(srq);
@@@ -3741,6 -3868,7 +3797,6 @@@ err
  }
  
  ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
 -                           struct ib_device *ib_dev,
                             const char __user *buf, int in_len,
                             int out_len)
  {
                   in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
                   out_len - sizeof(resp));
  
 -      ret = __uverbs_create_xsrq(file, ib_dev, &xcmd, &udata);
 +      ret = __uverbs_create_xsrq(file, &xcmd, &udata);
        if (ret)
                return ret;
  
  }
  
  ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file,
 -                            struct ib_device *ib_dev,
                              const char __user *buf, int in_len, int out_len)
  {
        struct ib_uverbs_create_xsrq     cmd;
                   in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
                   out_len - sizeof(resp));
  
 -      ret = __uverbs_create_xsrq(file, ib_dev, &cmd, &udata);
 +      ret = __uverbs_create_xsrq(file, &cmd, &udata);
        if (ret)
                return ret;
  
  }
  
  ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
 -                           struct ib_device *ib_dev,
                             const char __user *buf, int in_len,
                             int out_len)
  {
        ib_uverbs_init_udata(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
                   out_len);
  
 -      srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext);
 +      srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
        if (!srq)
                return -EINVAL;
  
  }
  
  ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
 -                          struct ib_device *ib_dev,
                            const char __user *buf,
                            int in_len, int out_len)
  {
        if (copy_from_user(&cmd, buf, sizeof cmd))
                return -EFAULT;
  
 -      srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext);
 +      srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
        if (!srq)
                return -EINVAL;
  
  }
  
  ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
 -                            struct ib_device *ib_dev,
                              const char __user *buf, int in_len,
                              int out_len)
  {
        struct ib_uverbs_destroy_srq_resp resp;
        struct ib_uobject                *uobj;
        struct ib_uevent_object          *obj;
 -      int                               ret = -EINVAL;
  
        if (copy_from_user(&cmd, buf, sizeof cmd))
                return -EFAULT;
  
 -      uobj  = uobj_get_write(UVERBS_OBJECT_SRQ, cmd.srq_handle,
 -                             file->ucontext);
 +      uobj = uobj_get_destroy(UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);
  
        obj = container_of(uobj, struct ib_uevent_object, uobject);
 -      /*
 -       * Make sure we don't free the memory in remove_commit as we still
 -       * needs the uobject memory to create the response.
 -       */
 -      uverbs_uobject_get(uobj);
 -
        memset(&resp, 0, sizeof(resp));
 -
 -      ret = uobj_remove_commit(uobj);
 -      if (ret) {
 -              uverbs_uobject_put(uobj);
 -              return ret;
 -      }
        resp.events_reported = obj->events_reported;
 -      uverbs_uobject_put(uobj);
 +
 +      uobj_put_destroy(uobj);
 +
        if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp)))
                return -EFAULT;
  
  }
  
  int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
 -                            struct ib_device *ib_dev,
                              struct ib_udata *ucore,
                              struct ib_udata *uhw)
  {
        struct ib_uverbs_ex_query_device_resp resp = { {0} };
        struct ib_uverbs_ex_query_device  cmd;
        struct ib_device_attr attr = {0};
 +      struct ib_ucontext *ucontext;
 +      struct ib_device *ib_dev;
        int err;
  
 +      ucontext = ib_uverbs_get_ucontext(file);
 +      if (IS_ERR(ucontext))
 +              return PTR_ERR(ucontext);
 +      ib_dev = ucontext->device;
 +
        if (!ib_dev->query_device)
                return -EOPNOTSUPP;
  
        if (err)
                return err;
  
 -      copy_query_dev_fields(file, ib_dev, &resp.base, &attr);
 +      copy_query_dev_fields(ucontext, &resp.base, &attr);
  
        if (ucore->outlen < resp.response_length + sizeof(resp.odp_caps))
                goto end;
@@@ -4028,6 -4166,7 +4084,6 @@@ end
  }
  
  int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file,
 -                         struct ib_device *ib_dev,
                           struct ib_udata *ucore,
                           struct ib_udata *uhw)
  {
        if (cmd.attr_mask > IB_CQ_MODERATE)
                return -EOPNOTSUPP;
  
 -      cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
 +      cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
        if (!cq)
                return -EINVAL;
  
@@@ -326,162 -326,12 +326,162 @@@ EXPORT_SYMBOL(ib_dealloc_pd)
  
  /* Address handles */
  
 +/**
 + * rdma_copy_ah_attr - Copy rdma ah attribute from source to destination.
 + * @dest:       Pointer to destination ah_attr. Contents of the destination
 + *              pointer are assumed to be invalid and the attributes are overwritten.
 + * @src:        Pointer to source ah_attr.
 + */
 +void rdma_copy_ah_attr(struct rdma_ah_attr *dest,
 +                     const struct rdma_ah_attr *src)
 +{
 +      *dest = *src;
 +      if (dest->grh.sgid_attr)
 +              rdma_hold_gid_attr(dest->grh.sgid_attr);
 +}
 +EXPORT_SYMBOL(rdma_copy_ah_attr);
 +
 +/**
 + * rdma_replace_ah_attr - Replace a valid ah_attr with a new one.
 + * @old:        Pointer to existing ah_attr which needs to be replaced.
 + *              old is assumed to be valid or zero'd
 + * @new:        Pointer to the new ah_attr.
 + *
 + * rdma_replace_ah_attr() first releases any reference held by the old ah_attr
 + * if the old ah_attr is valid; after that it copies the new attribute and
 + * takes a reference on its sgid_attr.
 + */
 +void rdma_replace_ah_attr(struct rdma_ah_attr *old,
 +                        const struct rdma_ah_attr *new)
 +{
 +      rdma_destroy_ah_attr(old);
 +      *old = *new;
 +      if (old->grh.sgid_attr)
 +              rdma_hold_gid_attr(old->grh.sgid_attr);
 +}
 +EXPORT_SYMBOL(rdma_replace_ah_attr);
 +
 +/**
 + * rdma_move_ah_attr - Move ah_attr pointed by source to destination.
 + * @dest:       Pointer to destination ah_attr to copy to.
 + *              dest is assumed to be valid or zero'd
 + * @src:        Pointer to the source ah_attr.
 + *
 + * rdma_move_ah_attr() first releases any reference in the destination ah_attr
 + * if it is valid. This also transfers ownership of internal references from
 + * src to dest, making src invalid in the process. No new reference of the src
 + * ah_attr is taken.
 + */
 +void rdma_move_ah_attr(struct rdma_ah_attr *dest, struct rdma_ah_attr *src)
 +{
 +      rdma_destroy_ah_attr(dest);
 +      *dest = *src;
 +      src->grh.sgid_attr = NULL;
 +}
 +EXPORT_SYMBOL(rdma_move_ah_attr);
 +
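Editorial sketch, not part of this merge: one way the three helpers above pair
with rdma_destroy_ah_attr() so the embedded sgid_attr reference is never
leaked.  The function name example_ah_attr_helpers and its arguments are
hypothetical.

static void example_ah_attr_helpers(struct rdma_ah_attr *cached,
				    const struct rdma_ah_attr *incoming)
{
	struct rdma_ah_attr dup = {};

	/* Duplicate: takes an extra reference on incoming->grh.sgid_attr */
	rdma_copy_ah_attr(&dup, incoming);

	/* Overwrite a valid attribute, dropping its old reference first */
	rdma_replace_ah_attr(cached, &dup);

	/* Transfer ownership: cached drops its reference and adopts dup's */
	rdma_move_ah_attr(cached, &dup);

	/*
	 * dup no longer holds a reference, so destroying it is a no-op;
	 * cached must eventually be released with rdma_destroy_ah_attr().
	 */
	rdma_destroy_ah_attr(&dup);
}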
 +/*
 + * Validate that the rdma_ah_attr is valid for the device before passing it
 + * off to the driver.
 + */
 +static int rdma_check_ah_attr(struct ib_device *device,
 +                            struct rdma_ah_attr *ah_attr)
 +{
 +      if (!rdma_is_port_valid(device, ah_attr->port_num))
 +              return -EINVAL;
 +
 +      if ((rdma_is_grh_required(device, ah_attr->port_num) ||
 +           ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) &&
 +          !(ah_attr->ah_flags & IB_AH_GRH))
 +              return -EINVAL;
 +
 +      if (ah_attr->grh.sgid_attr) {
 +              /*
 +               * Make sure the passed sgid_attr is consistent with the
 +               * parameters
 +               */
 +              if (ah_attr->grh.sgid_attr->index != ah_attr->grh.sgid_index ||
 +                  ah_attr->grh.sgid_attr->port_num != ah_attr->port_num)
 +                      return -EINVAL;
 +      }
 +      return 0;
 +}
 +
 +/*
 + * If the ah requires a GRH then ensure that the sgid_attr pointer is filled in.
 + * On success the caller is responsible for calling rdma_unfill_sgid_attr().
 + */
 +static int rdma_fill_sgid_attr(struct ib_device *device,
 +                             struct rdma_ah_attr *ah_attr,
 +                             const struct ib_gid_attr **old_sgid_attr)
 +{
 +      const struct ib_gid_attr *sgid_attr;
 +      struct ib_global_route *grh;
 +      int ret;
 +
 +      *old_sgid_attr = ah_attr->grh.sgid_attr;
 +
 +      ret = rdma_check_ah_attr(device, ah_attr);
 +      if (ret)
 +              return ret;
 +
 +      if (!(ah_attr->ah_flags & IB_AH_GRH))
 +              return 0;
 +
 +      grh = rdma_ah_retrieve_grh(ah_attr);
 +      if (grh->sgid_attr)
 +              return 0;
 +
 +      sgid_attr =
 +              rdma_get_gid_attr(device, ah_attr->port_num, grh->sgid_index);
 +      if (IS_ERR(sgid_attr))
 +              return PTR_ERR(sgid_attr);
 +
 +      /* Move ownership of the kref into the ah_attr */
 +      grh->sgid_attr = sgid_attr;
 +      return 0;
 +}
 +
 +static void rdma_unfill_sgid_attr(struct rdma_ah_attr *ah_attr,
 +                                const struct ib_gid_attr *old_sgid_attr)
 +{
 +      /*
 +       * rdma_fill_sgid_attr() didn't change anything; the caller retains
 +       * ownership of whatever it passed in.
 +       */
 +      if (ah_attr->grh.sgid_attr == old_sgid_attr)
 +              return;
 +
 +      /*
 +       * Otherwise, we need to undo what rdma_fill_sgid_attr() did so the
 +       * caller doesn't see any change in the rdma_ah_attr. If we get here,
 +       * old_sgid_attr is NULL.
 +       */
 +      rdma_destroy_ah_attr(ah_attr);
 +}
 +
 +static const struct ib_gid_attr *
 +rdma_update_sgid_attr(struct rdma_ah_attr *ah_attr,
 +                    const struct ib_gid_attr *old_attr)
 +{
 +      if (old_attr)
 +              rdma_put_gid_attr(old_attr);
 +      if (ah_attr->ah_flags & IB_AH_GRH) {
 +              rdma_hold_gid_attr(ah_attr->grh.sgid_attr);
 +              return ah_attr->grh.sgid_attr;
 +      }
 +      return NULL;
 +}
 +
  static struct ib_ah *_rdma_create_ah(struct ib_pd *pd,
                                     struct rdma_ah_attr *ah_attr,
                                     struct ib_udata *udata)
  {
        struct ib_ah *ah;
  
 +      if (!pd->device->create_ah)
 +              return ERR_PTR(-EOPNOTSUPP);
 +
        ah = pd->device->create_ah(pd, ah_attr, udata);
  
        if (!IS_ERR(ah)) {
                ah->pd      = pd;
                ah->uobject = NULL;
                ah->type    = ah_attr->type;
 +              ah->sgid_attr = rdma_update_sgid_attr(ah_attr, NULL);
 +
                atomic_inc(&pd->usecnt);
        }
  
        return ah;
  }
  
 +/**
 + * rdma_create_ah - Creates an address handle for the
 + * given address vector.
 + * @pd: The protection domain associated with the address handle.
 + * @ah_attr: The attributes of the address vector.
 + *
 + * On success it returns a valid address handle; on failure it returns an
 + * ERR_PTR() encoded error.
 + * The address handle is used to reference a local or global destination
 + * in all UD QP post sends.
 + */
  struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr)
  {
 -      return _rdma_create_ah(pd, ah_attr, NULL);
 +      const struct ib_gid_attr *old_sgid_attr;
 +      struct ib_ah *ah;
 +      int ret;
 +
 +      ret = rdma_fill_sgid_attr(pd->device, ah_attr, &old_sgid_attr);
 +      if (ret)
 +              return ERR_PTR(ret);
 +
 +      ah = _rdma_create_ah(pd, ah_attr, NULL);
 +
 +      rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
 +      return ah;
  }
  EXPORT_SYMBOL(rdma_create_ah);
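Editorial sketch, not part of this merge: a minimal kernel caller of
rdma_create_ah() for a plain IB destination.  The helper name
example_create_ib_ah and the dlid/sl/port values are hypothetical.

static struct ib_ah *example_create_ib_ah(struct ib_pd *pd, u32 dlid, u8 port)
{
	struct rdma_ah_attr ah_attr = {};

	ah_attr.type = RDMA_AH_ATTR_TYPE_IB;
	rdma_ah_set_dlid(&ah_attr, dlid);
	rdma_ah_set_sl(&ah_attr, 0);
	rdma_ah_set_port_num(&ah_attr, port);

	/* No GRH is requested, so no sgid_attr reference is taken or kept */
	return rdma_create_ah(pd, &ah_attr);
}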
  
@@@ -541,27 -368,15 +541,27 @@@ struct ib_ah *rdma_create_user_ah(struc
                                  struct rdma_ah_attr *ah_attr,
                                  struct ib_udata *udata)
  {
 +      const struct ib_gid_attr *old_sgid_attr;
 +      struct ib_ah *ah;
        int err;
  
 +      err = rdma_fill_sgid_attr(pd->device, ah_attr, &old_sgid_attr);
 +      if (err)
 +              return ERR_PTR(err);
 +
        if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
                err = ib_resolve_eth_dmac(pd->device, ah_attr);
 -              if (err)
 -                      return ERR_PTR(err);
 +              if (err) {
 +                      ah = ERR_PTR(err);
 +                      goto out;
 +              }
        }
  
 -      return _rdma_create_ah(pd, ah_attr, udata);
 +      ah = _rdma_create_ah(pd, ah_attr, udata);
 +
 +out:
 +      rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
 +      return ah;
  }
  EXPORT_SYMBOL(rdma_create_user_ah);
  
@@@ -640,16 -455,16 +640,16 @@@ static bool find_gid_index(const union 
        return true;
  }
  
 -static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num,
 -                                 u16 vlan_id, const union ib_gid *sgid,
 -                                 enum ib_gid_type gid_type,
 -                                 u16 *gid_index)
 +static const struct ib_gid_attr *
 +get_sgid_attr_from_eth(struct ib_device *device, u8 port_num,
 +                     u16 vlan_id, const union ib_gid *sgid,
 +                     enum ib_gid_type gid_type)
  {
        struct find_gid_index_context context = {.vlan_id = vlan_id,
                                                 .gid_type = gid_type};
  
 -      return ib_find_gid_by_filter(device, sgid, port_num, find_gid_index,
 -                                   &context, gid_index);
 +      return rdma_find_gid_by_filter(device, sgid, port_num, find_gid_index,
 +                                     &context);
  }
  
  int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr,
@@@ -693,24 -508,39 +693,24 @@@ EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr
  static int ib_resolve_unicast_gid_dmac(struct ib_device *device,
                                       struct rdma_ah_attr *ah_attr)
  {
 -      struct ib_gid_attr sgid_attr;
 -      struct ib_global_route *grh;
 +      struct ib_global_route *grh = rdma_ah_retrieve_grh(ah_attr);
 +      const struct ib_gid_attr *sgid_attr = grh->sgid_attr;
        int hop_limit = 0xff;
 -      union ib_gid sgid;
 -      int ret;
 -
 -      grh = rdma_ah_retrieve_grh(ah_attr);
 -
 -      ret = ib_query_gid(device,
 -                         rdma_ah_get_port_num(ah_attr),
 -                         grh->sgid_index,
 -                         &sgid, &sgid_attr);
 -      if (ret || !sgid_attr.ndev) {
 -              if (!ret)
 -                      ret = -ENXIO;
 -              return ret;
 -      }
 +      int ret = 0;
  
        /* If destination is link local and source GID is RoCEv1,
         * IP stack is not used.
         */
        if (rdma_link_local_addr((struct in6_addr *)grh->dgid.raw) &&
 -          sgid_attr.gid_type == IB_GID_TYPE_ROCE) {
 +          sgid_attr->gid_type == IB_GID_TYPE_ROCE) {
                rdma_get_ll_mac((struct in6_addr *)grh->dgid.raw,
                                ah_attr->roce.dmac);
 -              goto done;
 +              return ret;
        }
  
 -      ret = rdma_addr_find_l2_eth_by_grh(&sgid, &grh->dgid,
 +      ret = rdma_addr_find_l2_eth_by_grh(&sgid_attr->gid, &grh->dgid,
                                           ah_attr->roce.dmac,
 -                                         sgid_attr.ndev, &hop_limit);
 -done:
 -      dev_put(sgid_attr.ndev);
 +                                         sgid_attr->ndev, &hop_limit);
  
        grh->hop_limit = hop_limit;
        return ret;
   * as sgid, and sgid is used as dgid because sgid contains the destination
   * GID to which to respond.
   *
 + * On success the caller is responsible for calling rdma_destroy_ah_attr() on
 + * the attr.
   */
  int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num,
                            const struct ib_wc *wc, const struct ib_grh *grh,
                            struct rdma_ah_attr *ah_attr)
  {
        u32 flow_class;
 -      u16 gid_index;
        int ret;
        enum rdma_network_type net_type = RDMA_NETWORK_IB;
        enum ib_gid_type gid_type = IB_GID_TYPE_IB;
 +      const struct ib_gid_attr *sgid_attr;
        int hoplimit = 0xff;
        union ib_gid dgid;
        union ib_gid sgid;
                if (!(wc->wc_flags & IB_WC_GRH))
                        return -EPROTOTYPE;
  
 -              ret = get_sgid_index_from_eth(device, port_num,
 -                                            vlan_id, &dgid,
 -                                            gid_type, &gid_index);
 -              if (ret)
 -                      return ret;
 +              sgid_attr = get_sgid_attr_from_eth(device, port_num,
 +                                                 vlan_id, &dgid,
 +                                                 gid_type);
 +              if (IS_ERR(sgid_attr))
 +                      return PTR_ERR(sgid_attr);
  
                flow_class = be32_to_cpu(grh->version_tclass_flow);
 -              rdma_ah_set_grh(ah_attr, &sgid,
 -                              flow_class & 0xFFFFF,
 -                              (u8)gid_index, hoplimit,
 -                              (flow_class >> 20) & 0xFF);
 -              return ib_resolve_unicast_gid_dmac(device, ah_attr);
 +              rdma_move_grh_sgid_attr(ah_attr,
 +                                      &sgid,
 +                                      flow_class & 0xFFFFF,
 +                                      hoplimit,
 +                                      (flow_class >> 20) & 0xFF,
 +                                      sgid_attr);
 +
 +              ret = ib_resolve_unicast_gid_dmac(device, ah_attr);
 +              if (ret)
 +                      rdma_destroy_ah_attr(ah_attr);
 +
 +              return ret;
        } else {
                rdma_ah_set_dlid(ah_attr, wc->slid);
                rdma_ah_set_path_bits(ah_attr, wc->dlid_path_bits);
  
 -              if (wc->wc_flags & IB_WC_GRH) {
 -                      if (dgid.global.interface_id != cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) {
 -                              ret = ib_find_cached_gid_by_port(device, &dgid,
 -                                                               IB_GID_TYPE_IB,
 -                                                               port_num, NULL,
 -                                                               &gid_index);
 -                              if (ret)
 -                                      return ret;
 -                      } else {
 -                              gid_index = 0;
 -                      }
 +              if ((wc->wc_flags & IB_WC_GRH) == 0)
 +                      return 0;
 +
 +              if (dgid.global.interface_id !=
 +                                      cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) {
 +                      sgid_attr = rdma_find_gid_by_port(
 +                              device, &dgid, IB_GID_TYPE_IB, port_num, NULL);
 +              } else
 +                      sgid_attr = rdma_get_gid_attr(device, port_num, 0);
  
 -                      flow_class = be32_to_cpu(grh->version_tclass_flow);
 -                      rdma_ah_set_grh(ah_attr, &sgid,
 +              if (IS_ERR(sgid_attr))
 +                      return PTR_ERR(sgid_attr);
 +              flow_class = be32_to_cpu(grh->version_tclass_flow);
 +              rdma_move_grh_sgid_attr(ah_attr,
 +                                      &sgid,
                                        flow_class & 0xFFFFF,
 -                                      (u8)gid_index, hoplimit,
 -                                      (flow_class >> 20) & 0xFF);
 -              }
 +                                      hoplimit,
 +                                      (flow_class >> 20) & 0xFF,
 +                                      sgid_attr);
 +
                return 0;
        }
  }
  EXPORT_SYMBOL(ib_init_ah_attr_from_wc);
  
 +/**
 + * rdma_move_grh_sgid_attr - Sets the sgid attribute of GRH, taking ownership
 + * of the reference
 + *
 + * @attr:     Pointer to AH attribute structure
 + * @dgid:     Destination GID
 + * @flow_label:       Flow label
 + * @hop_limit:        Hop limit
 + * @traffic_class: traffic class
 + * @sgid_attr:        Pointer to SGID attribute
 + *
 + * This takes ownership of the sgid_attr reference. After calling this
 + * function the caller must ensure rdma_destroy_ah_attr() is invoked before
 + * the rdma_ah_attr itself is destroyed.
 + */
 +void rdma_move_grh_sgid_attr(struct rdma_ah_attr *attr, union ib_gid *dgid,
 +                           u32 flow_label, u8 hop_limit, u8 traffic_class,
 +                           const struct ib_gid_attr *sgid_attr)
 +{
 +      rdma_ah_set_grh(attr, dgid, flow_label, sgid_attr->index, hop_limit,
 +                      traffic_class);
 +      attr->grh.sgid_attr = sgid_attr;
 +}
 +EXPORT_SYMBOL(rdma_move_grh_sgid_attr);
 +
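Editorial sketch, not part of this merge: handing a GID table reference
obtained from rdma_get_gid_attr() over to an ah_attr via
rdma_move_grh_sgid_attr().  The helper name example_fill_grh is hypothetical;
flow label and traffic class are left at zero and the hop limit at 0xff,
mirroring the defaults used above.

static int example_fill_grh(struct ib_device *device, u8 port_num,
			    union ib_gid *dgid, struct rdma_ah_attr *attr)
{
	const struct ib_gid_attr *sgid_attr;

	sgid_attr = rdma_get_gid_attr(device, port_num, 0);
	if (IS_ERR(sgid_attr))
		return PTR_ERR(sgid_attr);

	/* attr now owns the reference; release it with rdma_destroy_ah_attr() */
	rdma_move_grh_sgid_attr(attr, dgid, 0, 0xff, 0, sgid_attr);
	return 0;
}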
 +/**
 + * rdma_destroy_ah_attr - Release reference to SGID attribute of
 + * ah attribute.
 + * @ah_attr: Pointer to ah attribute
 + *
 + * Release reference to the SGID attribute of the ah attribute if it is
 + * non-NULL. It is safe to call this multiple times, and safe to call it on
 + * a zero initialized ah_attr.
 + */
 +void rdma_destroy_ah_attr(struct rdma_ah_attr *ah_attr)
 +{
 +      if (ah_attr->grh.sgid_attr) {
 +              rdma_put_gid_attr(ah_attr->grh.sgid_attr);
 +              ah_attr->grh.sgid_attr = NULL;
 +      }
 +}
 +EXPORT_SYMBOL(rdma_destroy_ah_attr);
 +
  struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
                                   const struct ib_grh *grh, u8 port_num)
  {
        struct rdma_ah_attr ah_attr;
 +      struct ib_ah *ah;
        int ret;
  
        ret = ib_init_ah_attr_from_wc(pd->device, port_num, wc, grh, &ah_attr);
        if (ret)
                return ERR_PTR(ret);
  
 -      return rdma_create_ah(pd, &ah_attr);
 +      ah = rdma_create_ah(pd, &ah_attr);
 +
 +      rdma_destroy_ah_attr(&ah_attr);
 +      return ah;
  }
  EXPORT_SYMBOL(ib_create_ah_from_wc);
  
  int rdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
  {
 +      const struct ib_gid_attr *old_sgid_attr;
 +      int ret;
 +
        if (ah->type != ah_attr->type)
                return -EINVAL;
  
 -      return ah->device->modify_ah ?
 +      ret = rdma_fill_sgid_attr(ah->device, ah_attr, &old_sgid_attr);
 +      if (ret)
 +              return ret;
 +
 +      ret = ah->device->modify_ah ?
                ah->device->modify_ah(ah, ah_attr) :
                -EOPNOTSUPP;
 +
 +      ah->sgid_attr = rdma_update_sgid_attr(ah_attr, ah->sgid_attr);
 +      rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
 +      return ret;
  }
  EXPORT_SYMBOL(rdma_modify_ah);
  
  int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
  {
 +      ah_attr->grh.sgid_attr = NULL;
 +
        return ah->device->query_ah ?
                ah->device->query_ah(ah, ah_attr) :
                -EOPNOTSUPP;
@@@ -910,17 -669,13 +910,17 @@@ EXPORT_SYMBOL(rdma_query_ah)
  
  int rdma_destroy_ah(struct ib_ah *ah)
  {
 +      const struct ib_gid_attr *sgid_attr = ah->sgid_attr;
        struct ib_pd *pd;
        int ret;
  
        pd = ah->pd;
        ret = ah->device->destroy_ah(ah);
 -      if (!ret)
 +      if (!ret) {
                atomic_dec(&pd->usecnt);
 +              if (sgid_attr)
 +                      rdma_put_gid_attr(sgid_attr);
 +      }
  
        return ret;
  }
@@@ -1535,19 -1290,16 +1535,19 @@@ bool ib_modify_qp_is_ok(enum ib_qp_stat
  }
  EXPORT_SYMBOL(ib_modify_qp_is_ok);
  
 +/**
 + * ib_resolve_eth_dmac - Resolve destination mac address
 + * @device:           Device to consider
 + * @ah_attr:          address handle attribute which describes the
 + *                    source and destination parameters
 + * ib_resolve_eth_dmac() resolves the destination mac address and L3 hop limit.
 + * It returns 0 on success or an appropriate error code. It initializes the
 + * necessary ah_attr fields when the call is successful.
 + */
  static int ib_resolve_eth_dmac(struct ib_device *device,
                               struct rdma_ah_attr *ah_attr)
  {
 -      int           ret = 0;
 -      struct ib_global_route *grh;
 -
 -      if (!rdma_is_port_valid(device, rdma_ah_get_port_num(ah_attr)))
 -              return -EINVAL;
 -
 -      grh = rdma_ah_retrieve_grh(ah_attr);
 +      int ret = 0;
  
        if (rdma_is_multicast_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) {
                if (ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw)) {
        return ret;
  }
  
 +static bool is_qp_type_connected(const struct ib_qp *qp)
 +{
 +      return (qp->qp_type == IB_QPT_UC ||
 +              qp->qp_type == IB_QPT_RC ||
 +              qp->qp_type == IB_QPT_XRC_INI ||
 +              qp->qp_type == IB_QPT_XRC_TGT);
 +}
 +
  /**
   * IB core internal function to perform QP attributes modification.
   */
@@@ -1580,53 -1324,8 +1580,53 @@@ static int _ib_modify_qp(struct ib_qp *
                         int attr_mask, struct ib_udata *udata)
  {
        u8 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
 +      const struct ib_gid_attr *old_sgid_attr_av;
 +      const struct ib_gid_attr *old_sgid_attr_alt_av;
        int ret;
  
 +      if (attr_mask & IB_QP_AV) {
 +              ret = rdma_fill_sgid_attr(qp->device, &attr->ah_attr,
 +                                        &old_sgid_attr_av);
 +              if (ret)
 +                      return ret;
 +      }
 +      if (attr_mask & IB_QP_ALT_PATH) {
 +              /*
 +               * FIXME: This does not track the migration state, so if the
 +               * user loads a new alternate path after the HW has migrated
 +               * from primary->alternate we will keep the wrong
 +               * references. This is OK for IB because the reference
 +               * counting does not serve any functional purpose.
 +               */
 +              ret = rdma_fill_sgid_attr(qp->device, &attr->alt_ah_attr,
 +                                        &old_sgid_attr_alt_av);
 +              if (ret)
 +                      goto out_av;
 +
 +              /*
 +               * Today the core code can only handle alternate paths and APM
 +               * for IB. Ban them in roce mode.
 +               */
 +              if (!(rdma_protocol_ib(qp->device,
 +                                     attr->alt_ah_attr.port_num) &&
 +                    rdma_protocol_ib(qp->device, port))) {
 +                      ret = -EINVAL;
 +                      goto out;
 +              }
 +      }
 +
 +      /*
 +       * If the user provided the qp_attr then we have to resolve it. Kernel
 +       * users have to provide already resolved rdma_ah_attr's
 +       */
 +      if (udata && (attr_mask & IB_QP_AV) &&
 +          attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE &&
 +          is_qp_type_connected(qp)) {
 +              ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr);
 +              if (ret)
 +                      goto out;
 +      }
 +
        if (rdma_ib_or_roce(qp->device, port)) {
                if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) {
                        pr_warn("%s: %s rq_psn overflow, masking to 24 bits\n",
        }
  
        ret = ib_security_modify_qp(qp, attr, attr_mask, udata);
 -      if (!ret && (attr_mask & IB_QP_PORT))
 -              qp->port = attr->port_num;
 +      if (ret)
 +              goto out;
  
 +      if (attr_mask & IB_QP_PORT)
 +              qp->port = attr->port_num;
 +      if (attr_mask & IB_QP_AV)
 +              qp->av_sgid_attr =
 +                      rdma_update_sgid_attr(&attr->ah_attr, qp->av_sgid_attr);
 +      if (attr_mask & IB_QP_ALT_PATH)
 +              qp->alt_path_sgid_attr = rdma_update_sgid_attr(
 +                      &attr->alt_ah_attr, qp->alt_path_sgid_attr);
 +
 +out:
 +      if (attr_mask & IB_QP_ALT_PATH)
 +              rdma_unfill_sgid_attr(&attr->alt_ah_attr, old_sgid_attr_alt_av);
 +out_av:
 +      if (attr_mask & IB_QP_AV)
 +              rdma_unfill_sgid_attr(&attr->ah_attr, old_sgid_attr_av);
        return ret;
  }
  
 -static bool is_qp_type_connected(const struct ib_qp *qp)
 -{
 -      return (qp->qp_type == IB_QPT_UC ||
 -              qp->qp_type == IB_QPT_RC ||
 -              qp->qp_type == IB_QPT_XRC_INI ||
 -              qp->qp_type == IB_QPT_XRC_TGT);
 -}
 -
  /**
   * ib_modify_qp_with_udata - Modifies the attributes for the specified QP.
   * @ib_qp: The QP to modify.
  int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr,
                            int attr_mask, struct ib_udata *udata)
  {
 -      struct ib_qp *qp = ib_qp->real_qp;
 -      int ret;
 -
 -      if (attr_mask & IB_QP_AV &&
 -          attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE &&
 -          is_qp_type_connected(qp)) {
 -              ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr);
 -              if (ret)
 -                      return ret;
 -      }
 -      return _ib_modify_qp(qp, attr, attr_mask, udata);
 +      return _ib_modify_qp(ib_qp->real_qp, attr, attr_mask, udata);
  }
  EXPORT_SYMBOL(ib_modify_qp_with_udata);
  
@@@ -1749,9 -1451,6 +1749,9 @@@ int ib_query_qp(struct ib_qp *qp
                int qp_attr_mask,
                struct ib_qp_init_attr *qp_init_attr)
  {
 +      qp_attr->ah_attr.grh.sgid_attr = NULL;
 +      qp_attr->alt_ah_attr.grh.sgid_attr = NULL;
 +
        return qp->device->query_qp ?
                qp->device->query_qp(qp->real_qp, qp_attr, qp_attr_mask, qp_init_attr) :
                -EOPNOTSUPP;
@@@ -1810,8 -1509,6 +1810,8 @@@ static int __ib_destroy_shared_qp(struc
  
  int ib_destroy_qp(struct ib_qp *qp)
  {
 +      const struct ib_gid_attr *alt_path_sgid_attr = qp->alt_path_sgid_attr;
 +      const struct ib_gid_attr *av_sgid_attr = qp->av_sgid_attr;
        struct ib_pd *pd;
        struct ib_cq *scq, *rcq;
        struct ib_srq *srq;
        rdma_restrack_del(&qp->res);
        ret = qp->device->destroy_qp(qp);
        if (!ret) {
 +              if (alt_path_sgid_attr)
 +                      rdma_put_gid_attr(alt_path_sgid_attr);
 +              if (av_sgid_attr)
 +                      rdma_put_gid_attr(av_sgid_attr);
                if (pd)
                        atomic_dec(&pd->usecnt);
                if (scq)
@@@ -1869,11 -1562,12 +1869,12 @@@ EXPORT_SYMBOL(ib_destroy_qp)
  
  /* Completion queues */
  
- struct ib_cq *ib_create_cq(struct ib_device *device,
-                          ib_comp_handler comp_handler,
-                          void (*event_handler)(struct ib_event *, void *),
-                          void *cq_context,
-                          const struct ib_cq_init_attr *cq_attr)
+ struct ib_cq *__ib_create_cq(struct ib_device *device,
+                            ib_comp_handler comp_handler,
+                            void (*event_handler)(struct ib_event *, void *),
+                            void *cq_context,
+                            const struct ib_cq_init_attr *cq_attr,
+                            const char *caller)
  {
        struct ib_cq *cq;
  
                cq->cq_context    = cq_context;
                atomic_set(&cq->usecnt, 0);
                cq->res.type = RDMA_RESTRACK_CQ;
+               cq->res.kern_name = caller;
                rdma_restrack_add(&cq->res);
        }
  
        return cq;
  }
- EXPORT_SYMBOL(ib_create_cq);
+ EXPORT_SYMBOL(__ib_create_cq);
  
  int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period)
  {
@@@ -2282,6 -1977,35 +2284,6 @@@ int ib_destroy_rwq_ind_table(struct ib_
  }
  EXPORT_SYMBOL(ib_destroy_rwq_ind_table);
  
 -struct ib_flow *ib_create_flow(struct ib_qp *qp,
 -                             struct ib_flow_attr *flow_attr,
 -                             int domain)
 -{
 -      struct ib_flow *flow_id;
 -      if (!qp->device->create_flow)
 -              return ERR_PTR(-EOPNOTSUPP);
 -
 -      flow_id = qp->device->create_flow(qp, flow_attr, domain, NULL);
 -      if (!IS_ERR(flow_id)) {
 -              atomic_inc(&qp->usecnt);
 -              flow_id->qp = qp;
 -      }
 -      return flow_id;
 -}
 -EXPORT_SYMBOL(ib_create_flow);
 -
 -int ib_destroy_flow(struct ib_flow *flow_id)
 -{
 -      int err;
 -      struct ib_qp *qp = flow_id->qp;
 -
 -      err = qp->device->destroy_flow(flow_id);
 -      if (!err)
 -              atomic_dec(&qp->usecnt);
 -      return err;
 -}
 -EXPORT_SYMBOL(ib_destroy_flow);
 -
  int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
                       struct ib_mr_status *mr_status)
  {
@@@ -2476,6 -2200,7 +2478,6 @@@ static void __ib_drain_sq(struct ib_qp 
        struct ib_cq *cq = qp->send_cq;
        struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
        struct ib_drain_cqe sdrain;
 -      struct ib_send_wr *bad_swr;
        struct ib_rdma_wr swr = {
                .wr = {
                        .next = NULL,
        sdrain.cqe.done = ib_drain_qp_done;
        init_completion(&sdrain.done);
  
 -      ret = ib_post_send(qp, &swr.wr, &bad_swr);
 +      ret = ib_post_send(qp, &swr.wr, NULL);
        if (ret) {
                WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
                return;
@@@ -2515,7 -2240,7 +2517,7 @@@ static void __ib_drain_rq(struct ib_qp 
        struct ib_cq *cq = qp->recv_cq;
        struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
        struct ib_drain_cqe rdrain;
 -      struct ib_recv_wr rwr = {}, *bad_rwr;
 +      struct ib_recv_wr rwr = {};
        int ret;
  
        ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
        rdrain.cqe.done = ib_drain_qp_done;
        init_completion(&rdrain.done);
  
 -      ret = ib_post_recv(qp, &rwr, &bad_rwr);
 +      ret = ib_post_recv(qp, &rwr, NULL);
        if (ret) {
                WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
                return;
@@@ -1,5 -1,5 +1,5 @@@
  /*
 - * Copyright(c) 2015, 2016 Intel Corporation.
 + * Copyright(c) 2015 - 2018 Intel Corporation.
   *
   * This file is provided under a dual BSD/GPLv2 license.  When using or
   * redistributing this file, you may do so under either license.
@@@ -241,7 -241,7 +241,7 @@@ bail
        smp_wmb();
        qp->s_flags &= ~(RVT_S_RESP_PENDING
                                | RVT_S_ACK_PENDING
 -                              | RVT_S_AHG_VALID);
 +                              | HFI1_S_AHG_VALID);
        return 0;
  }
  
@@@ -271,7 -271,7 +271,7 @@@ int hfi1_make_rc_req(struct rvt_qp *qp
  
        lockdep_assert_held(&qp->s_lock);
        ps->s_txreq = get_txreq(ps->dev, qp);
-       if (IS_ERR(ps->s_txreq))
+       if (!ps->s_txreq)
                goto bail_no_tx;
  
        if (priv->hdr_type == HFI1_PKT_TYPE_9B) {
@@@ -1024,7 -1024,7 +1024,7 @@@ done
        if ((cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) &&
            (cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0))
                qp->s_flags |= RVT_S_WAIT_PSN;
 -      qp->s_flags &= ~RVT_S_AHG_VALID;
 +      qp->s_flags &= ~HFI1_S_AHG_VALID;
  }
  
  /*
@@@ -419,8 -419,8 +419,8 @@@ static int mlx5_query_port_roce(struct 
        translate_eth_proto_oper(eth_prot_oper, &props->active_speed,
                                 &props->active_width);
  
 -      props->port_cap_flags  |= IB_PORT_CM_SUP;
 -      props->port_cap_flags  |= IB_PORT_IP_BASED_GIDS;
 +      props->port_cap_flags |= IB_PORT_CM_SUP;
 +      props->ip_gids = true;
  
        props->gid_tbl_len      = MLX5_CAP_ROCE(dev->mdev,
                                                roce_address_table_size);
@@@ -510,11 -510,12 +510,11 @@@ static int set_roce_addr(struct mlx5_ib
                                      vlan_id, port_num);
  }
  
 -static int mlx5_ib_add_gid(const union ib_gid *gid,
 -                         const struct ib_gid_attr *attr,
 +static int mlx5_ib_add_gid(const struct ib_gid_attr *attr,
                           __always_unused void **context)
  {
        return set_roce_addr(to_mdev(attr->device), attr->port_num,
 -                           attr->index, gid, attr);
 +                           attr->index, &attr->gid, attr);
  }
  
  static int mlx5_ib_del_gid(const struct ib_gid_attr *attr,
                             attr->index, NULL, NULL);
  }
  
 -__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
 -                             int index)
 +__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev,
 +                             const struct ib_gid_attr *attr)
  {
 -      struct ib_gid_attr attr;
 -      union ib_gid gid;
 -
 -      if (ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr))
 -              return 0;
 -
 -      dev_put(attr.ndev);
 -
 -      if (attr.gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
 +      if (attr->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
                return 0;
  
        return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port));
  }
  
 -int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
 -                         int index, enum ib_gid_type *gid_type)
 -{
 -      struct ib_gid_attr attr;
 -      union ib_gid gid;
 -      int ret;
 -
 -      ret = ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr);
 -      if (ret)
 -              return ret;
 -
 -      dev_put(attr.ndev);
 -
 -      *gid_type = attr.gid_type;
 -
 -      return 0;
 -}
 -
  static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
  {
        if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB)
@@@ -888,8 -915,7 +888,8 @@@ static int mlx5_ib_query_device(struct 
        max_sq_sg = (max_sq_desc - sizeof(struct mlx5_wqe_ctrl_seg) -
                     sizeof(struct mlx5_wqe_raddr_seg)) /
                sizeof(struct mlx5_wqe_data_seg);
 -      props->max_sge = min(max_rq_sg, max_sq_sg);
 +      props->max_send_sge = max_sq_sg;
 +      props->max_recv_sge = max_rq_sg;
        props->max_sge_rd          = MLX5_MAX_SGE_RD;
        props->max_cq              = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
        props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
@@@ -1220,6 -1246,7 +1220,6 @@@ static int mlx5_query_hca_port(struct i
        props->qkey_viol_cntr   = rep->qkey_violation_counter;
        props->subnet_timeout   = rep->subnet_timeout;
        props->init_type_reply  = rep->init_type_reply;
 -      props->grh_required     = rep->grh_required;
  
        err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port);
        if (err)
@@@ -1558,26 -1585,31 +1558,26 @@@ error
        return err;
  }
  
 -static int deallocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context)
 +static void deallocate_uars(struct mlx5_ib_dev *dev,
 +                          struct mlx5_ib_ucontext *context)
  {
        struct mlx5_bfreg_info *bfregi;
 -      int err;
        int i;
  
        bfregi = &context->bfregi;
 -      for (i = 0; i < bfregi->num_sys_pages; i++) {
 +      for (i = 0; i < bfregi->num_sys_pages; i++)
                if (i < bfregi->num_static_sys_pages ||
 -                  bfregi->sys_pages[i] != MLX5_IB_INVALID_UAR_INDEX) {
 -                      err = mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]);
 -                      if (err) {
 -                              mlx5_ib_warn(dev, "failed to free uar %d, err=%d\n", i, err);
 -                              return err;
 -                      }
 -              }
 -      }
 -
 -      return 0;
 +                  bfregi->sys_pages[i] != MLX5_IB_INVALID_UAR_INDEX)
 +                      mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]);
  }
  
  static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn)
  {
        int err;
  
 +      if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
 +              return 0;
 +
        err = mlx5_core_alloc_transport_domain(dev->mdev, tdn);
        if (err)
                return err;
  
  static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn)
  {
 +      if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
 +              return;
 +
        mlx5_core_dealloc_transport_domain(dev->mdev, tdn);
  
        if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) ||
@@@ -1631,7 -1660,6 +1631,7 @@@ static struct ib_ucontext *mlx5_ib_allo
        int err;
        size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2,
                                     max_cqe_version);
 +      u32 dump_fill_mkey;
        bool lib_uar_4k;
  
        if (!dev->ib_active)
        if (err)
                return ERR_PTR(err);
  
 -      if (req.flags)
 -              return ERR_PTR(-EINVAL);
 +      if (req.flags & ~MLX5_IB_ALLOC_UCTX_DEVX)
 +              return ERR_PTR(-EOPNOTSUPP);
  
        if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2)
                return ERR_PTR(-EOPNOTSUPP);
        context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
  #endif
  
 -      if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) {
 -              err = mlx5_ib_alloc_transport_domain(dev, &context->tdn);
 +      err = mlx5_ib_alloc_transport_domain(dev, &context->tdn);
 +      if (err)
 +              goto out_uars;
 +
 +      if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
 +              /* Block DEVX on InfiniBand because of SELinux */
 +              if (mlx5_ib_port_link_layer(ibdev, 1) != IB_LINK_LAYER_ETHERNET) {
 +                      err = -EPERM;
 +                      goto out_td;
 +              }
 +
 +              err = mlx5_ib_devx_create(dev, context);
                if (err)
 -                      goto out_uars;
 +                      goto out_td;
 +      }
 +
 +      if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
 +              err = mlx5_cmd_dump_fill_mkey(dev->mdev, &dump_fill_mkey);
 +              if (err)
 +                      goto out_mdev;
        }
  
        INIT_LIST_HEAD(&context->vma_private_list);
                resp.response_length += sizeof(resp.num_dyn_bfregs);
        }
  
 +      if (field_avail(typeof(resp), dump_fill_mkey, udata->outlen)) {
 +              if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
 +                      resp.dump_fill_mkey = dump_fill_mkey;
 +                      resp.comp_mask |=
 +                              MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY;
 +              }
 +              resp.response_length += sizeof(resp.dump_fill_mkey);
 +      }
 +
        err = ib_copy_to_udata(udata, &resp, resp.response_length);
        if (err)
 -              goto out_td;
 +              goto out_mdev;
  
        bfregi->ver = ver;
        bfregi->num_low_latency_bfregs = req.num_low_latency_bfregs;
  
        return &context->ibucontext;
  
 +out_mdev:
 +      if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX)
 +              mlx5_ib_devx_destroy(dev, context);
  out_td:
 -      if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
 -              mlx5_ib_dealloc_transport_domain(dev, context->tdn);
 +      mlx5_ib_dealloc_transport_domain(dev, context->tdn);
  
  out_uars:
        deallocate_uars(dev, context);
@@@ -1855,11 -1856,9 +1855,11 @@@ static int mlx5_ib_dealloc_ucontext(str
        struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
        struct mlx5_bfreg_info *bfregi;
  
 +      if (context->devx_uid)
 +              mlx5_ib_devx_destroy(dev, context);
 +
        bfregi = &context->bfregi;
 -      if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
 -              mlx5_ib_dealloc_transport_domain(dev, context->tdn);
 +      mlx5_ib_dealloc_transport_domain(dev, context->tdn);
  
        deallocate_uars(dev, context);
        kfree(bfregi->sys_pages);
@@@ -2041,7 -2040,7 +2041,7 @@@ static int uar_mmap(struct mlx5_ib_dev 
        struct mlx5_bfreg_info *bfregi = &context->bfregi;
        int err;
        unsigned long idx;
 -      phys_addr_t pfn, pa;
 +      phys_addr_t pfn;
        pgprot_t prot;
        u32 bfreg_dyn_idx = 0;
        u32 uar_index;
                goto err;
        }
  
 -      pa = pfn << PAGE_SHIFT;
 -
        err = mlx5_ib_set_vma_data(vma, context);
        if (err)
                goto err;
@@@ -2698,7 -2699,7 +2698,7 @@@ static int parse_flow_attr(struct mlx5_
                         IPPROTO_GRE);
  
                MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol,
 -                       0xffff);
 +                       ntohs(ib_spec->gre.mask.protocol));
                MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol,
                         ntohs(ib_spec->gre.val.protocol));
  
@@@ -2978,11 -2979,11 +2978,11 @@@ static void counters_clear_description(
  
  static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
  {
 -      struct mlx5_ib_dev *dev = to_mdev(flow_id->qp->device);
        struct mlx5_ib_flow_handler *handler = container_of(flow_id,
                                                          struct mlx5_ib_flow_handler,
                                                          ibflow);
        struct mlx5_ib_flow_handler *iter, *tmp;
 +      struct mlx5_ib_dev *dev = handler->dev;
  
        mutex_lock(&dev->flow_db->lock);
  
                counters_clear_description(handler->ibcounters);
  
        mutex_unlock(&dev->flow_db->lock);
 +      if (handler->flow_matcher)
 +              atomic_dec(&handler->flow_matcher->usecnt);
        kfree(handler);
  
        return 0;
@@@ -3022,26 -3021,6 +3022,26 @@@ enum flow_table_type 
  
  #define MLX5_FS_MAX_TYPES      6
  #define MLX5_FS_MAX_ENTRIES    BIT(16)
 +
 +static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns,
 +                                         struct mlx5_ib_flow_prio *prio,
 +                                         int priority,
 +                                         int num_entries, int num_groups)
 +{
 +      struct mlx5_flow_table *ft;
 +
 +      ft = mlx5_create_auto_grouped_flow_table(ns, priority,
 +                                               num_entries,
 +                                               num_groups,
 +                                               0, 0);
 +      if (IS_ERR(ft))
 +              return ERR_CAST(ft);
 +
 +      prio->flow_table = ft;
 +      prio->refcount = 0;
 +      return prio;
 +}
 +
  static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
                                                struct ib_flow_attr *flow_attr,
                                                enum flow_table_type ft_type)
        int num_entries;
        int num_groups;
        int priority;
 -      int err = 0;
  
        max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
                                                       log_max_ft_size));
                return ERR_PTR(-ENOMEM);
  
        ft = prio->flow_table;
 -      if (!ft) {
 -              ft = mlx5_create_auto_grouped_flow_table(ns, priority,
 -                                                       num_entries,
 -                                                       num_groups,
 -                                                       0, 0);
 -
 -              if (!IS_ERR(ft)) {
 -                      prio->refcount = 0;
 -                      prio->flow_table = ft;
 -              } else {
 -                      err = PTR_ERR(ft);
 -              }
 -      }
 +      if (!ft)
 +              return _get_prio(ns, prio, priority, num_entries, num_groups);
  
 -      return err ? ERR_PTR(err) : prio;
 +      return prio;
  }
  
  static void set_underlay_qp(struct mlx5_ib_dev *dev,
@@@ -3208,8 -3199,8 +3208,8 @@@ static int flow_counters_set_data(struc
        if (!mcounters->hw_cntrs_hndl) {
                mcounters->hw_cntrs_hndl = mlx5_fc_create(
                        to_mdev(ibcounters->device)->mdev, false);
-               if (!mcounters->hw_cntrs_hndl) {
-                       ret = -ENOMEM;
+               if (IS_ERR(mcounters->hw_cntrs_hndl)) {
+                       ret = PTR_ERR(mcounters->hw_cntrs_hndl);
                        goto free;
                }
                hw_hndl = true;
@@@ -3365,7 -3356,6 +3365,7 @@@ static struct mlx5_ib_flow_handler *_cr
  
        ft_prio->refcount++;
        handler->prio = ft_prio;
 +      handler->dev = dev;
  
        ft_prio->flow_table = ft;
  free:
@@@ -3556,29 -3546,35 +3556,35 @@@ static struct ib_flow *mlx5_ib_create_f
                        return ERR_PTR(-ENOMEM);
  
                err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz);
-               if (err) {
-                       kfree(ucmd);
-                       return ERR_PTR(err);
-               }
+               if (err)
+                       goto free_ucmd;
        }
  
-       if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO)
-               return ERR_PTR(-ENOMEM);
+       if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) {
+               err = -ENOMEM;
+               goto free_ucmd;
+       }
  
        if (domain != IB_FLOW_DOMAIN_USER ||
            flow_attr->port > dev->num_ports ||
            (flow_attr->flags & ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP |
-                                 IB_FLOW_ATTR_FLAGS_EGRESS)))
-               return ERR_PTR(-EINVAL);
+                                 IB_FLOW_ATTR_FLAGS_EGRESS))) {
+               err = -EINVAL;
+               goto free_ucmd;
+       }
  
        if (is_egress &&
            (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
-            flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT))
-               return ERR_PTR(-EINVAL);
+            flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
+               err = -EINVAL;
+               goto free_ucmd;
+       }
  
        dst = kzalloc(sizeof(*dst), GFP_KERNEL);
-       if (!dst)
-               return ERR_PTR(-ENOMEM);
+       if (!dst) {
+               err = -ENOMEM;
+               goto free_ucmd;
+       }
  
        mutex_lock(&dev->flow_db->lock);
  
@@@ -3647,194 -3643,11 +3653,194 @@@ destroy_ft
  unlock:
        mutex_unlock(&dev->flow_db->lock);
        kfree(dst);
+ free_ucmd:
        kfree(ucmd);
-       kfree(handler);
        return ERR_PTR(err);
  }
  
 +static struct mlx5_ib_flow_prio *_get_flow_table(struct mlx5_ib_dev *dev,
 +                                               int priority, bool mcast)
 +{
 +      int max_table_size;
 +      struct mlx5_flow_namespace *ns = NULL;
 +      struct mlx5_ib_flow_prio *prio;
 +
 +      max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
 +                           log_max_ft_size));
 +      if (max_table_size < MLX5_FS_MAX_ENTRIES)
 +              return ERR_PTR(-ENOMEM);
 +
 +      if (mcast)
 +              priority = MLX5_IB_FLOW_MCAST_PRIO;
 +      else
 +              priority = ib_prio_to_core_prio(priority, false);
 +
 +      ns = mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS);
 +      if (!ns)
 +              return ERR_PTR(-ENOTSUPP);
 +
 +      prio = &dev->flow_db->prios[priority];
 +
 +      if (prio->flow_table)
 +              return prio;
 +
 +      return _get_prio(ns, prio, priority, MLX5_FS_MAX_ENTRIES,
 +                       MLX5_FS_MAX_TYPES);
 +}
 +
 +static struct mlx5_ib_flow_handler *
 +_create_raw_flow_rule(struct mlx5_ib_dev *dev,
 +                    struct mlx5_ib_flow_prio *ft_prio,
 +                    struct mlx5_flow_destination *dst,
 +                    struct mlx5_ib_flow_matcher  *fs_matcher,
 +                    void *cmd_in, int inlen)
 +{
 +      struct mlx5_ib_flow_handler *handler;
 +      struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
 +      struct mlx5_flow_spec *spec;
 +      struct mlx5_flow_table *ft = ft_prio->flow_table;
 +      int err = 0;
 +
 +      spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
 +      handler = kzalloc(sizeof(*handler), GFP_KERNEL);
 +      if (!handler || !spec) {
 +              err = -ENOMEM;
 +              goto free;
 +      }
 +
 +      INIT_LIST_HEAD(&handler->list);
 +
 +      memcpy(spec->match_value, cmd_in, inlen);
 +      memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params,
 +             fs_matcher->mask_len);
 +      spec->match_criteria_enable = fs_matcher->match_criteria_enable;
 +
 +      flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 +      handler->rule = mlx5_add_flow_rules(ft, spec,
 +                                          &flow_act, dst, 1);
 +
 +      if (IS_ERR(handler->rule)) {
 +              err = PTR_ERR(handler->rule);
 +              goto free;
 +      }
 +
 +      ft_prio->refcount++;
 +      handler->prio = ft_prio;
 +      handler->dev = dev;
 +      ft_prio->flow_table = ft;
 +
 +free:
 +      if (err)
 +              kfree(handler);
 +      kvfree(spec);
 +      return err ? ERR_PTR(err) : handler;
 +}
 +
 +static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher,
 +                              void *match_v)
 +{
 +      void *match_c;
 +      void *match_v_set_lyr_2_4, *match_c_set_lyr_2_4;
 +      void *dmac, *dmac_mask;
 +      void *ipv4, *ipv4_mask;
 +
 +      if (!(fs_matcher->match_criteria_enable &
 +            (1 << MATCH_CRITERIA_ENABLE_OUTER_BIT)))
 +              return false;
 +
 +      match_c = fs_matcher->matcher_mask.match_params;
 +      match_v_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_v,
 +                                         outer_headers);
 +      match_c_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_c,
 +                                         outer_headers);
 +
 +      dmac = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
 +                          dmac_47_16);
 +      dmac_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
 +                               dmac_47_16);
 +
 +      if (is_multicast_ether_addr(dmac) &&
 +          is_multicast_ether_addr(dmac_mask))
 +              return true;
 +
 +      ipv4 = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
 +                          dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
 +
 +      ipv4_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
 +                               dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
 +
 +      if (ipv4_is_multicast(*(__be32 *)(ipv4)) &&
 +          ipv4_is_multicast(*(__be32 *)(ipv4_mask)))
 +              return true;
 +
 +      return false;
 +}
 +
 +struct mlx5_ib_flow_handler *
 +mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
 +                      struct mlx5_ib_flow_matcher *fs_matcher,
 +                      void *cmd_in, int inlen, int dest_id,
 +                      int dest_type)
 +{
 +      struct mlx5_flow_destination *dst;
 +      struct mlx5_ib_flow_prio *ft_prio;
 +      int priority = fs_matcher->priority;
 +      struct mlx5_ib_flow_handler *handler;
 +      bool mcast;
 +      int err;
 +
 +      if (fs_matcher->flow_type != MLX5_IB_FLOW_TYPE_NORMAL)
 +              return ERR_PTR(-EOPNOTSUPP);
 +
 +      if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO)
 +              return ERR_PTR(-ENOMEM);
 +
 +      dst = kzalloc(sizeof(*dst), GFP_KERNEL);
 +      if (!dst)
 +              return ERR_PTR(-ENOMEM);
 +
 +      mcast = raw_fs_is_multicast(fs_matcher, cmd_in);
 +      mutex_lock(&dev->flow_db->lock);
 +
 +      ft_prio = _get_flow_table(dev, priority, mcast);
 +      if (IS_ERR(ft_prio)) {
 +              err = PTR_ERR(ft_prio);
 +              goto unlock;
 +      }
 +
 +      if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) {
 +              dst->type = dest_type;
 +              dst->tir_num = dest_id;
 +      } else {
 +              dst->type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
 +              dst->ft_num = dest_id;
 +      }
 +
 +      handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, cmd_in,
 +                                      inlen);
 +
 +      if (IS_ERR(handler)) {
 +              err = PTR_ERR(handler);
 +              goto destroy_ft;
 +      }
 +
 +      mutex_unlock(&dev->flow_db->lock);
 +      atomic_inc(&fs_matcher->usecnt);
 +      handler->flow_matcher = fs_matcher;
 +
 +      kfree(dst);
 +
 +      return handler;
 +
 +destroy_ft:
 +      put_flow_table(dev, ft_prio, false);
 +unlock:
 +      mutex_unlock(&dev->flow_db->lock);
 +      kfree(dst);
 +
 +      return ERR_PTR(err);
 +}
 +
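/*
 * Minimal usage sketch for the new raw flow-steering entry point above,
 * assuming an already-created matcher and TIR; "matcher" and "tirn" are
 * placeholder names, not the exact variables used by the uverbs handler
 * that eventually consumes this helper:
 *
 *	struct mlx5_ib_flow_handler *handler;
 *
 *	handler = mlx5_ib_raw_fs_rule_add(dev, matcher, cmd_in,
 *					  MLX5_ST_SZ_BYTES(fte_match_param),
 *					  tirn,
 *					  MLX5_FLOW_DESTINATION_TYPE_TIR);
 *	if (IS_ERR(handler))
 *		return PTR_ERR(handler);
 */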
  static u32 mlx5_ib_flow_action_flags_to_accel_xfrm_flags(u32 mlx5_flags)
  {
        u32 flags = 0;
@@@ -3859,11 -3672,12 +3865,11 @@@ mlx5_ib_create_flow_action_esp(struct i
        u64 flags;
        int err = 0;
  
 -      if (IS_UVERBS_COPY_ERR(uverbs_copy_from(&action_flags, attrs,
 -                                              MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS)))
 -              return ERR_PTR(-EFAULT);
 -
 -      if (action_flags >= (MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1))
 -              return ERR_PTR(-EOPNOTSUPP);
 +      err = uverbs_get_flags64(
 +              &action_flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
 +              ((MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1) - 1));
 +      if (err)
 +              return ERR_PTR(err);
  
        flags = mlx5_ib_flow_action_flags_to_accel_xfrm_flags(action_flags);
  
@@@ -4652,8 -4466,7 +4658,8 @@@ static void destroy_dev_resources(struc
                cancel_work_sync(&devr->ports[port].pkey_change_work);
  }
  
 -static u32 get_core_cap_flags(struct ib_device *ibdev)
 +static u32 get_core_cap_flags(struct ib_device *ibdev,
 +                            struct mlx5_hca_vport_context *rep)
  {
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
        enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1);
        bool raw_support = !mlx5_core_mp_enabled(dev->mdev);
        u32 ret = 0;
  
 +      if (rep->grh_required)
 +              ret |= RDMA_CORE_CAP_IB_GRH_REQUIRED;
 +
        if (ll == IB_LINK_LAYER_INFINIBAND)
 -              return RDMA_CORE_PORT_IBA_IB;
 +              return ret | RDMA_CORE_PORT_IBA_IB;
  
        if (raw_support)
 -              ret = RDMA_CORE_PORT_RAW_PACKET;
 +              ret |= RDMA_CORE_PORT_RAW_PACKET;
  
        if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP))
                return ret;
@@@ -4692,23 -4502,17 +4698,23 @@@ static int mlx5_port_immutable(struct i
        struct ib_port_attr attr;
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
        enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num);
 +      struct mlx5_hca_vport_context rep = {0};
        int err;
  
 -      immutable->core_cap_flags = get_core_cap_flags(ibdev);
 -
        err = ib_query_port(ibdev, port_num, &attr);
        if (err)
                return err;
  
 +      if (ll == IB_LINK_LAYER_INFINIBAND) {
 +              err = mlx5_query_hca_vport_context(dev->mdev, 0, port_num, 0,
 +                                                 &rep);
 +              if (err)
 +                      return err;
 +      }
 +
        immutable->pkey_tbl_len = attr.pkey_tbl_len;
        immutable->gid_tbl_len = attr.gid_tbl_len;
 -      immutable->core_cap_flags = get_core_cap_flags(ibdev);
 +      immutable->core_cap_flags = get_core_cap_flags(ibdev, &rep);
        if ((ll == IB_LINK_LAYER_INFINIBAND) || MLX5_CAP_GEN(dev->mdev, roce))
                immutable->max_mad_size = IB_MGMT_MAD_SIZE;
  
@@@ -4806,7 -4610,7 +4812,7 @@@ static void mlx5_remove_netdev_notifier
        }
  }
  
 -static int mlx5_enable_eth(struct mlx5_ib_dev *dev, u8 port_num)
 +static int mlx5_enable_eth(struct mlx5_ib_dev *dev)
  {
        int err;
  
@@@ -4885,21 -4689,12 +4891,21 @@@ static const struct mlx5_ib_counter ext
        INIT_Q_COUNTER(req_cqe_flush_error),
  };
  
 +#define INIT_EXT_PPCNT_COUNTER(_name)         \
 +      { .name = #_name, .offset =     \
 +      MLX5_BYTE_OFF(ppcnt_reg, \
 +                    counter_set.eth_extended_cntrs_grp_data_layout._name##_high)}
 +
 +static const struct mlx5_ib_counter ext_ppcnt_cnts[] = {
 +      INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated),
 +};
 +
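/*
 * For reference, the single entry above is expected to expand to roughly:
 *
 *	{ .name = "rx_icrc_encapsulated",
 *	  .offset = MLX5_BYTE_OFF(ppcnt_reg,
 *			counter_set.eth_extended_cntrs_grp_data_layout.
 *			rx_icrc_encapsulated_high) }
 *
 * i.e. the byte offset of the high 32-bit half of the 64-bit PPCNT
 * counter; the paired low half follows it, so a single big-endian 64-bit
 * load from this offset recovers the whole counter value (see
 * mlx5_ib_query_ext_ppcnt_counters() further down).
 */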
  static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
  {
        int i;
  
        for (i = 0; i < dev->num_ports; i++) {
 -              if (dev->port[i].cnts.set_id)
 +              if (dev->port[i].cnts.set_id_valid)
                        mlx5_core_dealloc_q_counter(dev->mdev,
                                                    dev->port[i].cnts.set_id);
                kfree(dev->port[i].cnts.names);
@@@ -4929,10 -4724,7 +4935,10 @@@ static int __mlx5_ib_alloc_counters(str
                cnts->num_cong_counters = ARRAY_SIZE(cong_cnts);
                num_counters += ARRAY_SIZE(cong_cnts);
        }
 -
 +      if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
 +              cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts);
 +              num_counters += ARRAY_SIZE(ext_ppcnt_cnts);
 +      }
        cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL);
        if (!cnts->names)
                return -ENOMEM;
@@@ -4989,13 -4781,6 +4995,13 @@@ static void mlx5_ib_fill_counters(struc
                        offsets[j] = cong_cnts[i].offset;
                }
        }
 +
 +      if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
 +              for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) {
 +                      names[j] = ext_ppcnt_cnts[i].name;
 +                      offsets[j] = ext_ppcnt_cnts[i].offset;
 +              }
 +      }
  }
  
  static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
@@@ -5041,8 -4826,7 +5047,8 @@@ static struct rdma_hw_stats *mlx5_ib_al
  
        return rdma_alloc_hw_stats_struct(port->cnts.names,
                                          port->cnts.num_q_counters +
 -                                        port->cnts.num_cong_counters,
 +                                        port->cnts.num_cong_counters +
 +                                        port->cnts.num_ext_ppcnt_counters,
                                          RDMA_HW_STATS_DEFAULT_LIFESPAN);
  }
  
@@@ -5075,34 -4859,6 +5081,34 @@@ free
        return ret;
  }
  
 +static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
 +                                        struct mlx5_ib_port *port,
 +                                        struct rdma_hw_stats *stats)
 +{
 +      int offset = port->cnts.num_q_counters + port->cnts.num_cong_counters;
 +      int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
 +      int ret, i;
 +      void *out;
 +
 +      out = kvzalloc(sz, GFP_KERNEL);
 +      if (!out)
 +              return -ENOMEM;
 +
 +      ret = mlx5_cmd_query_ext_ppcnt_counters(dev->mdev, out);
 +      if (ret)
 +              goto free;
 +
 +      for (i = 0; i < port->cnts.num_ext_ppcnt_counters; i++) {
 +              stats->value[i + offset] =
 +                      be64_to_cpup((__be64 *)(out +
 +                                  port->cnts.offsets[i + offset]));
 +      }
 +
 +free:
 +      kvfree(out);
 +      return ret;
 +}
 +
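/*
 * The extended PPCNT values are appended after the per-device q_counters
 * and the per-port congestion counters, matching the name/offset order
 * built up in mlx5_ib_fill_counters(); each stored offset indexes straight
 * into the raw PPCNT register dump returned by
 * mlx5_cmd_query_ext_ppcnt_counters().
 */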
  static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
                                struct rdma_hw_stats *stats,
                                u8 port_num, int index)
        if (!stats)
                return -EINVAL;
  
 -      num_counters = port->cnts.num_q_counters + port->cnts.num_cong_counters;
 +      num_counters = port->cnts.num_q_counters +
 +                     port->cnts.num_cong_counters +
 +                     port->cnts.num_ext_ppcnt_counters;
  
        /* q_counters are per IB device, query the master mdev */
        ret = mlx5_ib_query_q_counters(dev->mdev, port, stats);
        if (ret)
                return ret;
  
 +      if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
 +              ret = mlx5_ib_query_ext_ppcnt_counters(dev, port, stats);
 +              if (ret)
 +                      return ret;
 +      }
 +
        if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
                mdev = mlx5_ib_get_native_port_mdev(dev, port_num,
                                                    &mdev_port_num);
@@@ -5157,6 -4905,11 +5163,6 @@@ done
        return num_counters;
  }
  
 -static void mlx5_ib_free_rdma_netdev(struct net_device *netdev)
 -{
 -      return mlx5_rdma_netdev_free(netdev);
 -}
 -
  static struct net_device*
  mlx5_ib_alloc_rdma_netdev(struct ib_device *hca,
                          u8 port_num,
                          void (*setup)(struct net_device *))
  {
        struct net_device *netdev;
 -      struct rdma_netdev *rn;
  
        if (type != RDMA_NETDEV_IPOIB)
                return ERR_PTR(-EOPNOTSUPP);
  
        netdev = mlx5_rdma_netdev_alloc(to_mdev(hca)->mdev, hca,
                                        name, setup);
 -      if (likely(!IS_ERR_OR_NULL(netdev))) {
 -              rn = netdev_priv(netdev);
 -              rn->free_rdma_netdev = mlx5_ib_free_rdma_netdev;
 -      }
        return netdev;
  }
  
@@@ -5369,8 -5127,8 +5375,8 @@@ static bool mlx5_ib_bind_slave_port(str
  
        spin_lock(&ibdev->port[port_num].mp.mpi_lock);
        if (ibdev->port[port_num].mp.mpi) {
 -              mlx5_ib_warn(ibdev, "port %d already affiliated.\n",
 -                           port_num + 1);
 +              mlx5_ib_dbg(ibdev, "port %d already affiliated.\n",
 +                          port_num + 1);
                spin_unlock(&ibdev->port[port_num].mp.mpi_lock);
                return false;
        }
@@@ -5505,47 -5263,45 +5511,47 @@@ static void mlx5_ib_cleanup_multiport_m
        mlx5_nic_vport_disable_roce(dev->mdev);
  }
  
 -ADD_UVERBS_ATTRIBUTES_SIMPLE(mlx5_ib_dm, UVERBS_OBJECT_DM,
 -                           UVERBS_METHOD_DM_ALLOC,
 -                           &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
 -                                                UVERBS_ATTR_TYPE(u64),
 -                                                UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
 -                           &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
 -                                                UVERBS_ATTR_TYPE(u16),
 -                                                UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
 -
 -ADD_UVERBS_ATTRIBUTES_SIMPLE(mlx5_ib_flow_action, UVERBS_OBJECT_FLOW_ACTION,
 -                           UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
 -                           &UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
 -                                               UVERBS_ATTR_TYPE(u64),
 -                                               UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
 +ADD_UVERBS_ATTRIBUTES_SIMPLE(
 +      mlx5_ib_dm,
 +      UVERBS_OBJECT_DM,
 +      UVERBS_METHOD_DM_ALLOC,
 +      UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
 +                          UVERBS_ATTR_TYPE(u64),
 +                          UA_MANDATORY),
 +      UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
 +                          UVERBS_ATTR_TYPE(u16),
 +                          UA_MANDATORY));
 +
 +ADD_UVERBS_ATTRIBUTES_SIMPLE(
 +      mlx5_ib_flow_action,
 +      UVERBS_OBJECT_FLOW_ACTION,
 +      UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
 +      UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
 +                           enum mlx5_ib_uapi_flow_action_flags));
  
 -#define NUM_TREES     2
  static int populate_specs_root(struct mlx5_ib_dev *dev)
  {
 -      const struct uverbs_object_tree_def *default_root[NUM_TREES + 1] = {
 -              uverbs_default_get_objects()};
 -      size_t num_trees = 1;
 +      const struct uverbs_object_tree_def **trees = dev->driver_trees;
 +      size_t num_trees = 0;
  
 -      if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE &&
 -          !WARN_ON(num_trees >= ARRAY_SIZE(default_root)))
 -              default_root[num_trees++] = &mlx5_ib_flow_action;
 +      if (mlx5_accel_ipsec_device_caps(dev->mdev) &
 +          MLX5_ACCEL_IPSEC_CAP_DEVICE)
 +              trees[num_trees++] = &mlx5_ib_flow_action;
  
 -      if (MLX5_CAP_DEV_MEM(dev->mdev, memic) &&
 -          !WARN_ON(num_trees >= ARRAY_SIZE(default_root)))
 -              default_root[num_trees++] = &mlx5_ib_dm;
 +      if (MLX5_CAP_DEV_MEM(dev->mdev, memic))
 +              trees[num_trees++] = &mlx5_ib_dm;
  
 -      dev->ib_dev.specs_root =
 -              uverbs_alloc_spec_tree(num_trees, default_root);
 +      if (MLX5_CAP_GEN_64(dev->mdev, general_obj_types) &
 +          MLX5_GENERAL_OBJ_TYPES_CAP_UCTX)
 +              trees[num_trees++] = mlx5_ib_get_devx_tree();
  
 -      return PTR_ERR_OR_ZERO(dev->ib_dev.specs_root);
 -}
 +      num_trees += mlx5_ib_get_flow_trees(trees + num_trees);
  
 -static void depopulate_specs_root(struct mlx5_ib_dev *dev)
 -{
 -      uverbs_free_spec_tree(dev->ib_dev.specs_root);
 +      WARN_ON(num_trees >= ARRAY_SIZE(dev->driver_trees));
 +      trees[num_trees] = NULL;
 +      dev->ib_dev.driver_specs = trees;
 +
 +      return 0;
  }
  
  static int mlx5_ib_read_counters(struct ib_counters *counters,
@@@ -5796,8 -5552,6 +5802,8 @@@ int mlx5_ib_stage_caps_init(struct mlx5
        dev->ib_dev.modify_qp           = mlx5_ib_modify_qp;
        dev->ib_dev.query_qp            = mlx5_ib_query_qp;
        dev->ib_dev.destroy_qp          = mlx5_ib_destroy_qp;
 +      dev->ib_dev.drain_sq            = mlx5_ib_drain_sq;
 +      dev->ib_dev.drain_rq            = mlx5_ib_drain_rq;
        dev->ib_dev.post_send           = mlx5_ib_post_send;
        dev->ib_dev.post_recv           = mlx5_ib_post_recv;
        dev->ib_dev.create_cq           = mlx5_ib_create_cq;
@@@ -5895,9 -5649,9 +5901,9 @@@ int mlx5_ib_stage_rep_non_default_cb(st
        return 0;
  }
  
 -static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev,
 -                                        u8 port_num)
 +static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev)
  {
 +      u8 port_num;
        int i;
  
        for (i = 0; i < dev->num_ports; i++) {
                        (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
                        (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
  
 +      port_num = mlx5_core_native_port_num(dev->mdev) - 1;
 +
        return mlx5_add_netdev_notifier(dev, port_num);
  }
  
@@@ -5938,12 -5690,14 +5944,12 @@@ int mlx5_ib_stage_rep_roce_init(struct 
        enum rdma_link_layer ll;
        int port_type_cap;
        int err = 0;
 -      u8 port_num;
  
 -      port_num = mlx5_core_native_port_num(dev->mdev) - 1;
        port_type_cap = MLX5_CAP_GEN(mdev, port_type);
        ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
  
        if (ll == IB_LINK_LAYER_ETHERNET)
 -              err = mlx5_ib_stage_common_roce_init(dev, port_num);
 +              err = mlx5_ib_stage_common_roce_init(dev);
  
        return err;
  }
@@@ -5958,17 -5712,19 +5964,17 @@@ static int mlx5_ib_stage_roce_init(stru
        struct mlx5_core_dev *mdev = dev->mdev;
        enum rdma_link_layer ll;
        int port_type_cap;
 -      u8 port_num;
        int err;
  
 -      port_num = mlx5_core_native_port_num(dev->mdev) - 1;
        port_type_cap = MLX5_CAP_GEN(mdev, port_type);
        ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
  
        if (ll == IB_LINK_LAYER_ETHERNET) {
 -              err = mlx5_ib_stage_common_roce_init(dev, port_num);
 +              err = mlx5_ib_stage_common_roce_init(dev);
                if (err)
                        return err;
  
 -              err = mlx5_enable_eth(dev, port_num);
 +              err = mlx5_enable_eth(dev);
                if (err)
                        goto cleanup;
        }
@@@ -5985,7 -5741,9 +5991,7 @@@ static void mlx5_ib_stage_roce_cleanup(
        struct mlx5_core_dev *mdev = dev->mdev;
        enum rdma_link_layer ll;
        int port_type_cap;
 -      u8 port_num;
  
 -      port_num = mlx5_core_native_port_num(dev->mdev) - 1;
        port_type_cap = MLX5_CAP_GEN(mdev, port_type);
        ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
  
@@@ -6084,6 -5842,11 +6090,6 @@@ int mlx5_ib_stage_ib_reg_init(struct ml
        return ib_register_device(&dev->ib_dev, NULL);
  }
  
 -static void mlx5_ib_stage_depopulate_specs(struct mlx5_ib_dev *dev)
 -{
 -      depopulate_specs_root(dev);
 -}
 -
  void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
  {
        destroy_umrc_res(dev);
@@@ -6152,6 -5915,8 +6158,6 @@@ void __mlx5_ib_remove(struct mlx5_ib_de
        ib_dealloc_device((struct ib_device *)dev);
  }
  
 -static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num);
 -
  void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
                    const struct mlx5_ib_profile *profile)
  {
@@@ -6218,7 -5983,7 +6224,7 @@@ static const struct mlx5_ib_profile pf_
                     mlx5_ib_stage_pre_ib_reg_umr_cleanup),
        STAGE_CREATE(MLX5_IB_STAGE_SPECS,
                     mlx5_ib_stage_populate_specs,
 -                   mlx5_ib_stage_depopulate_specs),
 +                   NULL),
        STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
                     mlx5_ib_stage_ib_reg_init,
                     mlx5_ib_stage_ib_reg_cleanup),
@@@ -6266,7 -6031,7 +6272,7 @@@ static const struct mlx5_ib_profile nic
                     mlx5_ib_stage_pre_ib_reg_umr_cleanup),
        STAGE_CREATE(MLX5_IB_STAGE_SPECS,
                     mlx5_ib_stage_populate_specs,
 -                   mlx5_ib_stage_depopulate_specs),
 +                   NULL),
        STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
                     mlx5_ib_stage_ib_reg_init,
                     mlx5_ib_stage_ib_reg_cleanup),
                     mlx5_ib_stage_rep_reg_cleanup),
  };
  
 -static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num)
 +static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev)
  {
        struct mlx5_ib_multiport_info *mpi;
        struct mlx5_ib_dev *dev;
        if (!bound) {
                list_add_tail(&mpi->list, &mlx5_ib_unaffiliated_port_list);
                dev_dbg(&mdev->pdev->dev, "no suitable IB device found to bind to, added to unaffiliated list.\n");
 -      } else {
 -              mlx5_ib_dbg(dev, "bound port %u\n", port_num + 1);
        }
        mutex_unlock(&mlx5_ib_multiport_mutex);
  
@@@ -6332,8 -6099,11 +6338,8 @@@ static void *mlx5_ib_add(struct mlx5_co
        port_type_cap = MLX5_CAP_GEN(mdev, port_type);
        ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
  
 -      if (mlx5_core_is_mp_slave(mdev) && ll == IB_LINK_LAYER_ETHERNET) {
 -              u8 port_num = mlx5_core_native_port_num(mdev) - 1;
 -
 -              return mlx5_ib_add_slave_port(mdev, port_num);
 -      }
 +      if (mlx5_core_is_mp_slave(mdev) && ll == IB_LINK_LAYER_ETHERNET)
 +              return mlx5_ib_add_slave_port(mdev);
  
        dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
        if (!dev)
        dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports),
                             MLX5_CAP_GEN(mdev, num_vhca_ports));
  
-       if (MLX5_VPORT_MANAGER(mdev) &&
+       if (MLX5_ESWITCH_MANAGER(mdev) &&
            mlx5_ib_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) {
                dev->rep = mlx5_ib_vport_rep(mdev->priv.eswitch, 0);
  
@@@ -266,18 -266,24 +266,24 @@@ struct ib_srq *mlx5_ib_create_srq(struc
  
        desc_size = sizeof(struct mlx5_wqe_srq_next_seg) +
                    srq->msrq.max_gs * sizeof(struct mlx5_wqe_data_seg);
-       if (desc_size == 0 || srq->msrq.max_gs > desc_size)
-               return ERR_PTR(-EINVAL);
+       if (desc_size == 0 || srq->msrq.max_gs > desc_size) {
+               err = -EINVAL;
+               goto err_srq;
+       }
        desc_size = roundup_pow_of_two(desc_size);
        desc_size = max_t(size_t, 32, desc_size);
-       if (desc_size < sizeof(struct mlx5_wqe_srq_next_seg))
-               return ERR_PTR(-EINVAL);
+       if (desc_size < sizeof(struct mlx5_wqe_srq_next_seg)) {
+               err = -EINVAL;
+               goto err_srq;
+       }
        srq->msrq.max_avail_gather = (desc_size - sizeof(struct mlx5_wqe_srq_next_seg)) /
                sizeof(struct mlx5_wqe_data_seg);
        srq->msrq.wqe_shift = ilog2(desc_size);
        buf_size = srq->msrq.max * desc_size;
-       if (buf_size < desc_size)
-               return ERR_PTR(-EINVAL);
+       if (buf_size < desc_size) {
+               err = -EINVAL;
+               goto err_srq;
+       }
        in.type = init_attr->srq_type;
  
        if (pd->uobject)
@@@ -440,8 -446,8 +446,8 @@@ void mlx5_ib_free_srq_wqe(struct mlx5_i
        spin_unlock(&srq->lock);
  }
  
 -int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
 -                        struct ib_recv_wr **bad_wr)
 +int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
 +                        const struct ib_recv_wr **bad_wr)
  {
        struct mlx5_ib_srq *srq = to_msrq(ibsrq);
        struct mlx5_wqe_srq_next_seg *next;
  #include <rdma/qedr-abi.h>
  #include "qedr_roce_cm.h"
  
 +#define QEDR_SRQ_WQE_ELEM_SIZE        sizeof(union rdma_srq_elm)
 +#define RDMA_MAX_SGE_PER_SRQ  (4)
 +#define RDMA_MAX_SRQ_WQE_SIZE (RDMA_MAX_SGE_PER_SRQ + 1)
 +
  #define DB_ADDR_SHIFT(addr)           ((addr) << DB_PWM_ADDR_OFFSET_SHIFT)
  
  static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src,
@@@ -88,19 -84,6 +88,19 @@@ int qedr_iw_query_gid(struct ib_device 
        return 0;
  }
  
 +int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
 +{
 +      struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
 +      struct qedr_device_attr *qattr = &dev->attr;
 +      struct qedr_srq *srq = get_qedr_srq(ibsrq);
 +
 +      srq_attr->srq_limit = srq->srq_limit;
 +      srq_attr->max_wr = qattr->max_srq_wr;
 +      srq_attr->max_sge = qattr->max_sge;
 +
 +      return 0;
 +}
 +
  int qedr_query_device(struct ib_device *ibdev,
                      struct ib_device_attr *attr, struct ib_udata *udata)
  {
            IB_DEVICE_RC_RNR_NAK_GEN |
            IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;
  
 -      attr->max_sge = qattr->max_sge;
 +      attr->max_send_sge = qattr->max_sge;
 +      attr->max_recv_sge = qattr->max_sge;
        attr->max_sge_rd = qattr->max_sge;
        attr->max_cq = qattr->max_cq;
        attr->max_cqe = qattr->max_cqe;
@@@ -242,7 -224,7 +242,7 @@@ int qedr_query_port(struct ib_device *i
        attr->lmc = 0;
        attr->sm_lid = 0;
        attr->sm_sl = 0;
 -      attr->port_cap_flags = IB_PORT_IP_BASED_GIDS;
 +      attr->ip_gids = true;
        if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
                attr->gid_tbl_len = 1;
                attr->pkey_tbl_len = 1;
@@@ -1093,19 -1075,27 +1093,19 @@@ static inline int get_gid_info_from_tab
                                          struct qed_rdma_modify_qp_in_params
                                          *qp_params)
  {
 +      const struct ib_gid_attr *gid_attr;
        enum rdma_network_type nw_type;
 -      struct ib_gid_attr gid_attr;
        const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
 -      union ib_gid gid;
        u32 ipv4_addr;
 -      int rc = 0;
        int i;
  
 -      rc = ib_get_cached_gid(ibqp->device,
 -                             rdma_ah_get_port_num(&attr->ah_attr),
 -                             grh->sgid_index, &gid, &gid_attr);
 -      if (rc)
 -              return rc;
 -
 -      qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr.ndev);
 +      gid_attr = grh->sgid_attr;
 +      qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr->ndev);
  
 -      dev_put(gid_attr.ndev);
 -      nw_type = ib_gid_to_network_type(gid_attr.gid_type, &gid);
 +      nw_type = rdma_gid_attr_network_type(gid_attr);
        switch (nw_type) {
        case RDMA_NETWORK_IPV6:
 -              memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
 +              memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
                       sizeof(qp_params->sgid));
                memcpy(&qp_params->dgid.bytes[0],
                       &grh->dgid,
                          QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
                break;
        case RDMA_NETWORK_IB:
 -              memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
 +              memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
                       sizeof(qp_params->sgid));
                memcpy(&qp_params->dgid.bytes[0],
                       &grh->dgid,
        case RDMA_NETWORK_IPV4:
                memset(&qp_params->sgid, 0, sizeof(qp_params->sgid));
                memset(&qp_params->dgid, 0, sizeof(qp_params->dgid));
 -              ipv4_addr = qedr_get_ipv4_from_gid(gid.raw);
 +              ipv4_addr = qedr_get_ipv4_from_gid(gid_attr->gid.raw);
                qp_params->sgid.ipv4_addr = ipv4_addr;
                ipv4_addr =
                    qedr_get_ipv4_from_gid(grh->dgid.raw);
@@@ -1199,21 -1189,6 +1199,21 @@@ static int qedr_check_qp_attrs(struct i
        return 0;
  }
  
 +static int qedr_copy_srq_uresp(struct qedr_dev *dev,
 +                             struct qedr_srq *srq, struct ib_udata *udata)
 +{
 +      struct qedr_create_srq_uresp uresp = {};
 +      int rc;
 +
 +      uresp.srq_id = srq->srq_id;
 +
 +      rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
 +      if (rc)
 +              DP_ERR(dev, "create srq: problem copying data to user space\n");
 +
 +      return rc;
 +}
 +
  static void qedr_copy_rq_uresp(struct qedr_dev *dev,
                               struct qedr_create_qp_uresp *uresp,
                               struct qedr_qp *qp)
@@@ -1280,18 -1255,13 +1280,18 @@@ static void qedr_set_common_qp_params(s
        qp->state = QED_ROCE_QP_STATE_RESET;
        qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
        qp->sq_cq = get_qedr_cq(attrs->send_cq);
 -      qp->rq_cq = get_qedr_cq(attrs->recv_cq);
        qp->dev = dev;
 -      qp->rq.max_sges = attrs->cap.max_recv_sge;
  
 -      DP_DEBUG(dev, QEDR_MSG_QP,
 -               "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
 -               qp->rq.max_sges, qp->rq_cq->icid);
 +      if (attrs->srq) {
 +              qp->srq = get_qedr_srq(attrs->srq);
 +      } else {
 +              qp->rq_cq = get_qedr_cq(attrs->recv_cq);
 +              qp->rq.max_sges = attrs->cap.max_recv_sge;
 +              DP_DEBUG(dev, QEDR_MSG_QP,
 +                       "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
 +                       qp->rq.max_sges, qp->rq_cq->icid);
 +      }
 +
        DP_DEBUG(dev, QEDR_MSG_QP,
                 "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n",
                 pd->pd_id, qp->qp_type, qp->max_inline_data,
@@@ -1306,303 -1276,9 +1306,303 @@@ static void qedr_set_roce_db_info(struc
        qp->sq.db = dev->db_addr +
                    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
        qp->sq.db_data.data.icid = qp->icid + 1;
 -      qp->rq.db = dev->db_addr +
 -                  DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
 -      qp->rq.db_data.data.icid = qp->icid;
 +      if (!qp->srq) {
 +              qp->rq.db = dev->db_addr +
 +                          DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
 +              qp->rq.db_data.data.icid = qp->icid;
 +      }
 +}
 +
 +static int qedr_check_srq_params(struct ib_pd *ibpd, struct qedr_dev *dev,
 +                               struct ib_srq_init_attr *attrs,
 +                               struct ib_udata *udata)
 +{
 +      struct qedr_device_attr *qattr = &dev->attr;
 +
 +      if (attrs->attr.max_wr > qattr->max_srq_wr) {
 +              DP_ERR(dev,
 +                     "create srq: unsupported srq_wr=0x%x requested (max_srq_wr=0x%x)\n",
 +                     attrs->attr.max_wr, qattr->max_srq_wr);
 +              return -EINVAL;
 +      }
 +
 +      if (attrs->attr.max_sge > qattr->max_sge) {
 +              DP_ERR(dev,
 +                     "create srq: unsupported sge=0x%x requested (max_srq_sge=0x%x)\n",
 +                     attrs->attr.max_sge, qattr->max_sge);
 +              return -EINVAL;
 +      }
 +
 +      return 0;
 +}
 +
 +static void qedr_free_srq_user_params(struct qedr_srq *srq)
 +{
 +      qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
 +      ib_umem_release(srq->usrq.umem);
 +      ib_umem_release(srq->prod_umem);
 +}
 +
 +static void qedr_free_srq_kernel_params(struct qedr_srq *srq)
 +{
 +      struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
 +      struct qedr_dev *dev = srq->dev;
 +
 +      dev->ops->common->chain_free(dev->cdev, &hw_srq->pbl);
 +
 +      dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers),
 +                        hw_srq->virt_prod_pair_addr,
 +                        hw_srq->phy_prod_pair_addr);
 +}
 +
 +static int qedr_init_srq_user_params(struct ib_ucontext *ib_ctx,
 +                                   struct qedr_srq *srq,
 +                                   struct qedr_create_srq_ureq *ureq,
 +                                   int access, int dmasync)
 +{
 +      struct scatterlist *sg;
 +      int rc;
 +
 +      rc = qedr_init_user_queue(ib_ctx, srq->dev, &srq->usrq, ureq->srq_addr,
 +                                ureq->srq_len, access, dmasync, 1);
 +      if (rc)
 +              return rc;
 +
 +      srq->prod_umem = ib_umem_get(ib_ctx, ureq->prod_pair_addr,
 +                                   sizeof(struct rdma_srq_producers),
 +                                   access, dmasync);
 +      if (IS_ERR(srq->prod_umem)) {
 +              qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
 +              ib_umem_release(srq->usrq.umem);
 +              DP_ERR(srq->dev,
 +                     "create srq: failed ib_umem_get for producer, got %ld\n",
 +                     PTR_ERR(srq->prod_umem));
 +              return PTR_ERR(srq->prod_umem);
 +      }
 +
 +      sg = srq->prod_umem->sg_head.sgl;
 +      srq->hw_srq.phy_prod_pair_addr = sg_dma_address(sg);
 +
 +      return 0;
 +}
 +
 +static int qedr_alloc_srq_kernel_params(struct qedr_srq *srq,
 +                                      struct qedr_dev *dev,
 +                                      struct ib_srq_init_attr *init_attr)
 +{
 +      struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
 +      dma_addr_t phy_prod_pair_addr;
 +      u32 num_elems;
 +      void *va;
 +      int rc;
 +
 +      va = dma_alloc_coherent(&dev->pdev->dev,
 +                              sizeof(struct rdma_srq_producers),
 +                              &phy_prod_pair_addr, GFP_KERNEL);
 +      if (!va) {
 +              DP_ERR(dev,
 +                     "create srq: failed to allocate dma memory for producer\n");
 +              return -ENOMEM;
 +      }
 +
 +      hw_srq->phy_prod_pair_addr = phy_prod_pair_addr;
 +      hw_srq->virt_prod_pair_addr = va;
 +
 +      num_elems = init_attr->attr.max_wr * RDMA_MAX_SRQ_WQE_SIZE;
 +      rc = dev->ops->common->chain_alloc(dev->cdev,
 +                                         QED_CHAIN_USE_TO_CONSUME_PRODUCE,
 +                                         QED_CHAIN_MODE_PBL,
 +                                         QED_CHAIN_CNT_TYPE_U32,
 +                                         num_elems,
 +                                         QEDR_SRQ_WQE_ELEM_SIZE,
 +                                         &hw_srq->pbl, NULL);
 +      if (rc)
 +              goto err0;
 +
 +      hw_srq->num_elems = num_elems;
 +
 +      return 0;
 +
 +err0:
 +      dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers),
 +                        va, phy_prod_pair_addr);
 +      return rc;
 +}
 +
 +static int qedr_idr_add(struct qedr_dev *dev, struct qedr_idr *qidr,
 +                      void *ptr, u32 id);
 +static void qedr_idr_remove(struct qedr_dev *dev,
 +                          struct qedr_idr *qidr, u32 id);
 +
 +struct ib_srq *qedr_create_srq(struct ib_pd *ibpd,
 +                             struct ib_srq_init_attr *init_attr,
 +                             struct ib_udata *udata)
 +{
 +      struct qed_rdma_destroy_srq_in_params destroy_in_params;
 +      struct qed_rdma_create_srq_in_params in_params = {};
 +      struct qedr_dev *dev = get_qedr_dev(ibpd->device);
 +      struct qed_rdma_create_srq_out_params out_params;
 +      struct qedr_pd *pd = get_qedr_pd(ibpd);
 +      struct qedr_create_srq_ureq ureq = {};
 +      u64 pbl_base_addr, phy_prod_pair_addr;
 +      struct ib_ucontext *ib_ctx = NULL;
 +      struct qedr_srq_hwq_info *hw_srq;
 +      struct qedr_ucontext *ctx = NULL;
 +      u32 page_cnt, page_size;
 +      struct qedr_srq *srq;
 +      int rc = 0;
 +
 +      DP_DEBUG(dev, QEDR_MSG_QP,
 +               "create SRQ called from %s (pd %p)\n",
 +               (udata) ? "User lib" : "kernel", pd);
 +
 +      rc = qedr_check_srq_params(ibpd, dev, init_attr, udata);
 +      if (rc)
 +              return ERR_PTR(-EINVAL);
 +
 +      srq = kzalloc(sizeof(*srq), GFP_KERNEL);
 +      if (!srq)
 +              return ERR_PTR(-ENOMEM);
 +
 +      srq->dev = dev;
 +      hw_srq = &srq->hw_srq;
 +      spin_lock_init(&srq->lock);
 +
 +      hw_srq->max_wr = init_attr->attr.max_wr;
 +      hw_srq->max_sges = init_attr->attr.max_sge;
 +
 +      if (udata && ibpd->uobject && ibpd->uobject->context) {
 +              ib_ctx = ibpd->uobject->context;
 +              ctx = get_qedr_ucontext(ib_ctx);
 +
 +              if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
 +                      DP_ERR(dev,
 +                             "create srq: problem copying data from user space\n");
 +                      rc = -EFAULT;
 +                      goto err0;
 +              }
 +
 +              rc = qedr_init_srq_user_params(ib_ctx, srq, &ureq, 0, 0);
 +              if (rc)
 +                      goto err0;
 +
 +              page_cnt = srq->usrq.pbl_info.num_pbes;
 +              pbl_base_addr = srq->usrq.pbl_tbl->pa;
 +              phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
 +              page_size = BIT(srq->usrq.umem->page_shift);
 +      } else {
 +              struct qed_chain *pbl;
 +
 +              rc = qedr_alloc_srq_kernel_params(srq, dev, init_attr);
 +              if (rc)
 +                      goto err0;
 +
 +              pbl = &hw_srq->pbl;
 +              page_cnt = qed_chain_get_page_cnt(pbl);
 +              pbl_base_addr = qed_chain_get_pbl_phys(pbl);
 +              phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
 +              page_size = QED_CHAIN_PAGE_SIZE;
 +      }
 +
 +      in_params.pd_id = pd->pd_id;
 +      in_params.pbl_base_addr = pbl_base_addr;
 +      in_params.prod_pair_addr = phy_prod_pair_addr;
 +      in_params.num_pages = page_cnt;
 +      in_params.page_size = page_size;
 +
 +      rc = dev->ops->rdma_create_srq(dev->rdma_ctx, &in_params, &out_params);
 +      if (rc)
 +              goto err1;
 +
 +      srq->srq_id = out_params.srq_id;
 +
 +      if (udata) {
 +              rc = qedr_copy_srq_uresp(dev, srq, udata);
 +              if (rc)
 +                      goto err2;
 +      }
 +
 +      rc = qedr_idr_add(dev, &dev->srqidr, srq, srq->srq_id);
 +      if (rc)
 +              goto err2;
 +
 +      DP_DEBUG(dev, QEDR_MSG_SRQ,
 +               "create srq: created srq with srq_id=0x%0x\n", srq->srq_id);
 +      return &srq->ibsrq;
 +
 +err2:
 +      destroy_in_params.srq_id = srq->srq_id;
 +
 +      dev->ops->rdma_destroy_srq(dev->rdma_ctx, &destroy_in_params);
 +err1:
 +      if (udata)
 +              qedr_free_srq_user_params(srq);
 +      else
 +              qedr_free_srq_kernel_params(srq);
 +err0:
 +      kfree(srq);
 +
 +      return ERR_PTR(rc);
 +}
 +
 +int qedr_destroy_srq(struct ib_srq *ibsrq)
 +{
 +      struct qed_rdma_destroy_srq_in_params in_params = {};
 +      struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
 +      struct qedr_srq *srq = get_qedr_srq(ibsrq);
 +
 +      qedr_idr_remove(dev, &dev->srqidr, srq->srq_id);
 +      in_params.srq_id = srq->srq_id;
 +      dev->ops->rdma_destroy_srq(dev->rdma_ctx, &in_params);
 +
 +      if (ibsrq->pd->uobject)
 +              qedr_free_srq_user_params(srq);
 +      else
 +              qedr_free_srq_kernel_params(srq);
 +
 +      DP_DEBUG(dev, QEDR_MSG_SRQ,
 +               "destroy srq: destroyed srq with srq_id=0x%0x\n",
 +               srq->srq_id);
 +      kfree(srq);
 +
 +      return 0;
 +}
 +
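/*
 * Rough kernel-side usage of the SRQ entry points added above, going
 * through the core verbs API rather than calling the driver directly;
 * "pd" is assumed to be a previously allocated protection domain and the
 * sizes are illustrative only:
 *
 *	struct ib_srq_init_attr init = {
 *		.srq_type = IB_SRQT_BASIC,
 *		.attr = { .max_wr = 128, .max_sge = 2 },
 *	};
 *	struct ib_srq *srq;
 *
 *	srq = ib_create_srq(pd, &init);
 *	if (IS_ERR(srq))
 *		return PTR_ERR(srq);
 *	...
 *	ib_destroy_srq(srq);
 */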
 +int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
 +                  enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
 +{
 +      struct qed_rdma_modify_srq_in_params in_params = {};
 +      struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
 +      struct qedr_srq *srq = get_qedr_srq(ibsrq);
 +      int rc;
 +
 +      if (attr_mask & IB_SRQ_MAX_WR) {
 +              DP_ERR(dev,
 +                     "modify srq: invalid attribute mask=0x%x specified for %p\n",
 +                     attr_mask, srq);
 +              return -EINVAL;
 +      }
 +
 +      if (attr_mask & IB_SRQ_LIMIT) {
 +              if (attr->srq_limit >= srq->hw_srq.max_wr) {
 +                      DP_ERR(dev,
 +                             "modify srq: invalid srq_limit=0x%x (max_srq_limit=0x%x)\n",
 +                             attr->srq_limit, srq->hw_srq.max_wr);
 +                      return -EINVAL;
 +              }
 +
 +              in_params.srq_id = srq->srq_id;
 +              in_params.wqe_limit = attr->srq_limit;
 +              rc = dev->ops->rdma_modify_srq(dev->rdma_ctx, &in_params);
 +              if (rc)
 +                      return rc;
 +      }
 +
 +      srq->srq_limit = attr->srq_limit;
 +
 +      DP_DEBUG(dev, QEDR_MSG_SRQ,
 +               "modify srq: modified srq with srq_id=0x%0x\n", srq->srq_id);
 +
 +      return 0;
  }
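/*
 * Arming the SRQ limit from a kernel consumer, under the same assumptions
 * as the sketch above, would look roughly like:
 *
 *	struct ib_srq_attr attr = { .srq_limit = 16 };
 *
 *	rc = ib_modify_srq(srq, &attr, IB_SRQ_LIMIT);
 */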
  
  static inline void
@@@ -1623,17 -1299,9 +1623,17 @@@ qedr_init_common_qp_in_params(struct qe
        params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
        params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
        params->stats_queue = 0;
 -      params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
        params->srq_id = 0;
        params->use_srq = false;
 +
 +      if (!qp->srq) {
 +              params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
 +
 +      } else {
 +              params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
 +              params->srq_id = qp->srq->srq_id;
 +              params->use_srq = true;
 +      }
  }
  
  static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
                 qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len);
  }
  
 -static int qedr_idr_add(struct qedr_dev *dev, void *ptr, u32 id)
 +static int qedr_idr_add(struct qedr_dev *dev, struct qedr_idr *qidr,
 +                      void *ptr, u32 id)
  {
        int rc;
  
 -      if (!rdma_protocol_iwarp(&dev->ibdev, 1))
 -              return 0;
 -
        idr_preload(GFP_KERNEL);
 -      spin_lock_irq(&dev->idr_lock);
 +      spin_lock_irq(&qidr->idr_lock);
  
 -      rc = idr_alloc(&dev->qpidr, ptr, id, id + 1, GFP_ATOMIC);
 +      rc = idr_alloc(&qidr->idr, ptr, id, id + 1, GFP_ATOMIC);
  
 -      spin_unlock_irq(&dev->idr_lock);
 +      spin_unlock_irq(&qidr->idr_lock);
        idr_preload_end();
  
        return rc < 0 ? rc : 0;
  }
  
 -static void qedr_idr_remove(struct qedr_dev *dev, u32 id)
 +static void qedr_idr_remove(struct qedr_dev *dev, struct qedr_idr *qidr, u32 id)
  {
 -      if (!rdma_protocol_iwarp(&dev->ibdev, 1))
 -              return;
 -
 -      spin_lock_irq(&dev->idr_lock);
 -      idr_remove(&dev->qpidr, id);
 -      spin_unlock_irq(&dev->idr_lock);
 +      spin_lock_irq(&qidr->idr_lock);
 +      idr_remove(&qidr->idr, id);
 +      spin_unlock_irq(&qidr->idr_lock);
  }
  
  static inline void
@@@ -1683,10 -1356,9 +1683,10 @@@ qedr_iwarp_populate_user_qp(struct qedr
  
        qedr_populate_pbls(dev, qp->usq.umem, qp->usq.pbl_tbl,
                           &qp->usq.pbl_info, FW_PAGE_SHIFT);
 -
 -      qp->urq.pbl_tbl->va = out_params->rq_pbl_virt;
 -      qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys;
 +      if (!qp->srq) {
 +              qp->urq.pbl_tbl->va = out_params->rq_pbl_virt;
 +              qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys;
 +      }
  
        qedr_populate_pbls(dev, qp->urq.umem, qp->urq.pbl_tbl,
                           &qp->urq.pbl_info, FW_PAGE_SHIFT);
@@@ -1732,13 -1404,11 +1732,13 @@@ static int qedr_create_user_qp(struct q
        if (rc)
                return rc;
  
 -      /* RQ - read access only (0), dma sync not required (0) */
 -      rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq.rq_addr,
 -                                ureq.rq_len, 0, 0, alloc_and_init);
 -      if (rc)
 -              return rc;
 +      if (!qp->srq) {
 +              /* RQ - read access only (0), dma sync not required (0) */
 +              rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq.rq_addr,
 +                                        ureq.rq_len, 0, 0, alloc_and_init);
 +              if (rc)
 +                      return rc;
 +      }
  
        memset(&in_params, 0, sizeof(in_params));
        qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params);
        in_params.qp_handle_hi = ureq.qp_handle_hi;
        in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
        in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
 -      in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
 -      in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
 +      if (!qp->srq) {
 +              in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
 +              in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
 +      }
  
        qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
                                              &in_params, &out_params);
@@@ -2011,13 -1679,16 +2011,13 @@@ struct ib_qp *qedr_create_qp(struct ib_
        if (rc)
                return ERR_PTR(rc);
  
 -      if (attrs->srq)
 -              return ERR_PTR(-EINVAL);
 -
        DP_DEBUG(dev, QEDR_MSG_QP,
                 "create qp: called from %s, event_handler=%p, eepd=%p sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
                 udata ? "user library" : "kernel", attrs->event_handler, pd,
                 get_qedr_cq(attrs->send_cq),
                 get_qedr_cq(attrs->send_cq)->icid,
                 get_qedr_cq(attrs->recv_cq),
 -               get_qedr_cq(attrs->recv_cq)->icid);
 +               attrs->recv_cq ? get_qedr_cq(attrs->recv_cq)->icid : 0);
  
        qp = kzalloc(sizeof(*qp), GFP_KERNEL);
        if (!qp) {
  
        qp->ibqp.qp_num = qp->qp_id;
  
 -      rc = qedr_idr_add(dev, qp, qp->qp_id);
 -      if (rc)
 -              goto err;
 +      if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
 +              rc = qedr_idr_add(dev, &dev->qpidr, qp, qp->qp_id);
 +              if (rc)
 +                      goto err;
 +      }
  
        return &qp->ibqp;
  
@@@ -2288,6 -1957,9 +2288,9 @@@ int qedr_modify_qp(struct ib_qp *ibqp, 
        }
  
        if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) {
+               if (rdma_protocol_iwarp(&dev->ibdev, 1))
+                       return -EINVAL;
                if (attr_mask & IB_QP_PATH_MTU) {
                        if (attr->path_mtu < IB_MTU_256 ||
                            attr->path_mtu > IB_MTU_4096) {
@@@ -2617,9 -2289,8 +2620,9 @@@ int qedr_destroy_qp(struct ib_qp *ibqp
  
        qedr_free_qp_resources(dev, qp);
  
 -      if (atomic_dec_and_test(&qp->refcnt)) {
 -              qedr_idr_remove(dev, qp->qp_id);
 +      if (atomic_dec_and_test(&qp->refcnt) &&
 +          rdma_protocol_iwarp(&dev->ibdev, 1)) {
 +              qedr_idr_remove(dev, &dev->qpidr, qp->qp_id);
                kfree(qp);
        }
        return rc;
@@@ -2634,7 -2305,7 +2637,7 @@@ struct ib_ah *qedr_create_ah(struct ib_
        if (!ah)
                return ERR_PTR(-ENOMEM);
  
 -      ah->attr = *attr;
 +      rdma_copy_ah_attr(&ah->attr, attr);
  
        return &ah->ibah;
  }
@@@ -2643,7 -2314,6 +2646,7 @@@ int qedr_destroy_ah(struct ib_ah *ibah
  {
        struct qedr_ah *ah = get_qedr_ah(ibah);
  
 +      rdma_destroy_ah_attr(&ah->attr);
        kfree(ah);
        return 0;
  }
@@@ -3035,9 -2705,9 +3038,9 @@@ static void swap_wqe_data64(u64 *p
  
  static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev,
                                       struct qedr_qp *qp, u8 *wqe_size,
 -                                     struct ib_send_wr *wr,
 -                                     struct ib_send_wr **bad_wr, u8 *bits,
 -                                     u8 bit)
 +                                     const struct ib_send_wr *wr,
 +                                     const struct ib_send_wr **bad_wr,
 +                                     u8 *bits, u8 bit)
  {
        u32 data_size = sge_data_len(wr->sg_list, wr->num_sge);
        char *seg_prt, *wqe;
        } while (0)
  
  static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size,
 -                              struct ib_send_wr *wr)
 +                              const struct ib_send_wr *wr)
  {
        u32 data_size = 0;
        int i;
@@@ -3144,8 -2814,8 +3147,8 @@@ static u32 qedr_prepare_sq_rdma_data(st
                                     struct qedr_qp *qp,
                                     struct rdma_sq_rdma_wqe_1st *rwqe,
                                     struct rdma_sq_rdma_wqe_2nd *rwqe2,
 -                                   struct ib_send_wr *wr,
 -                                   struct ib_send_wr **bad_wr)
 +                                   const struct ib_send_wr *wr,
 +                                   const struct ib_send_wr **bad_wr)
  {
        rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey);
        DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr);
@@@ -3167,8 -2837,8 +3170,8 @@@ static u32 qedr_prepare_sq_send_data(st
                                     struct qedr_qp *qp,
                                     struct rdma_sq_send_wqe_1st *swqe,
                                     struct rdma_sq_send_wqe_2st *swqe2,
 -                                   struct ib_send_wr *wr,
 -                                   struct ib_send_wr **bad_wr)
 +                                   const struct ib_send_wr *wr,
 +                                   const struct ib_send_wr **bad_wr)
  {
        memset(swqe2, 0, sizeof(*swqe2));
        if (wr->send_flags & IB_SEND_INLINE) {
  
  static int qedr_prepare_reg(struct qedr_qp *qp,
                            struct rdma_sq_fmr_wqe_1st *fwqe1,
 -                          struct ib_reg_wr *wr)
 +                          const struct ib_reg_wr *wr)
  {
        struct qedr_mr *mr = get_qedr_mr(wr->mr);
        struct rdma_sq_fmr_wqe_2nd *fwqe2;
@@@ -3246,8 -2916,7 +3249,8 @@@ static enum ib_wc_opcode qedr_ib_to_wc_
        }
  }
  
 -static inline bool qedr_can_post_send(struct qedr_qp *qp, struct ib_send_wr *wr)
 +static inline bool qedr_can_post_send(struct qedr_qp *qp,
 +                                    const struct ib_send_wr *wr)
  {
        int wq_is_full, err_wr, pbl_is_full;
        struct qedr_dev *dev = qp->dev;
        return true;
  }
  
 -static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 -                   struct ib_send_wr **bad_wr)
 +static int __qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 +                          const struct ib_send_wr **bad_wr)
  {
        struct qedr_dev *dev = get_qedr_dev(ibqp->device);
        struct qedr_qp *qp = get_qedr_qp(ibqp);
        return rc;
  }
  
 -int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 -                 struct ib_send_wr **bad_wr)
 +int qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 +                 const struct ib_send_wr **bad_wr)
  {
        struct qedr_dev *dev = get_qedr_dev(ibqp->device);
        struct qedr_qp *qp = get_qedr_qp(ibqp);
        return rc;
  }
  
 -int qedr_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 -                 struct ib_recv_wr **bad_wr)
 +static u32 qedr_srq_elem_left(struct qedr_srq_hwq_info *hw_srq)
 +{
 +      u32 used;
 +
 +      /* Calculate number of elements used based on producer
 +       * count and consumer count and subtract it from max
 +       * work request supported so that we get elements left.
 +       */
 +      used = hw_srq->wr_prod_cnt - hw_srq->wr_cons_cnt;
 +
 +      return hw_srq->max_wr - used;
 +}
 +
 +int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
 +                     const struct ib_recv_wr **bad_wr)
 +{
 +      struct qedr_srq *srq = get_qedr_srq(ibsrq);
 +      struct qedr_srq_hwq_info *hw_srq;
 +      struct qedr_dev *dev = srq->dev;
 +      struct qed_chain *pbl;
 +      unsigned long flags;
 +      int status = 0;
 +      u32 num_sge;
 +      u32 offset;
 +
 +      spin_lock_irqsave(&srq->lock, flags);
 +
 +      hw_srq = &srq->hw_srq;
 +      pbl = &srq->hw_srq.pbl;
 +      while (wr) {
 +              struct rdma_srq_wqe_header *hdr;
 +              int i;
 +
 +              if (!qedr_srq_elem_left(hw_srq) ||
 +                  wr->num_sge > srq->hw_srq.max_sges) {
 +                      DP_ERR(dev, "Can't post WR  (%d,%d) || (%d > %d)\n",
 +                             hw_srq->wr_prod_cnt, hw_srq->wr_cons_cnt,
 +                             wr->num_sge, srq->hw_srq.max_sges);
 +                      status = -ENOMEM;
 +                      *bad_wr = wr;
 +                      break;
 +              }
 +
 +              hdr = qed_chain_produce(pbl);
 +              num_sge = wr->num_sge;
 +              /* Set number of sge and work request id in header */
 +              SRQ_HDR_SET(hdr, wr->wr_id, num_sge);
 +
 +              srq->hw_srq.wr_prod_cnt++;
 +              hw_srq->wqe_prod++;
 +              hw_srq->sge_prod++;
 +
 +              DP_DEBUG(dev, QEDR_MSG_SRQ,
 +                       "SRQ WR: SGEs: %d with wr_id[%d] = %llx\n",
 +                       wr->num_sge, hw_srq->wqe_prod, wr->wr_id);
 +
 +              for (i = 0; i < wr->num_sge; i++) {
 +                      struct rdma_srq_sge *srq_sge = qed_chain_produce(pbl);
 +
 +                      /* Set SGE length, lkey and address */
 +                      SRQ_SGE_SET(srq_sge, wr->sg_list[i].addr,
 +                                  wr->sg_list[i].length, wr->sg_list[i].lkey);
 +
 +                      DP_DEBUG(dev, QEDR_MSG_SRQ,
 +                               "[%d]: len %d key %x addr %x:%x\n",
 +                               i, srq_sge->length, srq_sge->l_key,
 +                               srq_sge->addr.hi, srq_sge->addr.lo);
 +                      hw_srq->sge_prod++;
 +              }
 +
 +              /* Flush WQE and SGE information before
 +               * updating producer.
 +               */
 +              wmb();
 +
 +              /* SRQ producer is 8 bytes. Need to update SGE producer index
 +               * in first 4 bytes and need to update WQE producer in
 +               * next 4 bytes.
 +               */
 +              *srq->hw_srq.virt_prod_pair_addr = hw_srq->sge_prod;
 +              offset = offsetof(struct rdma_srq_producers, wqe_prod);
 +              *((u8 *)srq->hw_srq.virt_prod_pair_addr + offset) =
 +                      hw_srq->wqe_prod;
 +
 +              /* Flush producer after updating it. */
 +              wmb();
 +              wr = wr->next;
 +      }
 +
 +      DP_DEBUG(dev, QEDR_MSG_SRQ, "POST: Elements in S-RQ: %d\n",
 +               qed_chain_get_elem_left(pbl));
 +      spin_unlock_irqrestore(&srq->lock, flags);
 +
 +      return status;
 +}
 +
 +int qedr_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
 +                 const struct ib_recv_wr **bad_wr)
  {
        struct qedr_qp *qp = get_qedr_qp(ibqp);
        struct qedr_dev *dev = qp->dev;
@@@ -4052,31 -3625,6 +4055,31 @@@ static void __process_resp_one(struct q
        wc->wr_id = wr_id;
  }
  
 +static int process_resp_one_srq(struct qedr_dev *dev, struct qedr_qp *qp,
 +                              struct qedr_cq *cq, struct ib_wc *wc,
 +                              struct rdma_cqe_responder *resp)
 +{
 +      struct qedr_srq *srq = qp->srq;
 +      u64 wr_id;
 +
 +      wr_id = HILO_GEN(le32_to_cpu(resp->srq_wr_id.hi),
 +                       le32_to_cpu(resp->srq_wr_id.lo), u64);
 +
 +      if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
 +              wc->status = IB_WC_WR_FLUSH_ERR;
 +              wc->vendor_err = 0;
 +              wc->wr_id = wr_id;
 +              wc->byte_len = 0;
 +              wc->src_qp = qp->id;
 +              wc->qp = &qp->ibqp;
 +              wc->wr_id = wr_id;
 +      } else {
 +              __process_resp_one(dev, qp, cq, wc, resp, wr_id);
 +      }
 +      srq->hw_srq.wr_cons_cnt++;
 +
 +      return 1;
 +}
  static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
                            struct qedr_cq *cq, struct ib_wc *wc,
                            struct rdma_cqe_responder *resp)
@@@ -4126,19 -3674,6 +4129,19 @@@ static void try_consume_resp_cqe(struc
        }
  }
  
 +static int qedr_poll_cq_resp_srq(struct qedr_dev *dev, struct qedr_qp *qp,
 +                               struct qedr_cq *cq, int num_entries,
 +                               struct ib_wc *wc,
 +                               struct rdma_cqe_responder *resp)
 +{
 +      int cnt;
 +
 +      cnt = process_resp_one_srq(dev, qp, cq, wc, resp);
 +      consume_cqe(cq);
 +
 +      return cnt;
 +}
 +
  static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp,
                             struct qedr_cq *cq, int num_entries,
                             struct ib_wc *wc, struct rdma_cqe_responder *resp,
@@@ -4216,11 -3751,6 +4219,11 @@@ int qedr_poll_cq(struct ib_cq *ibcq, in
                        cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc,
                                                &cqe->resp, &update);
                        break;
 +              case RDMA_CQE_TYPE_RESPONDER_SRQ:
 +                      cnt = qedr_poll_cq_resp_srq(dev, qp, cq, num_entries,
 +                                                  wc, &cqe->resp);
 +                      update = 1;
 +                      break;
                case RDMA_CQE_TYPE_INVALID:
                default:
                        DP_ERR(dev, "Error: invalid CQE type = %d\n",
@@@ -278,7 -278,6 +278,7 @@@ static int mlx5_internal_err_ret_value(
        case MLX5_CMD_OP_DESTROY_PSV:
        case MLX5_CMD_OP_DESTROY_SRQ:
        case MLX5_CMD_OP_DESTROY_XRC_SRQ:
 +      case MLX5_CMD_OP_DESTROY_XRQ:
        case MLX5_CMD_OP_DESTROY_DCT:
        case MLX5_CMD_OP_DEALLOC_Q_COUNTER:
        case MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT:
        case MLX5_CMD_OP_DEALLOC_ENCAP_HEADER:
        case MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT:
        case MLX5_CMD_OP_FPGA_DESTROY_QP:
 +      case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT:
                return MLX5_CMD_STAT_OK;
  
        case MLX5_CMD_OP_QUERY_HCA_CAP:
        case MLX5_CMD_OP_CREATE_XRC_SRQ:
        case MLX5_CMD_OP_QUERY_XRC_SRQ:
        case MLX5_CMD_OP_ARM_XRC_SRQ:
 +      case MLX5_CMD_OP_CREATE_XRQ:
 +      case MLX5_CMD_OP_QUERY_XRQ:
 +      case MLX5_CMD_OP_ARM_XRQ:
        case MLX5_CMD_OP_CREATE_DCT:
        case MLX5_CMD_OP_DRAIN_DCT:
        case MLX5_CMD_OP_QUERY_DCT:
        case MLX5_CMD_OP_FPGA_MODIFY_QP:
        case MLX5_CMD_OP_FPGA_QUERY_QP:
        case MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS:
 +      case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
 +      case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
 +      case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
                *status = MLX5_DRIVER_STATUS_ABORTED;
                *synd = MLX5_DRIVER_SYND;
                return -EIO;
@@@ -460,7 -452,6 +460,7 @@@ const char *mlx5_command_str(int comman
        MLX5_COMMAND_STR_CASE(SET_HCA_CAP);
        MLX5_COMMAND_STR_CASE(QUERY_ISSI);
        MLX5_COMMAND_STR_CASE(SET_ISSI);
 +      MLX5_COMMAND_STR_CASE(SET_DRIVER_VERSION);
        MLX5_COMMAND_STR_CASE(CREATE_MKEY);
        MLX5_COMMAND_STR_CASE(QUERY_MKEY);
        MLX5_COMMAND_STR_CASE(DESTROY_MKEY);
        MLX5_COMMAND_STR_CASE(FPGA_QUERY_QP);
        MLX5_COMMAND_STR_CASE(FPGA_QUERY_QP_COUNTERS);
        MLX5_COMMAND_STR_CASE(FPGA_DESTROY_QP);
 +      MLX5_COMMAND_STR_CASE(CREATE_XRQ);
 +      MLX5_COMMAND_STR_CASE(DESTROY_XRQ);
 +      MLX5_COMMAND_STR_CASE(QUERY_XRQ);
 +      MLX5_COMMAND_STR_CASE(ARM_XRQ);
 +      MLX5_COMMAND_STR_CASE(CREATE_GENERAL_OBJECT);
 +      MLX5_COMMAND_STR_CASE(DESTROY_GENERAL_OBJECT);
 +      MLX5_COMMAND_STR_CASE(MODIFY_GENERAL_OBJECT);
 +      MLX5_COMMAND_STR_CASE(QUERY_GENERAL_OBJECT);
 +      MLX5_COMMAND_STR_CASE(QUERY_MODIFY_HEADER_CONTEXT);
        default: return "unknown command opcode";
        }
  }
@@@ -695,7 -677,7 +695,7 @@@ struct mlx5_ifc_mbox_out_bits 
  
  struct mlx5_ifc_mbox_in_bits {
        u8         opcode[0x10];
 -      u8         reserved_at_10[0x10];
 +      u8         uid[0x10];
  
        u8         reserved_at_20[0x10];
        u8         op_mod[0x10];
@@@ -715,7 -697,6 +715,7 @@@ static int mlx5_cmd_check(struct mlx5_c
        u8  status;
        u16 opcode;
        u16 op_mod;
 +      u16 uid;
  
        mlx5_cmd_mbox_status(out, &status, &syndrome);
        if (!status)
  
        opcode = MLX5_GET(mbox_in, in, opcode);
        op_mod = MLX5_GET(mbox_in, in, op_mod);
 +      uid    = MLX5_GET(mbox_in, in, uid);
  
 -      mlx5_core_err(dev,
 +      if (!uid && opcode != MLX5_CMD_OP_DESTROY_MKEY)
 +              mlx5_core_err_rl(dev,
 +                      "%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x)\n",
 +                      mlx5_command_str(opcode), opcode, op_mod,
 +                      cmd_status_str(status), status, syndrome);
 +      else
 +              mlx5_core_dbg(dev,
                      "%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x)\n",
                      mlx5_command_str(opcode),
                      opcode, op_mod,
@@@ -833,6 -807,7 +833,7 @@@ static void cmd_work_handler(struct wor
        unsigned long flags;
        bool poll_cmd = ent->polling;
        int alloc_ret;
+       int cmd_mode;
  
        sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem;
        down(sem);
        set_signature(ent, !cmd->checksum_disabled);
        dump_command(dev, ent, 1);
        ent->ts1 = ktime_get_ns();
+       cmd_mode = cmd->mode;
  
        if (ent->callback)
                schedule_delayed_work(&ent->cb_timeout_work, cb_timeout);
        iowrite32be(1 << ent->idx, &dev->iseg->cmd_dbell);
        mmiowb();
        /* if not in polling don't use ent after this point */
-       if (cmd->mode == CMD_MODE_POLLING || poll_cmd) {
+       if (cmd_mode == CMD_MODE_POLLING || poll_cmd) {
                poll_timeout(ent);
                /* make sure we read the descriptor after ownership is SW */
                rmb();
@@@ -1046,10 -1022,7 +1048,10 @@@ static ssize_t dbg_write(struct file *f
        if (!dbg->in_msg || !dbg->out_msg)
                return -ENOMEM;
  
 -      if (copy_from_user(lbuf, buf, sizeof(lbuf)))
 +      if (count < sizeof(lbuf) - 1)
 +              return -EINVAL;
 +
 +      if (copy_from_user(lbuf, buf, sizeof(lbuf) - 1))
                return -EFAULT;
  
        lbuf[sizeof(lbuf) - 1] = 0;
@@@ -1253,12 -1226,21 +1255,12 @@@ static ssize_t data_read(struct file *f
  {
        struct mlx5_core_dev *dev = filp->private_data;
        struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
 -      int copy;
 -
 -      if (*pos)
 -              return 0;
  
        if (!dbg->out_msg)
                return -ENOMEM;
  
 -      copy = min_t(int, count, dbg->outlen);
 -      if (copy_to_user(buf, dbg->out_msg, copy))
 -              return -EFAULT;
 -
 -      *pos += copy;
 -
 -      return copy;
 +      return simple_read_from_buffer(buf, count, pos, dbg->out_msg,
 +                                     dbg->outlen);
  }
  
  static const struct file_operations dfops = {
@@@ -1276,11 -1258,19 +1278,11 @@@ static ssize_t outlen_read(struct file 
        char outlen[8];
        int err;
  
 -      if (*pos)
 -              return 0;
 -
        err = snprintf(outlen, sizeof(outlen), "%d", dbg->outlen);
        if (err < 0)
                return err;
  
 -      if (copy_to_user(buf, &outlen, err))
 -              return -EFAULT;
 -
 -      *pos += err;
 -
 -      return err;
 +      return simple_read_from_buffer(buf, count, pos, outlen, err);
  }
  
  static ssize_t outlen_write(struct file *filp, const char __user *buf,
  {
        struct mlx5_core_dev *dev = filp->private_data;
        struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
-       char outlen_str[8];
+       char outlen_str[8] = {0};
        int outlen;
        void *ptr;
        int err;
        if (copy_from_user(outlen_str, buf, count))
                return -EFAULT;
  
-       outlen_str[7] = 0;
        err = sscanf(outlen_str, "%d", &outlen);
        if (err < 0)
                return err;
@@@ -70,9 -70,9 +70,9 @@@ mlx5_eswitch_add_offloaded_rule(struct 
                flow_act.action &= ~(MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH |
                                     MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
        else if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) {
 -              flow_act.vlan.ethtype = ntohs(attr->vlan_proto);
 -              flow_act.vlan.vid = attr->vlan_vid;
 -              flow_act.vlan.prio = attr->vlan_prio;
 +              flow_act.vlan[0].ethtype = ntohs(attr->vlan_proto);
 +              flow_act.vlan[0].vid = attr->vlan_vid;
 +              flow_act.vlan[0].prio = attr->vlan_prio;
        }
  
        if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
@@@ -1079,8 -1079,8 +1079,8 @@@ static int mlx5_devlink_eswitch_check(s
        if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
                return -EOPNOTSUPP;
  
-       if (!MLX5_CAP_GEN(dev, vport_group_manager))
-               return -EOPNOTSUPP;
+       if (!MLX5_ESWITCH_MANAGER(dev))
+               return -EPERM;
  
        if (dev->priv.eswitch->mode == SRIOV_NONE)
                return -EOPNOTSUPP;
@@@ -32,6 -32,7 +32,7 @@@
  
  #include <linux/mutex.h>
  #include <linux/mlx5/driver.h>
+ #include <linux/mlx5/eswitch.h>
  
  #include "mlx5_core.h"
  #include "fs_core.h"
@@@ -309,17 -310,89 +310,17 @@@ static struct fs_prio *find_prio(struc
        return NULL;
  }
  
 -static bool check_last_reserved(const u32 *match_criteria)
 -{
 -      char *match_criteria_reserved =
 -              MLX5_ADDR_OF(fte_match_param, match_criteria, MLX5_FTE_MATCH_PARAM_RESERVED);
 -
 -      return  !match_criteria_reserved[0] &&
 -              !memcmp(match_criteria_reserved, match_criteria_reserved + 1,
 -                      MLX5_FLD_SZ_BYTES(fte_match_param,
 -                                        MLX5_FTE_MATCH_PARAM_RESERVED) - 1);
 -}
 -
 -static bool check_valid_mask(u8 match_criteria_enable, const u32 *match_criteria)
 -{
 -      if (match_criteria_enable & ~(
 -              (1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS)   |
 -              (1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS) |
 -              (1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS) |
 -              (1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS_2)))
 -              return false;
 -
 -      if (!(match_criteria_enable &
 -            1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS)) {
 -              char *fg_type_mask = MLX5_ADDR_OF(fte_match_param,
 -                                                match_criteria, outer_headers);
 -
 -              if (fg_type_mask[0] ||
 -                  memcmp(fg_type_mask, fg_type_mask + 1,
 -                         MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4) - 1))
 -                      return false;
 -      }
 -
 -      if (!(match_criteria_enable &
 -            1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS)) {
 -              char *fg_type_mask = MLX5_ADDR_OF(fte_match_param,
 -                                                match_criteria, misc_parameters);
 -
 -              if (fg_type_mask[0] ||
 -                  memcmp(fg_type_mask, fg_type_mask + 1,
 -                         MLX5_ST_SZ_BYTES(fte_match_set_misc) - 1))
 -                      return false;
 -      }
 -
 -      if (!(match_criteria_enable &
 -            1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS)) {
 -              char *fg_type_mask = MLX5_ADDR_OF(fte_match_param,
 -                                                match_criteria, inner_headers);
 -
 -              if (fg_type_mask[0] ||
 -                  memcmp(fg_type_mask, fg_type_mask + 1,
 -                         MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4) - 1))
 -                      return false;
 -      }
 -
 -      if (!(match_criteria_enable &
 -            1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS_2)) {
 -              char *fg_type_mask = MLX5_ADDR_OF(fte_match_param,
 -                                                match_criteria, misc_parameters_2);
 -
 -              if (fg_type_mask[0] ||
 -                  memcmp(fg_type_mask, fg_type_mask + 1,
 -                         MLX5_ST_SZ_BYTES(fte_match_set_misc2) - 1))
 -                      return false;
 -      }
 -
 -      return check_last_reserved(match_criteria);
 -}
 -
  static bool check_valid_spec(const struct mlx5_flow_spec *spec)
  {
        int i;
  
 -      if (!check_valid_mask(spec->match_criteria_enable, spec->match_criteria)) {
 -              pr_warn("mlx5_core: Match criteria given mismatches match_criteria_enable\n");
 -              return false;
 -      }
 -
        for (i = 0; i < MLX5_ST_SZ_DW_MATCH_PARAM; i++)
                if (spec->match_value[i] & ~spec->match_criteria[i]) {
                        pr_warn("mlx5_core: match_value differs from match_criteria\n");
                        return false;
                }
  
 -      return check_last_reserved(spec->match_value);
 +      return true;
  }
  
  static struct mlx5_flow_root_namespace *find_root(struct fs_node *node)
@@@ -1086,6 -1159,9 +1087,6 @@@ struct mlx5_flow_group *mlx5_create_flo
        struct mlx5_flow_group *fg;
        int err;
  
 -      if (!check_valid_mask(match_criteria_enable, match_criteria))
 -              return ERR_PTR(-EINVAL);
 -
        if (ft->autogroup.active)
                return ERR_PTR(-EPERM);
  
@@@ -1356,9 -1432,7 +1357,9 @@@ static bool mlx5_flow_dests_cmp(struct 
                    (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
                     d1->ft == d2->ft) ||
                    (d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
 -                   d1->tir_num == d2->tir_num))
 +                   d1->tir_num == d2->tir_num) ||
 +                  (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM &&
 +                   d1->ft_num == d2->ft_num))
                        return true;
        }
  
@@@ -1391,9 -1465,7 +1392,9 @@@ static bool check_conflicting_actions(u
                             MLX5_FLOW_CONTEXT_ACTION_DECAP |
                             MLX5_FLOW_CONTEXT_ACTION_MOD_HDR  |
                             MLX5_FLOW_CONTEXT_ACTION_VLAN_POP |
 -                           MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH))
 +                           MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH |
 +                           MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2 |
 +                           MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2))
                return true;
  
        return false;
@@@ -1752,7 -1824,7 +1753,7 @@@ search_again_locked
  
        g = alloc_auto_flow_group(ft, spec);
        if (IS_ERR(g)) {
 -              rule = (void *)g;
 +              rule = ERR_CAST(g);
                up_write_ref_node(&ft->node);
                return rule;
        }
@@@ -1815,7 -1887,7 +1816,7 @@@ mlx5_add_flow_rules(struct mlx5_flow_ta
        if (flow_act->action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
                if (!fwd_next_prio_supported(ft))
                        return ERR_PTR(-EOPNOTSUPP);
-               if (dest)
+               if (dest_num)
                        return ERR_PTR(-EINVAL);
                mutex_lock(&root->chain_lock);
                next_ft = find_next_chained_ft(prio);
@@@ -2581,7 -2653,7 +2582,7 @@@ int mlx5_init_fs(struct mlx5_core_dev *
                        goto err;
        }
  
-       if (MLX5_CAP_GEN(dev, eswitch_flow_table)) {
+       if (MLX5_ESWITCH_MANAGER(dev)) {
                if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, ft_support)) {
                        err = init_fdb_root_ns(steering);
                        if (err)
@@@ -76,6 -76,7 +76,7 @@@ void mlx5i_init(struct mlx5_core_dev *m
                void *ppriv)
  {
        struct mlx5e_priv *priv  = mlx5i_epriv(netdev);
+       u16 max_mtu;
  
        /* priv init */
        priv->mdev        = mdev;
@@@ -84,6 -85,9 +85,9 @@@
        priv->ppriv       = ppriv;
        mutex_init(&priv->state_lock);
  
+       mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
+       netdev->mtu = max_mtu;
+
        mlx5e_build_nic_params(mdev, &priv->channels.params,
                               profile->max_nch(mdev), netdev->mtu);
        mlx5i_build_nic_params(mdev, &priv->channels.params);
@@@ -580,22 -584,6 +584,22 @@@ static int mlx5i_check_required_hca_cap
        return 0;
  }
  
 +static void mlx5_rdma_netdev_free(struct net_device *netdev)
 +{
 +      struct mlx5e_priv *priv = mlx5i_epriv(netdev);
 +      struct mlx5i_priv *ipriv = priv->ppriv;
 +      const struct mlx5e_profile *profile = priv->profile;
 +
 +      mlx5e_detach_netdev(priv);
 +      profile->cleanup(priv);
 +      destroy_workqueue(priv->wq);
 +
 +      if (!ipriv->sub_interface) {
 +              mlx5i_pkey_qpn_ht_cleanup(netdev);
 +              mlx5e_destroy_mdev_resources(priv->mdev);
 +      }
 +}
 +
  struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
                                          struct ib_device *ibdev,
                                          const char *name,
        rn->detach_mcast = mlx5i_detach_mcast;
        rn->set_id = mlx5i_set_pkey_index;
  
 +      netdev->priv_destructor = mlx5_rdma_netdev_free;
 +      netdev->needs_free_netdev = 1;
 +
        return netdev;
  
  destroy_ht:
@@@ -684,3 -669,21 +688,3 @@@ err_free_netdev
        return NULL;
  }
  EXPORT_SYMBOL(mlx5_rdma_netdev_alloc);
 -
 -void mlx5_rdma_netdev_free(struct net_device *netdev)
 -{
 -      struct mlx5e_priv *priv = mlx5i_epriv(netdev);
 -      struct mlx5i_priv *ipriv = priv->ppriv;
 -      const struct mlx5e_profile *profile = priv->profile;
 -
 -      mlx5e_detach_netdev(priv);
 -      profile->cleanup(priv);
 -      destroy_workqueue(priv->wq);
 -
 -      if (!ipriv->sub_interface) {
 -              mlx5i_pkey_qpn_ht_cleanup(netdev);
 -              mlx5e_destroy_mdev_resources(priv->mdev);
 -      }
 -      free_netdev(netdev);
 -}
 -EXPORT_SYMBOL(mlx5_rdma_netdev_free);
diff --combined drivers/nvme/host/rdma.c
@@@ -560,12 -560,6 +560,6 @@@ static void nvme_rdma_free_queue(struc
        if (!test_and_clear_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags))
                return;
  
-       if (nvme_rdma_queue_idx(queue) == 0) {
-               nvme_rdma_free_qe(queue->device->dev,
-                       &queue->ctrl->async_event_sqe,
-                       sizeof(struct nvme_command), DMA_TO_DEVICE);
-       }
        nvme_rdma_destroy_queue_ib(queue);
        rdma_destroy_id(queue->cm_id);
  }
@@@ -698,7 -692,7 +692,7 @@@ static struct blk_mq_tag_set *nvme_rdma
                set = &ctrl->tag_set;
                memset(set, 0, sizeof(*set));
                set->ops = &nvme_rdma_mq_ops;
-               set->queue_depth = nctrl->opts->queue_size;
+               set->queue_depth = nctrl->sqsize + 1;
                set->reserved_tags = 1; /* fabric connect */
                set->numa_node = NUMA_NO_NODE;
                set->flags = BLK_MQ_F_SHOULD_MERGE;
@@@ -734,11 -728,15 +728,15 @@@ out
  static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
                bool remove)
  {
-       nvme_rdma_stop_queue(&ctrl->queues[0]);
        if (remove) {
                blk_cleanup_queue(ctrl->ctrl.admin_q);
                nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset);
        }
+       if (ctrl->async_event_sqe.data) {
+               nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
+                               sizeof(struct nvme_command), DMA_TO_DEVICE);
+               ctrl->async_event_sqe.data = NULL;
+       }
        nvme_rdma_free_queue(&ctrl->queues[0]);
  }
  
@@@ -755,11 -753,16 +753,16 @@@ static int nvme_rdma_configure_admin_qu
  
        ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev);
  
+       error = nvme_rdma_alloc_qe(ctrl->device->dev, &ctrl->async_event_sqe,
+                       sizeof(struct nvme_command), DMA_TO_DEVICE);
+       if (error)
+               goto out_free_queue;
+
        if (new) {
                ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true);
                if (IS_ERR(ctrl->ctrl.admin_tagset)) {
                        error = PTR_ERR(ctrl->ctrl.admin_tagset);
-                       goto out_free_queue;
+                       goto out_free_async_qe;
                }
  
                ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
        if (error)
                goto out_stop_queue;
  
-       error = nvme_rdma_alloc_qe(ctrl->queues[0].device->dev,
-                       &ctrl->async_event_sqe, sizeof(struct nvme_command),
-                       DMA_TO_DEVICE);
-       if (error)
-               goto out_stop_queue;
        return 0;
  
  out_stop_queue:
@@@ -811,6 -808,9 +808,9 @@@ out_cleanup_queue
  out_free_tagset:
        if (new)
                nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset);
+ out_free_async_qe:
+       nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
+               sizeof(struct nvme_command), DMA_TO_DEVICE);
  out_free_queue:
        nvme_rdma_free_queue(&ctrl->queues[0]);
        return error;
  static void nvme_rdma_destroy_io_queues(struct nvme_rdma_ctrl *ctrl,
                bool remove)
  {
-       nvme_rdma_stop_io_queues(ctrl);
        if (remove) {
                blk_cleanup_queue(ctrl->ctrl.connect_q);
                nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
@@@ -888,9 -887,9 +887,9 @@@ static void nvme_rdma_free_ctrl(struct 
        list_del(&ctrl->list);
        mutex_unlock(&nvme_rdma_ctrl_mutex);
  
-       kfree(ctrl->queues);
        nvmf_free_options(nctrl->opts);
  free_ctrl:
+       kfree(ctrl->queues);
        kfree(ctrl);
  }
  
@@@ -949,6 -948,7 +948,7 @@@ static void nvme_rdma_reconnect_ctrl_wo
        return;
  
  destroy_admin:
+       nvme_rdma_stop_queue(&ctrl->queues[0]);
        nvme_rdma_destroy_admin_queue(ctrl, false);
  requeue:
        dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
@@@ -965,12 -965,14 +965,14 @@@ static void nvme_rdma_error_recovery_wo
  
        if (ctrl->ctrl.queue_count > 1) {
                nvme_stop_queues(&ctrl->ctrl);
+               nvme_rdma_stop_io_queues(ctrl);
                blk_mq_tagset_busy_iter(&ctrl->tag_set,
                                        nvme_cancel_request, &ctrl->ctrl);
                nvme_rdma_destroy_io_queues(ctrl, false);
        }
  
        blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
+       nvme_rdma_stop_queue(&ctrl->queues[0]);
        blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
                                nvme_cancel_request, &ctrl->ctrl);
        nvme_rdma_destroy_admin_queue(ctrl, false);
@@@ -1038,6 -1040,7 +1040,6 @@@ static void nvme_rdma_inv_rkey_done(str
  static int nvme_rdma_inv_rkey(struct nvme_rdma_queue *queue,
                struct nvme_rdma_request *req)
  {
 -      struct ib_send_wr *bad_wr;
        struct ib_send_wr wr = {
                .opcode             = IB_WR_LOCAL_INV,
                .next               = NULL,
        req->reg_cqe.done = nvme_rdma_inv_rkey_done;
        wr.wr_cqe = &req->reg_cqe;
  
 -      return ib_post_send(queue->qp, &wr, &bad_wr);
 +      return ib_post_send(queue->qp, &wr, NULL);
  }
  
  static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
@@@ -1243,7 -1246,7 +1245,7 @@@ static int nvme_rdma_post_send(struct n
                struct nvme_rdma_qe *qe, struct ib_sge *sge, u32 num_sge,
                struct ib_send_wr *first)
  {
 -      struct ib_send_wr wr, *bad_wr;
 +      struct ib_send_wr wr;
        int ret;
  
        sge->addr   = qe->dma;
        else
                first = &wr;
  
 -      ret = ib_post_send(queue->qp, first, &bad_wr);
 +      ret = ib_post_send(queue->qp, first, NULL);
        if (unlikely(ret)) {
                dev_err(queue->ctrl->ctrl.device,
                             "%s failed with error code %d\n", __func__, ret);
  static int nvme_rdma_post_recv(struct nvme_rdma_queue *queue,
                struct nvme_rdma_qe *qe)
  {
 -      struct ib_recv_wr wr, *bad_wr;
 +      struct ib_recv_wr wr;
        struct ib_sge list;
        int ret;
  
        wr.sg_list  = &list;
        wr.num_sge  = 1;
  
 -      ret = ib_post_recv(queue->qp, &wr, &bad_wr);
 +      ret = ib_post_recv(queue->qp, &wr, NULL);
        if (unlikely(ret)) {
                dev_err(queue->ctrl->ctrl.device,
                        "%s failed with error code %d\n", __func__, ret);
@@@ -1636,7 -1639,7 +1638,7 @@@ static blk_status_t nvme_rdma_queue_rq(
        WARN_ON_ONCE(rq->tag < 0);
  
        if (!nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
-               return nvmf_fail_nonready_command(rq);
+               return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq);
  
        dev = queue->device->dev;
        ib_dma_sync_single_for_cpu(dev, sqe->dma,
@@@ -1735,6 -1738,7 +1737,7 @@@ static void nvme_rdma_shutdown_ctrl(str
  {
        if (ctrl->ctrl.queue_count > 1) {
                nvme_stop_queues(&ctrl->ctrl);
+               nvme_rdma_stop_io_queues(ctrl);
                blk_mq_tagset_busy_iter(&ctrl->tag_set,
                                        nvme_cancel_request, &ctrl->ctrl);
                nvme_rdma_destroy_io_queues(ctrl, shutdown);
                nvme_disable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
  
        blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
+       nvme_rdma_stop_queue(&ctrl->queues[0]);
        blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
                                nvme_cancel_request, &ctrl->ctrl);
        blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
@@@ -1931,11 -1936,6 +1935,6 @@@ static struct nvme_ctrl *nvme_rdma_crea
                goto out_free_ctrl;
        }
  
-       ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_rdma_ctrl_ops,
-                               0 /* no quirks, we're perfect! */);
-       if (ret)
-               goto out_free_ctrl;
        INIT_DELAYED_WORK(&ctrl->reconnect_work,
                        nvme_rdma_reconnect_ctrl_work);
        INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work);
        ctrl->queues = kcalloc(ctrl->ctrl.queue_count, sizeof(*ctrl->queues),
                                GFP_KERNEL);
        if (!ctrl->queues)
-               goto out_uninit_ctrl;
+               goto out_free_ctrl;
+
+       ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_rdma_ctrl_ops,
+                               0 /* no quirks, we're perfect! */);
+       if (ret)
+               goto out_kfree_queues;
  
        changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING);
        WARN_ON_ONCE(!changed);
  
        ret = nvme_rdma_configure_admin_queue(ctrl, true);
        if (ret)
-               goto out_kfree_queues;
+               goto out_uninit_ctrl;
  
        /* sanity check icdoff */
        if (ctrl->ctrl.icdoff) {
                goto out_remove_admin_queue;
        }
  
-       if (opts->queue_size > ctrl->ctrl.maxcmd) {
-               /* warn if maxcmd is lower than queue_size */
-               dev_warn(ctrl->ctrl.device,
-                       "queue_size %zu > ctrl maxcmd %u, clamping down\n",
-                       opts->queue_size, ctrl->ctrl.maxcmd);
-               opts->queue_size = ctrl->ctrl.maxcmd;
-       }
+       /* only warn if argument is too large here, will clamp later */
        if (opts->queue_size > ctrl->ctrl.sqsize + 1) {
-               /* warn if sqsize is lower than queue_size */
                dev_warn(ctrl->ctrl.device,
                        "queue_size %zu > ctrl sqsize %u, clamping down\n",
                        opts->queue_size, ctrl->ctrl.sqsize + 1);
-               opts->queue_size = ctrl->ctrl.sqsize + 1;
+       }
+
+       /* warn if maxcmd is lower than sqsize+1 */
+       if (ctrl->ctrl.sqsize + 1 > ctrl->ctrl.maxcmd) {
+               dev_warn(ctrl->ctrl.device,
+                       "sqsize %u > ctrl maxcmd %u, clamping down\n",
+                       ctrl->ctrl.sqsize + 1, ctrl->ctrl.maxcmd);
+               ctrl->ctrl.sqsize = ctrl->ctrl.maxcmd - 1;
        }
  
        if (opts->nr_io_queues) {
        return &ctrl->ctrl;
  
  out_remove_admin_queue:
+       nvme_rdma_stop_queue(&ctrl->queues[0]);
        nvme_rdma_destroy_admin_queue(ctrl, true);
- out_kfree_queues:
-       kfree(ctrl->queues);
  out_uninit_ctrl:
        nvme_uninit_ctrl(&ctrl->ctrl);
        nvme_put_ctrl(&ctrl->ctrl);
        if (ret > 0)
                ret = -EIO;
        return ERR_PTR(ret);
+ out_kfree_queues:
+       kfree(ctrl->queues);
  out_free_ctrl:
        kfree(ctrl);
        return ERR_PTR(ret);
diff --combined fs/cifs/smbdirect.c
@@@ -18,6 -18,7 +18,7 @@@
  #include "smbdirect.h"
  #include "cifs_debug.h"
  #include "cifsproto.h"
+ #include "smb2proto.h"
  
  static struct smbd_response *get_empty_queue_buffer(
                struct smbd_connection *info);
@@@ -801,7 -802,7 +802,7 @@@ out1
   */
  static int smbd_post_send_negotiate_req(struct smbd_connection *info)
  {
 -      struct ib_send_wr send_wr, *send_wr_fail;
 +      struct ib_send_wr send_wr;
        int rc = -ENOMEM;
        struct smbd_request *request;
        struct smbd_negotiate_req *packet;
  
        request->has_payload = false;
        atomic_inc(&info->send_pending);
 -      rc = ib_post_send(info->id->qp, &send_wr, &send_wr_fail);
 +      rc = ib_post_send(info->id->qp, &send_wr, NULL);
        if (!rc)
                return 0;
  
@@@ -1023,7 -1024,7 +1024,7 @@@ static void smbd_destroy_header(struct 
  static int smbd_post_send(struct smbd_connection *info,
                struct smbd_request *request, bool has_payload)
  {
 -      struct ib_send_wr send_wr, *send_wr_fail;
 +      struct ib_send_wr send_wr;
        int rc, i;
  
        for (i = 0; i < request->num_sge; i++) {
                atomic_inc(&info->send_pending);
        }
  
 -      rc = ib_post_send(info->id->qp, &send_wr, &send_wr_fail);
 +      rc = ib_post_send(info->id->qp, &send_wr, NULL);
        if (rc) {
                log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc);
                if (has_payload) {
@@@ -1183,7 -1184,7 +1184,7 @@@ static int smbd_post_send_data
  static int smbd_post_recv(
                struct smbd_connection *info, struct smbd_response *response)
  {
 -      struct ib_recv_wr recv_wr, *recv_wr_fail = NULL;
 +      struct ib_recv_wr recv_wr;
        int rc = -EIO;
  
        response->sge.addr = ib_dma_map_single(
        recv_wr.sg_list = &response->sge;
        recv_wr.num_sge = 1;
  
 -      rc = ib_post_recv(info->id->qp, &recv_wr, &recv_wr_fail);
 +      rc = ib_post_recv(info->id->qp, &recv_wr, NULL);
        if (rc) {
                ib_dma_unmap_single(info->id->device, response->sge.addr,
                                    response->sge.length, DMA_FROM_DEVICE);
@@@ -1661,16 -1662,9 +1662,16 @@@ static struct smbd_connection *_smbd_ge
        info->max_receive_size = smbd_max_receive_size;
        info->keep_alive_interval = smbd_keep_alive_interval;
  
 -      if (info->id->device->attrs.max_sge < SMBDIRECT_MAX_SGE) {
 -              log_rdma_event(ERR, "warning: device max_sge = %d too small\n",
 -                      info->id->device->attrs.max_sge);
 +      if (info->id->device->attrs.max_send_sge < SMBDIRECT_MAX_SGE) {
 +              log_rdma_event(ERR,
 +                      "warning: device max_send_sge = %d too small\n",
 +                      info->id->device->attrs.max_send_sge);
 +              log_rdma_event(ERR, "Queue Pair creation may fail\n");
 +      }
 +      if (info->id->device->attrs.max_recv_sge < SMBDIRECT_MAX_SGE) {
 +              log_rdma_event(ERR,
 +                      "warning: device max_recv_sge = %d too small\n",
 +                      info->id->device->attrs.max_recv_sge);
                log_rdma_event(ERR, "Queue Pair creation may fail\n");
        }
  
@@@ -2089,12 -2083,13 +2090,13 @@@ int smbd_recv(struct smbd_connection *i
   * rqst: the data to write
   * return value: 0 if successfully write, otherwise error code
   */
- int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst)
+ int smbd_send(struct TCP_Server_Info *server, struct smb_rqst *rqst)
  {
+       struct smbd_connection *info = server->smbd_conn;
        struct kvec vec;
        int nvecs;
        int size;
-       unsigned int buflen = 0, remaining_data_length;
+       unsigned int buflen, remaining_data_length;
        int start, i, j;
        int max_iov_size =
                info->max_send_size - sizeof(struct smbd_data_transfer);
                log_write(ERR, "expected the pdu length in 1st iov, but got %zu\n", rqst->rq_iov[0].iov_len);
                return -EINVAL;
        }
-       iov = &rqst->rq_iov[1];
-       /* total up iov array first */
-       for (i = 0; i < rqst->rq_nvec-1; i++) {
-               buflen += iov[i].iov_len;
-       }
  
        /*
         * Add in the page array if there is one. The caller needs to set
         * rq_tailsz to PAGE_SIZE when the buffer has multiple pages and
         * ends at page boundary
         */
-       if (rqst->rq_npages) {
-               if (rqst->rq_npages == 1)
-                       buflen += rqst->rq_tailsz;
-               else
-                       buflen += rqst->rq_pagesz * (rqst->rq_npages - 1) -
-                                       rqst->rq_offset + rqst->rq_tailsz;
-       }
+       buflen = smb_rqst_len(server, rqst);
  
        if (buflen + sizeof(struct smbd_data_transfer) >
                info->max_fragmented_send_size) {
                goto done;
        }
  
+       iov = &rqst->rq_iov[1];
        cifs_dbg(FYI, "Sending smb (RDMA): smb_len=%u\n", buflen);
        for (i = 0; i < rqst->rq_nvec-1; i++)
                dump_smb(iov[i].iov_base, iov[i].iov_len);
@@@ -2488,6 -2473,7 +2480,6 @@@ struct smbd_mr *smbd_register_mr
        int rc, i;
        enum dma_data_direction dir;
        struct ib_reg_wr *reg_wr;
 -      struct ib_send_wr *bad_wr;
  
        if (num_pages > info->max_frmr_depth) {
                log_rdma_mr(ERR, "num_pages=%d max_frmr_depth=%d\n",
@@@ -2561,7 -2547,7 +2553,7 @@@ skip_multiple_pages
         * on IB_WR_REG_MR. Hardware enforces a barrier and order of execution
         * on the next ib_post_send when we actually send I/O to remote peer
         */
 -      rc = ib_post_send(info->id->qp, &reg_wr->wr, &bad_wr);
 +      rc = ib_post_send(info->id->qp, &reg_wr->wr, NULL);
        if (!rc)
                return smbdirect_mr;
  
@@@ -2606,7 -2592,7 +2598,7 @@@ static void local_inv_done(struct ib_c
   */
  int smbd_deregister_mr(struct smbd_mr *smbdirect_mr)
  {
 -      struct ib_send_wr *wr, *bad_wr;
 +      struct ib_send_wr *wr;
        struct smbd_connection *info = smbdirect_mr->conn;
        int rc = 0;
  
                wr->send_flags = IB_SEND_SIGNALED;
  
                init_completion(&smbdirect_mr->invalidate_done);
 -              rc = ib_post_send(info->id->qp, wr, &bad_wr);
 +              rc = ib_post_send(info->id->qp, wr, NULL);
                if (rc) {
                        log_rdma_mr(ERR, "ib_post_send failed rc=%x\n", rc);
                        smbd_disconnect_rdma_connection(info);
@@@ -138,14 -138,9 +138,14 @@@ enum 
        MLX5_REG_HOST_ENDIANNESS = 0x7004,
        MLX5_REG_MCIA            = 0x9014,
        MLX5_REG_MLCR            = 0x902b,
 +      MLX5_REG_MTRC_CAP        = 0x9040,
 +      MLX5_REG_MTRC_CONF       = 0x9041,
 +      MLX5_REG_MTRC_STDB       = 0x9042,
 +      MLX5_REG_MTRC_CTRL       = 0x9043,
        MLX5_REG_MPCNT           = 0x9051,
        MLX5_REG_MTPPS           = 0x9053,
        MLX5_REG_MTPPSE          = 0x9054,
 +      MLX5_REG_MPEGC           = 0x9056,
        MLX5_REG_MCQI            = 0x9061,
        MLX5_REG_MCC             = 0x9062,
        MLX5_REG_MCDA            = 0x9063,
@@@ -363,6 -358,7 +363,7 @@@ struct mlx5_frag_buf_ctrl 
        struct mlx5_frag_buf    frag_buf;
        u32                     sz_m1;
        u32                     frag_sz_m1;
+       u32                     strides_offset;
        u8                      log_sz;
        u8                      log_stride;
        u8                      log_frag_strides;
@@@ -988,14 -984,22 +989,22 @@@ static inline u32 mlx5_base_mkey(const 
        return key & 0xffffff00u;
  }
  
- static inline void mlx5_fill_fbc(u8 log_stride, u8 log_sz,
-                                struct mlx5_frag_buf_ctrl *fbc)
+ static inline void mlx5_fill_fbc_offset(u8 log_stride, u8 log_sz,
+                                       u32 strides_offset,
+                                       struct mlx5_frag_buf_ctrl *fbc)
  {
        fbc->log_stride = log_stride;
        fbc->log_sz     = log_sz;
        fbc->sz_m1      = (1 << fbc->log_sz) - 1;
        fbc->log_frag_strides = PAGE_SHIFT - fbc->log_stride;
        fbc->frag_sz_m1 = (1 << fbc->log_frag_strides) - 1;
+       fbc->strides_offset = strides_offset;
+ }
+
+ static inline void mlx5_fill_fbc(u8 log_stride, u8 log_sz,
+                                struct mlx5_frag_buf_ctrl *fbc)
+ {
+       mlx5_fill_fbc_offset(log_stride, log_sz, 0, fbc);
  }
  
  static inline void mlx5_core_init_cq_frag_buf(struct mlx5_frag_buf_ctrl *fbc,
  static inline void *mlx5_frag_buf_get_wqe(struct mlx5_frag_buf_ctrl *fbc,
                                          u32 ix)
  {
-       unsigned int frag = (ix >> fbc->log_frag_strides);
+       unsigned int frag;
+       ix  += fbc->strides_offset;
+       frag = ix >> fbc->log_frag_strides;
  
        return fbc->frag_buf.frags[frag].buf +
                ((fbc->frag_sz_m1 & ix) << fbc->log_stride);
@@@ -1072,6 -1079,8 +1084,6 @@@ int mlx5_core_destroy_mkey(struct mlx5_
                           struct mlx5_core_mkey *mkey);
  int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey,
                         u32 *out, int outlen);
 -int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *_mkey,
 -                           u32 *mkey);
  int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn);
  int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn);
  int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
@@@ -1218,11 -1227,14 +1230,11 @@@ struct net_device *mlx5_rdma_netdev_all
  {
        return ERR_PTR(-EOPNOTSUPP);
  }
 -
 -static inline void mlx5_rdma_netdev_free(struct net_device *netdev) {}
  #else
  struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
                                          struct ib_device *ibdev,
                                          const char *name,
                                          void (*setup)(struct net_device *));
 -void mlx5_rdma_netdev_free(struct net_device *netdev);
  #endif /* CONFIG_MLX5_CORE_IPOIB */
  
  struct mlx5_profile {
@@@ -76,16 -76,6 +76,16 @@@ enum 
  };
  
  enum {
 +      MLX5_GENERAL_OBJ_TYPES_CAP_UCTX = (1ULL << 4),
 +      MLX5_GENERAL_OBJ_TYPES_CAP_UMEM = (1ULL << 5),
 +};
 +
 +enum {
 +      MLX5_OBJ_TYPE_UCTX = 0x0004,
 +      MLX5_OBJ_TYPE_UMEM = 0x0005,
 +};
 +
 +enum {
        MLX5_CMD_OP_QUERY_HCA_CAP                 = 0x100,
        MLX5_CMD_OP_QUERY_ADAPTER                 = 0x101,
        MLX5_CMD_OP_INIT_HCA                      = 0x102,
        MLX5_CMD_OP_DEALLOC_ENCAP_HEADER          = 0x93e,
        MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT   = 0x940,
        MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT = 0x941,
 +      MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT   = 0x942,
        MLX5_CMD_OP_FPGA_CREATE_QP                = 0x960,
        MLX5_CMD_OP_FPGA_MODIFY_QP                = 0x961,
        MLX5_CMD_OP_FPGA_QUERY_QP                 = 0x962,
        MLX5_CMD_OP_FPGA_DESTROY_QP               = 0x963,
        MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS        = 0x964,
 +      MLX5_CMD_OP_CREATE_GENERAL_OBJECT         = 0xa00,
 +      MLX5_CMD_OP_MODIFY_GENERAL_OBJECT         = 0xa01,
 +      MLX5_CMD_OP_QUERY_GENERAL_OBJECT          = 0xa02,
 +      MLX5_CMD_OP_DESTROY_GENERAL_OBJECT        = 0xa03,
        MLX5_CMD_OP_MAX
  };
  
@@@ -341,10 -326,7 +341,10 @@@ struct mlx5_ifc_flow_table_prop_layout_
        u8         reserved_at_9[0x1];
        u8         pop_vlan[0x1];
        u8         push_vlan[0x1];
 -      u8         reserved_at_c[0x14];
 +      u8         reserved_at_c[0x1];
 +      u8         pop_vlan_2[0x1];
 +      u8         push_vlan_2[0x1];
 +      u8         reserved_at_f[0x11];
  
        u8         reserved_at_20[0x2];
        u8         log_max_ft_size[0x6];
@@@ -892,9 -874,7 +892,9 @@@ struct mlx5_ifc_cmd_hca_cap_bits 
        u8         log_max_eq_sz[0x8];
        u8         reserved_at_e8[0x2];
        u8         log_max_mkey[0x6];
 -      u8         reserved_at_f0[0xc];
 +      u8         reserved_at_f0[0x8];
 +      u8         dump_fill_mkey[0x1];
 +      u8         reserved_at_f9[0x3];
        u8         log_max_eq[0x4];
  
        u8         max_indirection[0x8];
        u8         vnic_env_queue_counters[0x1];
        u8         ets[0x1];
        u8         nic_flow_table[0x1];
-       u8         eswitch_flow_table[0x1];
+       u8         eswitch_manager[0x1];
        u8         device_memory[0x1];
        u8         mcam_reg[0x1];
        u8         pcam_reg[0x1];
        u8         reserved_at_3f8[0x3];
        u8         log_max_current_uc_list[0x5];
  
 -      u8         reserved_at_400[0x80];
 +      u8         general_obj_types[0x40];
 +
 +      u8         reserved_at_440[0x40];
  
        u8         reserved_at_480[0x3];
        u8         log_max_l2_table[0x5];
@@@ -1184,7 -1162,6 +1184,7 @@@ enum mlx5_flow_destination_type 
  
        MLX5_FLOW_DESTINATION_TYPE_PORT         = 0x99,
        MLX5_FLOW_DESTINATION_TYPE_COUNTER      = 0x100,
 +      MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM = 0x101,
  };
  
  struct mlx5_ifc_dest_format_struct_bits {
@@@ -1691,11 -1668,7 +1691,11 @@@ struct mlx5_ifc_eth_extended_cntrs_grp_
  
        u8         rx_buffer_full_low[0x20];
  
 -      u8         reserved_at_1c0[0x600];
 +      u8         rx_icrc_encapsulated_high[0x20];
 +
 +      u8         rx_icrc_encapsulated_low[0x20];
 +
 +      u8         reserved_at_200[0x5c0];
  };
  
  struct mlx5_ifc_eth_3635_cntrs_grp_data_layout_bits {
@@@ -2394,8 -2367,6 +2394,8 @@@ enum 
        MLX5_FLOW_CONTEXT_ACTION_MOD_HDR   = 0x40,
        MLX5_FLOW_CONTEXT_ACTION_VLAN_POP  = 0x80,
        MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH = 0x100,
 +      MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2  = 0x400,
 +      MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2 = 0x800,
  };
  
  struct mlx5_ifc_vlan_bits {
@@@ -2426,9 -2397,7 +2426,9 @@@ struct mlx5_ifc_flow_context_bits 
  
        u8         modify_header_id[0x20];
  
 -      u8         reserved_at_100[0x100];
 +      struct mlx5_ifc_vlan_bits push_vlan_2;
 +
 +      u8         reserved_at_120[0xe0];
  
        struct mlx5_ifc_fte_match_param_bits match_value;
  
@@@ -8061,23 -8030,9 +8061,23 @@@ struct mlx5_ifc_peir_reg_bits 
        u8         error_type[0x8];
  };
  
 -struct mlx5_ifc_pcam_enhanced_features_bits {
 -      u8         reserved_at_0[0x76];
 +struct mlx5_ifc_mpegc_reg_bits {
 +      u8         reserved_at_0[0x30];
 +      u8         field_select[0x10];
 +
 +      u8         tx_overflow_sense[0x1];
 +      u8         mark_cqe[0x1];
 +      u8         mark_cnp[0x1];
 +      u8         reserved_at_43[0x1b];
 +      u8         tx_lossy_overflow_oper[0x2];
 +
 +      u8         reserved_at_60[0x100];
 +};
  
 +struct mlx5_ifc_pcam_enhanced_features_bits {
 +      u8         reserved_at_0[0x6d];
 +      u8         rx_icrc_encapsulated_counter[0x1];
 +      u8         reserved_at_6e[0x8];
        u8         pfcc_mask[0x1];
        u8         reserved_at_77[0x4];
        u8         rx_buffer_fullness_counters[0x1];
@@@ -8122,11 -8077,7 +8122,11 @@@ struct mlx5_ifc_pcam_reg_bits 
  };
  
  struct mlx5_ifc_mcam_enhanced_features_bits {
 -      u8         reserved_at_0[0x7b];
 +      u8         reserved_at_0[0x74];
 +      u8         mark_tx_action_cnp[0x1];
 +      u8         mark_tx_action_cqe[0x1];
 +      u8         dynamic_tx_overflow[0x1];
 +      u8         reserved_at_77[0x4];
        u8         pcie_outbound_stalled[0x1];
        u8         tx_overflow_buffer_pkt[0x1];
        u8         mtpps_enh_out_per_adj[0x1];
@@@ -8141,11 -8092,7 +8141,11 @@@ struct mlx5_ifc_mcam_access_reg_bits 
        u8         mcqi[0x1];
        u8         reserved_at_1f[0x1];
  
 -      u8         regs_95_to_64[0x20];
 +      u8         regs_95_to_87[0x9];
 +      u8         mpegc[0x1];
 +      u8         regs_85_to_68[0x12];
 +      u8         tracer_registers[0x4];
 +
        u8         regs_63_to_32[0x20];
        u8         regs_31_to_0[0x20];
  };
@@@ -9168,113 -9115,4 +9168,113 @@@ struct mlx5_ifc_dealloc_memic_out_bits 
        u8         reserved_at_40[0x40];
  };
  
 +struct mlx5_ifc_general_obj_in_cmd_hdr_bits {
 +      u8         opcode[0x10];
 +      u8         uid[0x10];
 +
 +      u8         reserved_at_20[0x10];
 +      u8         obj_type[0x10];
 +
 +      u8         obj_id[0x20];
 +
 +      u8         reserved_at_60[0x20];
 +};
 +
 +struct mlx5_ifc_general_obj_out_cmd_hdr_bits {
 +      u8         status[0x8];
 +      u8         reserved_at_8[0x18];
 +
 +      u8         syndrome[0x20];
 +
 +      u8         obj_id[0x20];
 +
 +      u8         reserved_at_60[0x20];
 +};
 +
 +struct mlx5_ifc_umem_bits {
 +      u8         modify_field_select[0x40];
 +
 +      u8         reserved_at_40[0x5b];
 +      u8         log_page_size[0x5];
 +
 +      u8         page_offset[0x20];
 +
 +      u8         num_of_mtt[0x40];
 +
 +      struct mlx5_ifc_mtt_bits  mtt[0];
 +};
 +
 +struct mlx5_ifc_uctx_bits {
 +      u8         modify_field_select[0x40];
 +
 +      u8         reserved_at_40[0x1c0];
 +};
 +
 +struct mlx5_ifc_create_umem_in_bits {
 +      struct mlx5_ifc_general_obj_in_cmd_hdr_bits   hdr;
 +      struct mlx5_ifc_umem_bits                     umem;
 +};
 +
 +struct mlx5_ifc_create_uctx_in_bits {
 +      struct mlx5_ifc_general_obj_in_cmd_hdr_bits   hdr;
 +      struct mlx5_ifc_uctx_bits                     uctx;
 +};
 +
 +struct mlx5_ifc_mtrc_string_db_param_bits {
 +      u8         string_db_base_address[0x20];
 +
 +      u8         reserved_at_20[0x8];
 +      u8         string_db_size[0x18];
 +};
 +
 +struct mlx5_ifc_mtrc_cap_bits {
 +      u8         trace_owner[0x1];
 +      u8         trace_to_memory[0x1];
 +      u8         reserved_at_2[0x4];
 +      u8         trc_ver[0x2];
 +      u8         reserved_at_8[0x14];
 +      u8         num_string_db[0x4];
 +
 +      u8         first_string_trace[0x8];
 +      u8         num_string_trace[0x8];
 +      u8         reserved_at_30[0x28];
 +
 +      u8         log_max_trace_buffer_size[0x8];
 +
 +      u8         reserved_at_60[0x20];
 +
 +      struct mlx5_ifc_mtrc_string_db_param_bits string_db_param[8];
 +
 +      u8         reserved_at_280[0x180];
 +};
 +
 +struct mlx5_ifc_mtrc_conf_bits {
 +      u8         reserved_at_0[0x1c];
 +      u8         trace_mode[0x4];
 +      u8         reserved_at_20[0x18];
 +      u8         log_trace_buffer_size[0x8];
 +      u8         trace_mkey[0x20];
 +      u8         reserved_at_60[0x3a0];
 +};
 +
 +struct mlx5_ifc_mtrc_stdb_bits {
 +      u8         string_db_index[0x4];
 +      u8         reserved_at_4[0x4];
 +      u8         read_size[0x18];
 +      u8         start_offset[0x20];
 +      u8         string_db_data[0];
 +};
 +
 +struct mlx5_ifc_mtrc_ctrl_bits {
 +      u8         trace_status[0x2];
 +      u8         reserved_at_2[0x2];
 +      u8         arm_event[0x1];
 +      u8         reserved_at_5[0xb];
 +      u8         modify_field_select[0x10];
 +      u8         reserved_at_20[0x2b];
 +      u8         current_timestamp52_32[0x15];
 +      u8         current_timestamp31_0[0x20];
 +      u8         reserved_at_80[0x180];
 +};
 +
  #endif /* MLX5_IFC_H */
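
As a rough illustration of how the general object command headers added above are meant to be driven (not part of this merge; the object type and the absent object payload are placeholder assumptions, and a real CREATE would append a payload such as the umem or uctx layouts defined above), a driver could wrap the existing mlx5 command helpers like this:

#include <linux/mlx5/driver.h>
#include <linux/mlx5/mlx5_ifc.h>

/* Hypothetical helper: creates a bare general object of 'obj_type' and
 * returns its obj_id.  A real caller would size 'in' to hold the header
 * plus the object-specific payload (e.g. mlx5_ifc_create_umem_in_bits).
 */
static int create_general_obj(struct mlx5_core_dev *dev, u16 obj_type,
                              u32 *obj_id)
{
        u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
        u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
        int err;

        MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
                 MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
        MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, obj_type);

        err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
        if (!err)
                *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
        return err;
}
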
diff --combined include/rdma/ib_verbs.h
@@@ -94,7 -94,6 +94,7 @@@ enum ib_gid_type 
  struct ib_gid_attr {
        struct net_device       *ndev;
        struct ib_device        *device;
 +      union ib_gid            gid;
        enum ib_gid_type        gid_type;
        u16                     index;
        u8                      port_num;
@@@ -149,13 -148,13 +149,13 @@@ static inline enum ib_gid_type ib_netwo
        return IB_GID_TYPE_IB;
  }
  
 -static inline enum rdma_network_type ib_gid_to_network_type(enum ib_gid_type gid_type,
 -                                                          union ib_gid *gid)
 +static inline enum rdma_network_type
 +rdma_gid_attr_network_type(const struct ib_gid_attr *attr)
  {
 -      if (gid_type == IB_GID_TYPE_IB)
 +      if (attr->gid_type == IB_GID_TYPE_IB)
                return RDMA_NETWORK_IB;
  
 -      if (ipv6_addr_v4mapped((struct in6_addr *)gid))
 +      if (ipv6_addr_v4mapped((struct in6_addr *)&attr->gid))
                return RDMA_NETWORK_IPV4;
        else
                return RDMA_NETWORK_IPV6;
@@@ -345,8 -344,7 +345,8 @@@ struct ib_device_attr 
        int                     max_qp;
        int                     max_qp_wr;
        u64                     device_cap_flags;
 -      int                     max_sge;
 +      int                     max_send_sge;
 +      int                     max_recv_sge;
        int                     max_sge_rd;
        int                     max_cq;
        int                     max_cqe;
@@@ -432,6 -430,33 +432,6 @@@ enum ib_port_state 
        IB_PORT_ACTIVE_DEFER    = 5
  };
  
 -enum ib_port_cap_flags {
 -      IB_PORT_SM                              = 1 <<  1,
 -      IB_PORT_NOTICE_SUP                      = 1 <<  2,
 -      IB_PORT_TRAP_SUP                        = 1 <<  3,
 -      IB_PORT_OPT_IPD_SUP                     = 1 <<  4,
 -      IB_PORT_AUTO_MIGR_SUP                   = 1 <<  5,
 -      IB_PORT_SL_MAP_SUP                      = 1 <<  6,
 -      IB_PORT_MKEY_NVRAM                      = 1 <<  7,
 -      IB_PORT_PKEY_NVRAM                      = 1 <<  8,
 -      IB_PORT_LED_INFO_SUP                    = 1 <<  9,
 -      IB_PORT_SM_DISABLED                     = 1 << 10,
 -      IB_PORT_SYS_IMAGE_GUID_SUP              = 1 << 11,
 -      IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP       = 1 << 12,
 -      IB_PORT_EXTENDED_SPEEDS_SUP             = 1 << 14,
 -      IB_PORT_CM_SUP                          = 1 << 16,
 -      IB_PORT_SNMP_TUNNEL_SUP                 = 1 << 17,
 -      IB_PORT_REINIT_SUP                      = 1 << 18,
 -      IB_PORT_DEVICE_MGMT_SUP                 = 1 << 19,
 -      IB_PORT_VENDOR_CLASS_SUP                = 1 << 20,
 -      IB_PORT_DR_NOTICE_SUP                   = 1 << 21,
 -      IB_PORT_CAP_MASK_NOTICE_SUP             = 1 << 22,
 -      IB_PORT_BOOT_MGMT_SUP                   = 1 << 23,
 -      IB_PORT_LINK_LATENCY_SUP                = 1 << 24,
 -      IB_PORT_CLIENT_REG_SUP                  = 1 << 25,
 -      IB_PORT_IP_BASED_GIDS                   = 1 << 26,
 -};
 -
  enum ib_port_width {
        IB_WIDTH_1X     = 1,
        IB_WIDTH_4X     = 2,
@@@ -529,7 -554,6 +529,7 @@@ static inline struct rdma_hw_stats *rdm
  #define RDMA_CORE_CAP_AF_IB             0x00001000
  #define RDMA_CORE_CAP_ETH_AH            0x00002000
  #define RDMA_CORE_CAP_OPA_AH            0x00004000
 +#define RDMA_CORE_CAP_IB_GRH_REQUIRED   0x00008000
  
  /* Protocol                             0xFFF00000 */
  #define RDMA_CORE_CAP_PROT_IB           0x00100000
  #define RDMA_CORE_CAP_PROT_RAW_PACKET   0x01000000
  #define RDMA_CORE_CAP_PROT_USNIC        0x02000000
  
 +#define RDMA_CORE_PORT_IB_GRH_REQUIRED (RDMA_CORE_CAP_IB_GRH_REQUIRED \
 +                                      | RDMA_CORE_CAP_PROT_ROCE     \
 +                                      | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP)
 +
  #define RDMA_CORE_PORT_IBA_IB          (RDMA_CORE_CAP_PROT_IB  \
                                        | RDMA_CORE_CAP_IB_MAD \
                                        | RDMA_CORE_CAP_IB_SMI \
@@@ -575,8 -595,6 +575,8 @@@ struct ib_port_attr 
        enum ib_mtu             max_mtu;
        enum ib_mtu             active_mtu;
        int                     gid_tbl_len;
 +      unsigned int            ip_gids:1;
 +      /* This is the value from PortInfo CapabilityMask, defined by IBA */
        u32                     port_cap_flags;
        u32                     max_msg_sz;
        u32                     bad_pkey_cntr;
        u8                      active_width;
        u8                      active_speed;
        u8                      phys_state;
 -      bool                    grh_required;
  };
  
  enum ib_device_modify_flags {
@@@ -670,7 -689,6 +670,7 @@@ struct ib_event_handler 
        } while (0)
  
  struct ib_global_route {
 +      const struct ib_gid_attr *sgid_attr;
        union ib_gid    dgid;
        u32             flow_label;
        u8              sgid_index;
@@@ -1352,7 -1370,7 +1352,7 @@@ struct ib_rdma_wr 
        u32                     rkey;
  };
  
 -static inline struct ib_rdma_wr *rdma_wr(struct ib_send_wr *wr)
 +static inline const struct ib_rdma_wr *rdma_wr(const struct ib_send_wr *wr)
  {
        return container_of(wr, struct ib_rdma_wr, wr);
  }
@@@ -1367,7 -1385,7 +1367,7 @@@ struct ib_atomic_wr 
        u32                     rkey;
  };
  
 -static inline struct ib_atomic_wr *atomic_wr(struct ib_send_wr *wr)
 +static inline const struct ib_atomic_wr *atomic_wr(const struct ib_send_wr *wr)
  {
        return container_of(wr, struct ib_atomic_wr, wr);
  }
@@@ -1384,7 -1402,7 +1384,7 @@@ struct ib_ud_wr 
        u8                      port_num;   /* valid for DR SMPs on switch only */
  };
  
 -static inline struct ib_ud_wr *ud_wr(struct ib_send_wr *wr)
 +static inline const struct ib_ud_wr *ud_wr(const struct ib_send_wr *wr)
  {
        return container_of(wr, struct ib_ud_wr, wr);
  }
@@@ -1396,7 -1414,7 +1396,7 @@@ struct ib_reg_wr 
        int                     access;
  };
  
 -static inline struct ib_reg_wr *reg_wr(struct ib_send_wr *wr)
 +static inline const struct ib_reg_wr *reg_wr(const struct ib_send_wr *wr)
  {
        return container_of(wr, struct ib_reg_wr, wr);
  }
@@@ -1409,8 -1427,7 +1409,8 @@@ struct ib_sig_handover_wr 
        struct ib_sge          *prot;
  };
  
 -static inline struct ib_sig_handover_wr *sig_handover_wr(struct ib_send_wr *wr)
 +static inline const struct ib_sig_handover_wr *
 +sig_handover_wr(const struct ib_send_wr *wr)
  {
        return container_of(wr, struct ib_sig_handover_wr, wr);
  }
@@@ -1426,16 -1443,14 +1426,16 @@@ struct ib_recv_wr 
  };
  
  enum ib_access_flags {
 -      IB_ACCESS_LOCAL_WRITE   = 1,
 -      IB_ACCESS_REMOTE_WRITE  = (1<<1),
 -      IB_ACCESS_REMOTE_READ   = (1<<2),
 -      IB_ACCESS_REMOTE_ATOMIC = (1<<3),
 -      IB_ACCESS_MW_BIND       = (1<<4),
 -      IB_ZERO_BASED           = (1<<5),
 -      IB_ACCESS_ON_DEMAND     = (1<<6),
 -      IB_ACCESS_HUGETLB       = (1<<7),
 +      IB_ACCESS_LOCAL_WRITE = IB_UVERBS_ACCESS_LOCAL_WRITE,
 +      IB_ACCESS_REMOTE_WRITE = IB_UVERBS_ACCESS_REMOTE_WRITE,
 +      IB_ACCESS_REMOTE_READ = IB_UVERBS_ACCESS_REMOTE_READ,
 +      IB_ACCESS_REMOTE_ATOMIC = IB_UVERBS_ACCESS_REMOTE_ATOMIC,
 +      IB_ACCESS_MW_BIND = IB_UVERBS_ACCESS_MW_BIND,
 +      IB_ZERO_BASED = IB_UVERBS_ACCESS_ZERO_BASED,
 +      IB_ACCESS_ON_DEMAND = IB_UVERBS_ACCESS_ON_DEMAND,
 +      IB_ACCESS_HUGETLB = IB_UVERBS_ACCESS_HUGETLB,
 +
 +      IB_ACCESS_SUPPORTED = ((IB_ACCESS_HUGETLB << 1) - 1)
  };
  
  /*
@@@ -1458,17 -1473,14 +1458,17 @@@ struct ib_fmr_attr 
  struct ib_umem;
  
  enum rdma_remove_reason {
 -      /* Userspace requested uobject deletion. Call could fail */
 +      /*
 +       * Userspace requested uobject deletion or an initial attempt
 +       * to remove the uobject via cleanup. The call may fail.
 +       */
        RDMA_REMOVE_DESTROY,
        /* Context deletion. This call should delete the actual object itself */
        RDMA_REMOVE_CLOSE,
        /* Driver is being hot-unplugged. This call should delete the actual object itself */
        RDMA_REMOVE_DRIVER_REMOVE,
 -      /* Context is being cleaned-up, but commit was just completed */
 -      RDMA_REMOVE_DURING_CLEANUP,
 +      /* uobj is being cleaned-up before being committed */
 +      RDMA_REMOVE_ABORT,
  };
  
  struct ib_rdmacg_object {
  struct ib_ucontext {
        struct ib_device       *device;
        struct ib_uverbs_file  *ufile;
 +      /*
 +       * 'closing' can be read by the driver only during a destroy callback;
 +       * it is set when we are closing the file descriptor and indicates
 +       * that mm_sem may be locked.
 +       */
        int                     closing;
  
 -      /* locking the uobjects_list */
 -      struct mutex            uobjects_lock;
 -      struct list_head        uobjects;
 -      /* protects cleanup process from other actions */
 -      struct rw_semaphore     cleanup_rwsem;
 -      enum rdma_remove_reason cleanup_reason;
 +      bool cleanup_retryable;
  
        struct pid             *tgid;
  #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
  
  struct ib_uobject {
        u64                     user_handle;    /* handle given to us by userspace */
 +      /* ufile & ucontext owning this object */
 +      struct ib_uverbs_file  *ufile;
 +      /* FIXME, save memory: ufile->context == context */
        struct ib_ucontext     *context;        /* associated user context */
        void                   *object;         /* containing object */
        struct list_head        list;           /* link to context's list */
        atomic_t                usecnt;         /* protects exclusive access */
        struct rcu_head         rcu;            /* kfree_rcu() overhead */
  
 -      const struct uverbs_obj_type *type;
 -};
 -
 -struct ib_uobject_file {
 -      struct ib_uobject       uobj;
 -      /* ufile contains the lock between context release and file close */
 -      struct ib_uverbs_file   *ufile;
 +      const struct uverbs_api_object *uapi_object;
  };
  
  struct ib_udata {
@@@ -1563,7 -1578,6 +1563,7 @@@ struct ib_ah 
        struct ib_device        *device;
        struct ib_pd            *pd;
        struct ib_uobject       *uobject;
 +      const struct ib_gid_attr *sgid_attr;
        enum rdma_ah_attr_type  type;
  };
  
@@@ -1762,9 -1776,6 +1762,9 @@@ struct ib_qp 
        struct ib_uobject      *uobject;
        void                  (*event_handler)(struct ib_event *, void *);
        void                   *qp_context;
 +      /* sgid_attrs associated with the AV's */
 +      const struct ib_gid_attr *av_sgid_attr;
 +      const struct ib_gid_attr *alt_path_sgid_attr;
        u32                     qp_num;
        u32                     max_write_sge;
        u32                     max_read_sge;
@@@ -2087,7 -2098,6 +2087,7 @@@ struct ib_flow_attr 
  
  struct ib_flow {
        struct ib_qp            *qp;
 +      struct ib_device        *device;
        struct ib_uobject       *uobject;
  };
  
@@@ -2203,11 -2213,7 +2203,11 @@@ struct rdma_netdev 
        struct ib_device  *hca;
        u8                 port_num;
  
 -      /* cleanup function must be specified */
 +      /*
 +       * cleanup function must be specified.
 +       * FIXME: This is only used for OPA_VNIC and that usage should be
 +       * removed too.
 +       */
        void (*free_rdma_netdev)(struct net_device *netdev);
  
        /* control functions */
@@@ -2236,6 -2242,11 +2236,6 @@@ struct ib_counters 
        atomic_t        usecnt;
  };
  
 -enum ib_read_counters_flags {
 -      /* prefer read values from driver cache */
 -      IB_READ_COUNTERS_ATTR_PREFER_CACHED = 1 << 0,
 -};
 -
  struct ib_counters_read_attr {
        u64     *counters_buff;
        u32     ncounters;
@@@ -2330,7 -2341,8 +2330,7 @@@ struct ib_device 
         * concurrently for different ports. This function is only called when
         * roce_gid_table is used.
         */
 -      int                        (*add_gid)(const union ib_gid *gid,
 -                                            const struct ib_gid_attr *attr,
 +      int                        (*add_gid)(const struct ib_gid_attr *attr,
                                              void **context);
        /* When calling del_gid, the HW vendor's driver should delete the
         * gid of device @device at gid index gid_index of port port_num
                                                struct ib_srq_attr *srq_attr);
        int                        (*destroy_srq)(struct ib_srq *srq);
        int                        (*post_srq_recv)(struct ib_srq *srq,
 -                                                  struct ib_recv_wr *recv_wr,
 -                                                  struct ib_recv_wr **bad_recv_wr);
 +                                                  const struct ib_recv_wr *recv_wr,
 +                                                  const struct ib_recv_wr **bad_recv_wr);
        struct ib_qp *             (*create_qp)(struct ib_pd *pd,
                                                struct ib_qp_init_attr *qp_init_attr,
                                                struct ib_udata *udata);
                                               struct ib_qp_init_attr *qp_init_attr);
        int                        (*destroy_qp)(struct ib_qp *qp);
        int                        (*post_send)(struct ib_qp *qp,
 -                                              struct ib_send_wr *send_wr,
 -                                              struct ib_send_wr **bad_send_wr);
 +                                              const struct ib_send_wr *send_wr,
 +                                              const struct ib_send_wr **bad_send_wr);
        int                        (*post_recv)(struct ib_qp *qp,
 -                                              struct ib_recv_wr *recv_wr,
 -                                              struct ib_recv_wr **bad_recv_wr);
 +                                              const struct ib_recv_wr *recv_wr,
 +                                              const struct ib_recv_wr **bad_recv_wr);
        struct ib_cq *             (*create_cq)(struct ib_device *device,
                                                const struct ib_cq_init_attr *attr,
                                                struct ib_ucontext *context,
        const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev,
                                                     int comp_vector);
  
 -      struct uverbs_root_spec         *specs_root;
 +      const struct uverbs_object_tree_def *const *driver_specs;
        enum rdma_driver_id             driver_id;
  };
  
@@@ -2667,46 -2679,6 +2667,46 @@@ static inline bool ib_is_udata_cleared(
  }
  
  /**
 + * ib_is_destroy_retryable - Check whether the uobject destruction
 + * is retryable.
 + * @ret: The initial destruction return code
 + * @why: remove reason
 + * @uobj: The uobject that is destroyed
 + *
 + * This helper lets the IB layer and low-level drivers decide whether the
 + * destruction of the given uobject is retryable.
 + * It checks the original return code; if it was not success, the destruction
 + * is retryable according to the ucontext state (i.e. cleanup_retryable) and
 + * the remove reason (i.e. why).
 + * Must be called with the object locked for destroy.
 + */
 +static inline bool ib_is_destroy_retryable(int ret, enum rdma_remove_reason why,
 +                                         struct ib_uobject *uobj)
 +{
 +      return ret && (why == RDMA_REMOVE_DESTROY ||
 +                     uobj->context->cleanup_retryable);
 +}
 +
 +/**
 + * ib_destroy_usecnt - Called during destruction to check the usecnt
 + * @usecnt: The usecnt atomic
 + * @why: remove reason
 + * @uobj: The uobject that is destroyed
 + *
 + * Non-zero usecnts will block destruction unless destruction was triggered by
 + * a ucontext cleanup.
 + */
 +static inline int ib_destroy_usecnt(atomic_t *usecnt,
 +                                  enum rdma_remove_reason why,
 +                                  struct ib_uobject *uobj)
 +{
 +      if (atomic_read(usecnt) && ib_is_destroy_retryable(-EBUSY, why, uobj))
 +              return -EBUSY;
 +      return 0;
 +}
 +
 +/**
   * ib_modify_qp_is_ok - Check that the supplied attribute mask
   * contains all required attributes and no attributes not allowed for
   * the given QP state transition.
@@@ -2783,13 -2755,6 +2783,13 @@@ static inline int rdma_is_port_valid(co
                port <= rdma_end_port(device));
  }
  
 +static inline bool rdma_is_grh_required(const struct ib_device *device,
 +                                      u8 port_num)
 +{
 +      return device->port_immutable[port_num].core_cap_flags &
 +              RDMA_CORE_PORT_IB_GRH_REQUIRED;
 +}
 +
  static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num)
  {
        return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IB;
@@@ -3081,6 -3046,10 +3081,6 @@@ static inline bool rdma_cap_read_inv(st
        return rdma_protocol_iwarp(dev, port_num);
  }
  
 -int ib_query_gid(struct ib_device *device,
 -               u8 port_num, int index, union ib_gid *gid,
 -               struct ib_gid_attr *attr);
 -
  int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port,
                         int state);
  int ib_get_vf_config(struct ib_device *device, int vf, u8 port,
@@@ -3179,13 -3148,6 +3179,13 @@@ int ib_get_rdma_header_version(const un
   *   ignored unless the work completion indicates that the GRH is valid.
   * @ah_attr: Returned attributes that can be used when creating an address
   *   handle for replying to the message.
 + * When ib_init_ah_attr_from_wc() returns success,
 + * (a) for the IB link layer it optionally contains a reference to the SGID
 + * attribute when a GRH is present.
 + * (b) for the RoCE link layer it contains a reference to the SGID attribute.
 + * The user must invoke rdma_destroy_ah_attr() to release the reference to the
 + * SGID attributes initialized by ib_init_ah_attr_from_wc().
 + *
   */
  int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num,
                            const struct ib_wc *wc, const struct ib_grh *grh,
@@@ -3285,12 -3247,10 +3285,12 @@@ int ib_destroy_srq(struct ib_srq *srq)
   *   the work request that failed to be posted on the QP.
   */
  static inline int ib_post_srq_recv(struct ib_srq *srq,
 -                                 struct ib_recv_wr *recv_wr,
 -                                 struct ib_recv_wr **bad_recv_wr)
 +                                 const struct ib_recv_wr *recv_wr,
 +                                 const struct ib_recv_wr **bad_recv_wr)
  {
 -      return srq->device->post_srq_recv(srq, recv_wr, bad_recv_wr);
 +      const struct ib_recv_wr *dummy;
 +
 +      return srq->device->post_srq_recv(srq, recv_wr, bad_recv_wr ? : &dummy);
  }
  
  /**
@@@ -3388,12 -3348,10 +3388,12 @@@ int ib_close_qp(struct ib_qp *qp)
   * earlier work requests in the list.
   */
  static inline int ib_post_send(struct ib_qp *qp,
 -                             struct ib_send_wr *send_wr,
 -                             struct ib_send_wr **bad_send_wr)
 +                             const struct ib_send_wr *send_wr,
 +                             const struct ib_send_wr **bad_send_wr)
  {
 -      return qp->device->post_send(qp, send_wr, bad_send_wr);
 +      const struct ib_send_wr *dummy;
 +
 +      return qp->device->post_send(qp, send_wr, bad_send_wr ? : &dummy);
  }
  
  /**
   *   the work request that failed to be posted on the QP.
   */
  static inline int ib_post_recv(struct ib_qp *qp,
 -                             struct ib_recv_wr *recv_wr,
 -                             struct ib_recv_wr **bad_recv_wr)
 +                             const struct ib_recv_wr *recv_wr,
 +                             const struct ib_recv_wr **bad_recv_wr)
  {
 -      return qp->device->post_recv(qp, recv_wr, bad_recv_wr);
 +      const struct ib_recv_wr *dummy;
 +
 +      return qp->device->post_recv(qp, recv_wr, bad_recv_wr ? : &dummy);
  }
  
  struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private,
@@@ -3435,11 -3391,14 +3435,14 @@@ int ib_process_cq_direct(struct ib_cq *
   *
   * Users can examine the cq structure to determine the actual CQ size.
   */
- struct ib_cq *ib_create_cq(struct ib_device *device,
-                          ib_comp_handler comp_handler,
-                          void (*event_handler)(struct ib_event *, void *),
-                          void *cq_context,
-                          const struct ib_cq_init_attr *cq_attr);
+ struct ib_cq *__ib_create_cq(struct ib_device *device,
+                            ib_comp_handler comp_handler,
+                            void (*event_handler)(struct ib_event *, void *),
+                            void *cq_context,
+                            const struct ib_cq_init_attr *cq_attr,
+                            const char *caller);
+ #define ib_create_cq(device, cmp_hndlr, evt_hndlr, cq_ctxt, cq_attr) \
+       __ib_create_cq((device), (cmp_hndlr), (evt_hndlr), (cq_ctxt), (cq_attr), KBUILD_MODNAME)
  
  /**
   * ib_resize_cq - Modifies the capacity of the CQ.
@@@ -3842,6 -3801,10 +3845,6 @@@ struct ib_xrcd *__ib_alloc_xrcd(struct 
   */
  int ib_dealloc_xrcd(struct ib_xrcd *xrcd);
  
 -struct ib_flow *ib_create_flow(struct ib_qp *qp,
 -                             struct ib_flow_attr *flow_attr, int domain);
 -int ib_destroy_flow(struct ib_flow *flow_id);
 -
  static inline int ib_check_mr_access(int flags)
  {
        /*
@@@ -4070,19 -4033,8 +4073,19 @@@ static inline void rdma_ah_set_grh(stru
        grh->sgid_index = sgid_index;
        grh->hop_limit = hop_limit;
        grh->traffic_class = traffic_class;
 +      grh->sgid_attr = NULL;
  }
  
 +void rdma_destroy_ah_attr(struct rdma_ah_attr *ah_attr);
 +void rdma_move_grh_sgid_attr(struct rdma_ah_attr *attr, union ib_gid *dgid,
 +                           u32 flow_label, u8 hop_limit, u8 traffic_class,
 +                           const struct ib_gid_attr *sgid_attr);
 +void rdma_copy_ah_attr(struct rdma_ah_attr *dest,
 +                     const struct rdma_ah_attr *src);
 +void rdma_replace_ah_attr(struct rdma_ah_attr *old,
 +                        const struct rdma_ah_attr *new);
 +void rdma_move_ah_attr(struct rdma_ah_attr *dest, struct rdma_ah_attr *src);
 +
  /**
   * rdma_ah_find_type - Return address handle type.
   *
@@@ -4150,20 -4102,6 +4153,20 @@@ ib_get_vector_affinity(struct ib_devic
  
  }
  
 +static inline void ib_set_flow(struct ib_uobject *uobj, struct ib_flow *ibflow,
 +                             struct ib_qp *qp, struct ib_device *device)
 +{
 +      uobj->object = ibflow;
 +      ibflow->uobject = uobj;
 +
 +      if (qp) {
 +              atomic_inc(&qp->usecnt);
 +              ibflow->qp = qp;
 +      }
 +
 +      ibflow->device = device;
 +}
 +
  /**
   * rdma_roce_rescan_device - Rescan all of the network devices in the system
   * and add their gids, as needed, to the relevant RoCE devices.
   */
  void rdma_roce_rescan_device(struct ib_device *ibdev);
  
 +struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile);
 +
 +int uverbs_destroy_def_handler(struct ib_uverbs_file *file,
 +                             struct uverbs_attr_bundle *attrs);
  #endif /* IB_VERBS_H */
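
The ib_is_destroy_retryable()/ib_destroy_usecnt() helpers introduced in this header are aimed at driver destroy callbacks. A minimal sketch of that pattern follows; struct my_obj, its usecnt field and my_obj_destroy_hw() are hypothetical names, not anything defined by this series:

#include <rdma/ib_verbs.h>
#include <linux/slab.h>

struct my_obj {
        atomic_t usecnt;        /* taken by other objects referencing us */
        /* ... hardware handle, etc. */
};

static int my_obj_destroy_hw(struct my_obj *obj);      /* hypothetical */

static int my_obj_destroy(struct ib_uobject *uobj, enum rdma_remove_reason why)
{
        struct my_obj *obj = uobj->object;
        int ret;

        /* Block destruction while references remain, unless this is a
         * non-retryable ucontext cleanup.
         */
        ret = ib_destroy_usecnt(&obj->usecnt, why, uobj);
        if (ret)
                return ret;

        ret = my_obj_destroy_hw(obj);
        if (ib_is_destroy_retryable(ret, why, uobj))
                return ret;

        kfree(obj);
        return 0;
}
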
diff --combined net/rds/ib_frmr.c
@@@ -102,6 -102,7 +102,6 @@@ static void rds_ib_free_frmr(struct rds
  static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr)
  {
        struct rds_ib_frmr *frmr = &ibmr->u.frmr;
 -      struct ib_send_wr *failed_wr;
        struct ib_reg_wr reg_wr;
        int ret, off = 0;
  
                        IB_ACCESS_REMOTE_WRITE;
        reg_wr.wr.send_flags = IB_SEND_SIGNALED;
  
 -      failed_wr = &reg_wr.wr;
 -      ret = ib_post_send(ibmr->ic->i_cm_id->qp, &reg_wr.wr, &failed_wr);
 -      WARN_ON(failed_wr != &reg_wr.wr);
 +      ret = ib_post_send(ibmr->ic->i_cm_id->qp, &reg_wr.wr, NULL);
        if (unlikely(ret)) {
                /* Failure here can be because of -ENOMEM as well */
                frmr->fr_state = FRMR_IS_STALE;
@@@ -227,7 -230,7 +227,7 @@@ out_unmap
  
  static int rds_ib_post_inv(struct rds_ib_mr *ibmr)
  {
 -      struct ib_send_wr *s_wr, *failed_wr;
 +      struct ib_send_wr *s_wr;
        struct rds_ib_frmr *frmr = &ibmr->u.frmr;
        struct rdma_cm_id *i_cm_id = ibmr->ic->i_cm_id;
        int ret = -EINVAL;
        s_wr->ex.invalidate_rkey = frmr->mr->rkey;
        s_wr->send_flags = IB_SEND_SIGNALED;
  
 -      failed_wr = s_wr;
 -      ret = ib_post_send(i_cm_id->qp, s_wr, &failed_wr);
 -      WARN_ON(failed_wr != s_wr);
 +      ret = ib_post_send(i_cm_id->qp, s_wr, NULL);
        if (unlikely(ret)) {
                frmr->fr_state = FRMR_IS_STALE;
                frmr->fr_inv = false;
@@@ -339,6 -344,11 +339,11 @@@ struct rds_ib_mr *rds_ib_reg_frmr(struc
        struct rds_ib_frmr *frmr;
        int ret;
  
+       if (!ic) {
+               /* TODO: Add FRWR support for RDS_GET_MR using proxy qp */
+               return ERR_PTR(-EOPNOTSUPP);
+       }
        do {
                if (ibmr)
                        rds_ib_free_frmr(ibmr, true);
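
The removal of failed_wr above follows from the const-ified posting verbs in include/rdma/ib_verbs.h: callers that do not need to know which WR failed may now pass NULL, and the inline wrapper supplies a dummy pointer internally. A small sketch of the resulting calling convention (qp and wr are illustrative names, not from this patch):

#include <rdma/ib_verbs.h>

/* Illustrative only: posts a single pre-built send WR. */
static int post_one_send(struct ib_qp *qp, const struct ib_send_wr *wr)
{
        /* No bad_wr bookkeeping needed; NULL is now accepted. */
        return ib_post_send(qp, wr, NULL);
}
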
diff --combined net/smc/smc_tx.c
@@@ -255,6 -255,7 +255,6 @@@ static int smc_tx_rdma_write(struct smc
                             int num_sges, struct ib_sge sges[])
  {
        struct smc_link_group *lgr = conn->lgr;
 -      struct ib_send_wr *failed_wr = NULL;
        struct ib_rdma_wr rdma_wr;
        struct smc_link *link;
        int rc;
                /* offset within RMBE */
                peer_rmbe_offset;
        rdma_wr.rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey;
 -      rc = ib_post_send(link->roce_qp, &rdma_wr.wr, &failed_wr);
 +      rc = ib_post_send(link->roce_qp, &rdma_wr.wr, NULL);
        if (rc) {
                conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
                smc_lgr_terminate(lgr);
@@@ -494,7 -495,8 +494,8 @@@ out
  
  void smc_tx_consumer_update(struct smc_connection *conn, bool force)
  {
-       union smc_host_cursor cfed, cons;
+       union smc_host_cursor cfed, cons, prod;
+       int sender_free = conn->rmb_desc->len;
        int to_confirm;
  
        smc_curs_write(&cons,
                       smc_curs_read(&conn->rx_curs_confirmed, conn),
                       conn);
        to_confirm = smc_curs_diff(conn->rmb_desc->len, &cfed, &cons);
+       if (to_confirm > conn->rmbe_update_limit) {
+               smc_curs_write(&prod,
+                              smc_curs_read(&conn->local_rx_ctrl.prod, conn),
+                              conn);
+               sender_free = conn->rmb_desc->len -
+                             smc_curs_diff(conn->rmb_desc->len, &prod, &cfed);
+       }
  
        if (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req ||
            force ||
            ((to_confirm > conn->rmbe_update_limit) &&
-            ((to_confirm > (conn->rmb_desc->len / 2)) ||
+            ((sender_free <= (conn->rmb_desc->len / 2)) ||
              conn->local_rx_ctrl.prod_flags.write_blocked))) {
                if ((smc_cdc_get_slot_and_msg_send(conn) < 0) &&
                    conn->alert_token_local) { /* connection healthy */