Merge branch 'work.csum_and_copy' of git://git.kernel.org/pub/scm/linux/kernel/git...
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 12 Oct 2020 23:24:13 +0000 (16:24 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 12 Oct 2020 23:24:13 +0000 (16:24 -0700)
Pull copy_and_csum cleanups from Al Viro:
 "Saner calling conventions for csum_and_copy_..._user() and friends"

[ Removing 800+ lines of code and cleaning stuff up is good - Linus ]
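
[ For readers skimming the shortlog below: the shape of the interface
  change, reconstructed from the diffs that follow (a sketch, not the
  exact in-tree declarations), is

	/* old: caller seeds the sum; faults reported via err_ptr */
	__wsum csum_and_copy_from_user(const void __user *src, void *dst,
				       int len, __wsum isum, int *err_ptr);

	/* new: no seed, no error pointer; a return of 0 means fault */
	__wsum csum_and_copy_from_user(const void __user *src, void *dst,
				       int len);

  csum_and_copy_to_user() and csum_partial_copy_nocheck() lose their
  trailing arguments the same way. ]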

* 'work.csum_and_copy' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  ppc: propagate the calling conventions change down to csum_partial_copy_generic()
  amd64: switch csum_partial_copy_generic() to new calling conventions
  sparc64: propagate the calling convention changes down to __csum_partial_copy_...()
  xtensa: propagate the calling conventions change down into csum_partial_copy_generic()
  mips: propagate the calling convention change down into __csum_partial_copy_..._user()
  mips: __csum_partial_copy_kernel() has no users left
  mips: csum_and_copy_{to,from}_user() are never called under KERNEL_DS
  sparc32: propagate the calling conventions change down to __csum_partial_copy_sparc_generic()
  i386: propagate the calling conventions change down to csum_partial_copy_generic()
  sh: propagate the calling conventions change down to csum_partial_copy_generic()
  m68k: get rid of zeroing destination on error in csum_and_copy_from_user()
  arm: propagate the calling convention changes down to csum_partial_copy_from_user()
  alpha: propagate the calling convention changes down to csum_partial_copy.c helpers
  saner calling conventions for csum_and_copy_..._user()
  csum_and_copy_..._user(): pass 0xffffffff instead of 0 as initial sum
  csum_partial_copy_nocheck(): drop the last argument
  unify generic instances of csum_partial_copy_nocheck()
  icmp_push_reply(): reorder adding the checksum up
  skb_copy_and_csum_bits(): don't bother with the last argument
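
[ Why seeding with 0xffffffff makes 0 a safe fault marker: the checksum
  is a 32-bit ones'-complement sum, and folding the end-around carry
  back in means a running sum that starts at ~0 can never collapse to
  zero.  A minimal sketch - csum32_add here is a stand-in for the
  kernel's csum_add(), not necessarily its exact implementation:

	#include <stdint.h>

	/* ones'-complement add with end-around carry folded back in */
	static uint32_t csum32_add(uint32_t a, uint32_t b)
	{
		uint32_t s = a + b;	/* may wrap around */
		return s + (s < a);	/* add the carry back in */
	}

  Starting from a == 0xffffffff (ones'-complement "negative zero"),
  the result is 0xffffffff when b == 0 and b itself otherwise - in no
  case zero - so csum_and_copy_..._user() can return 0 to mean "fault"
  without colliding with any legitimate checksum value. ]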

include/linux/skbuff.h
lib/iov_iter.c
net/core/skbuff.c
net/ipv4/ip_output.c
net/ipv4/raw.c

diff --combined include/linux/skbuff.h
@@@ -71,7 -71,7 +71,7 @@@
   *    NETIF_F_IPV6_CSUM - Driver (device) is only able to checksum plain
   *                      TCP or UDP packets over IPv6. These are specifically
   *                      unencapsulated packets of the form IPv6|TCP or
 - *                      IPv4|UDP where the Next Header field in the IPv6
 + *                      IPv6|UDP where the Next Header field in the IPv6
   *                      header is either TCP or UDP. IPv6 extension headers
   *                      are not supported with this feature. This feature
   *                      cannot be set in features for a device with
@@@ -1056,16 -1056,7 +1056,16 @@@ void kfree_skb(struct sk_buff *skb)
  void kfree_skb_list(struct sk_buff *segs);
  void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt);
  void skb_tx_error(struct sk_buff *skb);
 +
 +#ifdef CONFIG_TRACEPOINTS
  void consume_skb(struct sk_buff *skb);
 +#else
 +static inline void consume_skb(struct sk_buff *skb)
 +{
 +      return kfree_skb(skb);
 +}
 +#endif
 +
  void __consume_stateless_skb(struct sk_buff *skb);
  void  __kfree_skb(struct sk_buff *skb);
  extern struct kmem_cache *skbuff_head_cache;
@@@ -2667,7 -2658,7 +2667,7 @@@ static inline int pskb_network_may_pull
   *
   * Using max(32, L1_CACHE_BYTES) makes sense (especially with RPS)
   * to reduce average number of cache lines per packet.
 - * get_rps_cpus() for example only access one 64 bytes aligned block :
 + * get_rps_cpu() for example only access one 64 bytes aligned block :
   * NET_IP_ALIGN(2) + ethernet_header(14) + IP_header(20/40) + ports(8)
   */
  #ifndef NET_SKB_PAD
@@@ -3223,9 -3214,8 +3223,9 @@@ static inline int skb_padto(struct sk_b
   *    is untouched. Otherwise it is extended. Returns zero on
   *    success. The skb is freed on error if @free_on_error is true.
   */
 -static inline int __skb_put_padto(struct sk_buff *skb, unsigned int len,
 -                                bool free_on_error)
 +static inline int __must_check __skb_put_padto(struct sk_buff *skb,
 +                                             unsigned int len,
 +                                             bool free_on_error)
  {
        unsigned int size = skb->len;
  
   *    is untouched. Otherwise it is extended. Returns zero on
   *    success. The skb is freed on error.
   */
 -static inline int skb_put_padto(struct sk_buff *skb, unsigned int len)
 +static inline int __must_check skb_put_padto(struct sk_buff *skb, unsigned int len)
  {
        return __skb_put_padto(skb, len, true);
  }
@@@ -3545,7 -3535,7 +3545,7 @@@ int skb_kill_datagram(struct sock *sk, 
  int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len);
  int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len);
  __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to,
-                             int len, __wsum csum);
+                             int len);
  int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
                    struct pipe_inode_info *pipe, unsigned int len,
                    unsigned int flags);
@@@ -3755,19 -3745,19 +3755,19 @@@ static inline bool __skb_metadata_diffe
  #define __it(x, op) (x -= sizeof(u##op))
  #define __it_diff(a, b, op) (*(u##op *)__it(a, op)) ^ (*(u##op *)__it(b, op))
        case 32: diffs |= __it_diff(a, b, 64);
 -               /* fall through */
 +              fallthrough;
        case 24: diffs |= __it_diff(a, b, 64);
 -               /* fall through */
 +              fallthrough;
        case 16: diffs |= __it_diff(a, b, 64);
 -               /* fall through */
 +              fallthrough;
        case  8: diffs |= __it_diff(a, b, 64);
                break;
        case 28: diffs |= __it_diff(a, b, 64);
 -               /* fall through */
 +              fallthrough;
        case 20: diffs |= __it_diff(a, b, 64);
 -               /* fall through */
 +              fallthrough;
        case 12: diffs |= __it_diff(a, b, 64);
 -               /* fall through */
 +              fallthrough;
        case  4: diffs |= __it_diff(a, b, 32);
                break;
        }
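
[ The __must_check annotations above change what a correct caller looks
  like: since skb_put_padto() frees the skb on failure, ignoring its
  return value is now flagged at build time.  A minimal caller sketch
  (hypothetical transmit-path context, not code from this diff):

	if (skb_put_padto(skb, ETH_ZLEN)) {
		/* skb has already been freed; do not touch it again */
		dev->stats.tx_dropped++;
		return NETDEV_TX_OK;
	}
]
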
diff --combined lib/iov_iter.c
@@@ -581,7 -581,7 +581,7 @@@ static size_t copy_pipe_to_iter(const v
  static __wsum csum_and_memcpy(void *to, const void *from, size_t len,
                              __wsum sum, size_t off)
  {
-       __wsum next = csum_partial_copy_nocheck(from, to, len, 0);
+       __wsum next = csum_partial_copy_nocheck(from, to, len);
        return csum_block_add(sum, next, off);
  }
  
@@@ -637,30 -637,30 +637,30 @@@ size_t _copy_to_iter(const void *addr, 
  }
  EXPORT_SYMBOL(_copy_to_iter);
  
 -#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
 -static int copyout_mcsafe(void __user *to, const void *from, size_t n)
 +#ifdef CONFIG_ARCH_HAS_COPY_MC
 +static int copyout_mc(void __user *to, const void *from, size_t n)
  {
        if (access_ok(to, n)) {
                instrument_copy_to_user(to, from, n);
 -              n = copy_to_user_mcsafe((__force void *) to, from, n);
 +              n = copy_mc_to_user((__force void *) to, from, n);
        }
        return n;
  }
  
 -static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset,
 +static unsigned long copy_mc_to_page(struct page *page, size_t offset,
                const char *from, size_t len)
  {
        unsigned long ret;
        char *to;
  
        to = kmap_atomic(page);
 -      ret = memcpy_mcsafe(to + offset, from, len);
 +      ret = copy_mc_to_kernel(to + offset, from, len);
        kunmap_atomic(to);
  
        return ret;
  }
  
 -static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
 +static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
                                struct iov_iter *i)
  {
        struct pipe_inode_info *pipe = i->pipe;
                size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
                unsigned long rem;
  
 -              rem = memcpy_mcsafe_to_page(pipe->bufs[i_head & p_mask].page,
 +              rem = copy_mc_to_page(pipe->bufs[i_head & p_mask].page,
                                            off, addr, chunk);
                i->head = i_head;
                i->iov_offset = off + chunk - rem;
  }
  
  /**
 - * _copy_to_iter_mcsafe - copy to user with source-read error exception handling
 + * _copy_mc_to_iter - copy to iter with source memory error exception handling
   * @addr: source kernel address
   * @bytes: total transfer length
   * @iter: destination iterator
   *
 - * The pmem driver arranges for filesystem-dax to use this facility via
 - * dax_copy_to_iter() for protecting read/write to persistent memory.
 - * Unless / until an architecture can guarantee identical performance
 - * between _copy_to_iter_mcsafe() and _copy_to_iter() it would be a
 - * performance regression to switch more users to the mcsafe version.
 + * The pmem driver deploys this for the dax operation
 + * (dax_copy_to_iter()) for dax reads (bypass page-cache and the
 + * block-layer). Upon #MC read(2) aborts and returns EIO or the bytes
 + * successfully copied.
   *
 - * Otherwise, the main differences between this and typical _copy_to_iter().
 + * The main differences between this and typical _copy_to_iter().
   *
   * * Typical tail/residue handling after a fault retries the copy
   *   byte-by-byte until the fault happens again. Re-triggering machine
   * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
   *   Compare to copy_to_iter() where only ITER_IOVEC attempts might return
   *   a short copy.
 - *
 - * See MCSAFE_TEST for self-test.
   */
 -size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
 +size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
  {
        const char *from = addr;
        unsigned long rem, curr_addr, s_addr = (unsigned long) addr;
  
        if (unlikely(iov_iter_is_pipe(i)))
 -              return copy_pipe_to_iter_mcsafe(addr, bytes, i);
 +              return copy_mc_pipe_to_iter(addr, bytes, i);
        if (iter_is_iovec(i))
                might_fault();
        iterate_and_advance(i, bytes, v,
 -              copyout_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
 +              copyout_mc(v.iov_base, (from += v.iov_len) - v.iov_len,
 +                         v.iov_len),
                ({
 -              rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset,
 -                               (from += v.bv_len) - v.bv_len, v.bv_len);
 +              rem = copy_mc_to_page(v.bv_page, v.bv_offset,
 +                                    (from += v.bv_len) - v.bv_len, v.bv_len);
                if (rem) {
                        curr_addr = (unsigned long) from;
                        bytes = curr_addr - s_addr - rem;
                }
                }),
                ({
 -              rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len,
 -                              v.iov_len);
 +              rem = copy_mc_to_kernel(v.iov_base, (from += v.iov_len)
 +                                      - v.iov_len, v.iov_len);
                if (rem) {
                        curr_addr = (unsigned long) from;
                        bytes = curr_addr - s_addr - rem;
  
        return bytes;
  }
 -EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe);
 -#endif /* CONFIG_ARCH_HAS_UACCESS_MCSAFE */
 +EXPORT_SYMBOL_GPL(_copy_mc_to_iter);
 +#endif /* CONFIG_ARCH_HAS_COPY_MC */
  
  size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
  {
@@@ -1447,15 -1449,14 +1447,14 @@@ size_t csum_and_copy_from_iter(void *ad
                return 0;
        }
        iterate_and_advance(i, bytes, v, ({
-               int err = 0;
                next = csum_and_copy_from_user(v.iov_base,
                                               (to += v.iov_len) - v.iov_len,
-                                              v.iov_len, 0, &err);
-               if (!err) {
+                                              v.iov_len);
+               if (next) {
                        sum = csum_block_add(sum, next, off);
                        off += v.iov_len;
                }
-               err ? v.iov_len : 0;
+               next ? 0 : v.iov_len;
        }), ({
                char *p = kmap_atomic(v.bv_page);
                sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
@@@ -1489,11 -1490,10 +1488,10 @@@ bool csum_and_copy_from_iter_full(void 
        if (unlikely(i->count < bytes))
                return false;
        iterate_all_kinds(i, bytes, v, ({
-               int err = 0;
                next = csum_and_copy_from_user(v.iov_base,
                                               (to += v.iov_len) - v.iov_len,
-                                              v.iov_len, 0, &err);
-               if (err)
+                                              v.iov_len);
+               if (!next)
                        return false;
                sum = csum_block_add(sum, next, off);
                off += v.iov_len;
@@@ -1535,15 -1535,14 +1533,14 @@@ size_t csum_and_copy_to_iter(const voi
                return 0;
        }
        iterate_and_advance(i, bytes, v, ({
-               int err = 0;
                next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
                                             v.iov_base,
-                                            v.iov_len, 0, &err);
-               if (!err) {
+                                            v.iov_len);
+               if (next) {
                        sum = csum_block_add(sum, next, off);
                        off += v.iov_len;
                }
-               err ? v.iov_len : 0;
+               next ? 0 : v.iov_len;
        }), ({
                char *p = kmap_atomic(v.bv_page);
                sum = csum_and_memcpy(p + v.bv_offset,
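
[ As the _copy_mc_to_iter() hunks above rely on, copy_mc_to_kernel()
  keeps the old memcpy_mcsafe() contract of returning the number of
  bytes left uncopied when poisoned source memory is hit.  A minimal
  caller sketch (copy_mc_bounded is a hypothetical helper, not in-tree
  code):

	#include <linux/uaccess.h>

	/* returns how many bytes were actually transferred */
	static size_t copy_mc_bounded(void *dst, const void *src, size_t len)
	{
		unsigned long rem = copy_mc_to_kernel(dst, src, len);

		return len - rem;
	}
]
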
diff --combined net/core/skbuff.c
@@@ -820,7 -820,6 +820,7 @@@ void skb_tx_error(struct sk_buff *skb
  }
  EXPORT_SYMBOL(skb_tx_error);
  
 +#ifdef CONFIG_TRACEPOINTS
  /**
   *    consume_skb - free an skbuff
   *    @skb: buffer to free
@@@ -838,7 -837,6 +838,7 @@@ void consume_skb(struct sk_buff *skb
        __kfree_skb(skb);
  }
  EXPORT_SYMBOL(consume_skb);
 +#endif
  
  /**
   *    consume_stateless_skb - free an skbuff, assuming it is stateless
@@@ -2725,19 -2723,20 +2725,20 @@@ EXPORT_SYMBOL(skb_checksum)
  /* Both of above in one bottle. */
  
  __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
-                                   u8 *to, int len, __wsum csum)
+                                   u8 *to, int len)
  {
        int start = skb_headlen(skb);
        int i, copy = start - offset;
        struct sk_buff *frag_iter;
        int pos = 0;
+       __wsum csum = 0;
  
        /* Copy header. */
        if (copy > 0) {
                if (copy > len)
                        copy = len;
                csum = csum_partial_copy_nocheck(skb->data + offset, to,
-                                                copy, csum);
+                                                copy);
                if ((len -= copy) == 0)
                        return csum;
                offset += copy;
                                vaddr = kmap_atomic(p);
                                csum2 = csum_partial_copy_nocheck(vaddr + p_off,
                                                                  to + copied,
-                                                                 p_len, 0);
+                                                                 p_len);
                                kunmap_atomic(vaddr);
                                csum = csum_block_add(csum, csum2, pos);
                                pos += p_len;
                                copy = len;
                        csum2 = skb_copy_and_csum_bits(frag_iter,
                                                       offset - start,
-                                                      to, copy, 0);
+                                                      to, copy);
                        csum = csum_block_add(csum, csum2, pos);
                        if ((len -= copy) == 0)
                                return csum;
@@@ -3013,7 -3012,7 +3014,7 @@@ void skb_copy_and_csum_dev(const struc
        csum = 0;
        if (csstart != skb->len)
                csum = skb_copy_and_csum_bits(skb, csstart, to + csstart,
-                                             skb->len - csstart, 0);
+                                             skb->len - csstart);
  
        if (skb->ip_summed == CHECKSUM_PARTIAL) {
                long csstuff = csstart + skb->csum_offset;
@@@ -3934,7 -3933,7 +3935,7 @@@ normal
                                        skb_copy_and_csum_bits(head_skb, offset,
                                                               skb_put(nskb,
                                                                       len),
-                                                              len, 0);
+                                                              len);
                                SKB_GSO_CB(nskb)->csum_start =
                                        skb_headroom(nskb) + doffset;
                        } else {
@@@ -5420,8 -5419,8 +5421,8 @@@ struct sk_buff *skb_vlan_untag(struct s
        skb = skb_share_check(skb, GFP_ATOMIC);
        if (unlikely(!skb))
                goto err_free;
 -
 -      if (unlikely(!pskb_may_pull(skb, VLAN_HLEN)))
 +      /* We may access the two bytes after vlan_hdr in vlan_set_encap_proto(). */
 +      if (unlikely(!pskb_may_pull(skb, VLAN_HLEN + sizeof(unsigned short))))
                goto err_free;
  
        vhdr = (struct vlan_hdr *)skb->data;
@@@ -5622,7 -5621,7 +5623,7 @@@ int skb_mpls_push(struct sk_buff *skb, 
        lse->label_stack_entry = mpls_lse;
        skb_postpush_rcsum(skb, lse, MPLS_HLEN);
  
 -      if (ethernet)
 +      if (ethernet && mac_len >= ETH_HLEN)
                skb_mod_eth_type(skb, eth_hdr(skb), mpls_proto);
        skb->protocol = mpls_proto;
  
@@@ -5662,7 -5661,7 +5663,7 @@@ int skb_mpls_pop(struct sk_buff *skb, _
        skb_reset_mac_header(skb);
        skb_set_network_header(skb, mac_len);
  
 -      if (ethernet) {
 +      if (ethernet && mac_len >= ETH_HLEN) {
                struct ethhdr *hdr;
  
                /* use mpls_hdr() to get ethertype to account for VLANs. */
@@@ -5989,13 -5988,9 +5990,13 @@@ static int pskb_carve_inside_nonlinear(
        if (skb_has_frag_list(skb))
                skb_clone_fraglist(skb);
  
 -      if (k == 0) {
 -              /* split line is in frag list */
 -              pskb_carve_frag_list(skb, shinfo, off - pos, gfp_mask);
 +      /* split line is in frag list */
 +      if (k == 0 && pskb_carve_frag_list(skb, shinfo, off - pos, gfp_mask)) {
 +              /* skb_frag_unref() is not needed here as shinfo->nr_frags = 0. */
 +              if (skb_has_frag_list(skb))
 +                      kfree_skb_list(skb_shinfo(skb)->frag_list);
 +              kfree(data);
 +              return -ENOMEM;
        }
        skb_release_data(skb);
  
diff --combined net/ipv4/ip_output.c
@@@ -74,7 -74,6 +74,7 @@@
  #include <net/icmp.h>
  #include <net/checksum.h>
  #include <net/inetpeer.h>
 +#include <net/inet_ecn.h>
  #include <net/lwtunnel.h>
  #include <linux/bpf-cgroup.h>
  #include <linux/igmp.h>
@@@ -1127,7 -1126,7 +1127,7 @@@ alloc_new_skb
                        if (fraggap) {
                                skb->csum = skb_copy_and_csum_bits(
                                        skb_prev, maxfraglen,
-                                       data + transhdrlen, fraggap, 0);
+                                       data + transhdrlen, fraggap);
                                skb_prev->csum = csum_sub(skb_prev->csum,
                                                          skb->csum);
                                data += fraggap;
@@@ -1412,7 -1411,7 +1412,7 @@@ ssize_t ip_append_page(struct sock *sk
                                skb->csum = skb_copy_and_csum_bits(skb_prev,
                                                                   maxfraglen,
                                                    skb_transport_header(skb),
-                                                                  fraggap, 0);
+                                                                  fraggap);
                                skb_prev->csum = csum_sub(skb_prev->csum,
                                                          skb->csum);
                                pskb_trim_unique(skb_prev, maxfraglen);
@@@ -1649,7 -1648,7 +1649,7 @@@ static int ip_reply_glue_bits(void *dpt
  {
        __wsum csum;
  
-       csum = csum_partial_copy_nocheck(dptr+offset, to, len, 0);
+       csum = csum_partial_copy_nocheck(dptr+offset, to, len);
        skb->csum = csum_block_add(skb->csum, csum, odd);
        return 0;
  }
@@@ -1704,7 -1703,7 +1704,7 @@@ void ip_send_unicast_reply(struct sock 
        if (IS_ERR(rt))
                return;
  
 -      inet_sk(sk)->tos = arg->tos;
 +      inet_sk(sk)->tos = arg->tos & ~INET_ECN_MASK;
  
        sk->sk_protocol = ip_hdr(skb)->protocol;
        sk->sk_bound_dev_if = arg->bound_dev_if;
diff --combined net/ipv4/raw.c
@@@ -478,7 -478,7 +478,7 @@@ static int raw_getfrag(void *from, cha
                        skb->csum = csum_block_add(
                                skb->csum,
                                csum_partial_copy_nocheck(rfv->hdr.c + offset,
-                                                         to, copy, 0),
+                                                         to, copy),
                                odd);
  
                odd = 0;
@@@ -610,7 -610,7 +610,7 @@@ static int raw_sendmsg(struct sock *sk
        } else if (!ipc.oif) {
                ipc.oif = inet->uc_index;
        } else if (ipv4_is_lbcast(daddr) && inet->uc_index) {
 -              /* oif is set, packet is to local broadcast and
 +              /* oif is set, packet is to local broadcast
                 * and uc_index is set. oif is most likely set
                 * by sk_bound_dev_if. If uc_index != oif check if the
                 * oif is an L3 master and uc_index is an L3 slave.