* NETIF_F_IPV6_CSUM - Driver (device) is only able to checksum plain
* TCP or UDP packets over IPv6. These are specifically
* unencapsulated packets of the form IPv6|TCP or
- * IPv4|UDP where the Next Header field in the IPv6
+ * IPv6|UDP where the Next Header field in the IPv6
* header is either TCP or UDP. IPv6 extension headers
* are not supported with this feature. This feature
* cannot be set in features for a device with
void kfree_skb_list(struct sk_buff *segs);
void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt);
void skb_tx_error(struct sk_buff *skb);
+
+#ifdef CONFIG_TRACEPOINTS
void consume_skb(struct sk_buff *skb);
+#else
+static inline void consume_skb(struct sk_buff *skb)
+{
+ return kfree_skb(skb);
+}
+#endif
+
void __consume_stateless_skb(struct sk_buff *skb);
void __kfree_skb(struct sk_buff *skb);
extern struct kmem_cache *skbuff_head_cache;
*
* Using max(32, L1_CACHE_BYTES) makes sense (especially with RPS)
* to reduce average number of cache lines per packet.
- * get_rps_cpus() for example only access one 64 bytes aligned block :
+ * get_rps_cpu() for example only access one 64 bytes aligned block :
* NET_IP_ALIGN(2) + ethernet_header(14) + IP_header(20/40) + ports(8)
*/
#ifndef NET_SKB_PAD
* is untouched. Otherwise it is extended. Returns zero on
* success. The skb is freed on error if @free_on_error is true.
*/
-static inline int __skb_put_padto(struct sk_buff *skb, unsigned int len,
- bool free_on_error)
+static inline int __must_check __skb_put_padto(struct sk_buff *skb,
+ unsigned int len,
+ bool free_on_error)
{
unsigned int size = skb->len;
* is untouched. Otherwise it is extended. Returns zero on
* success. The skb is freed on error.
*/
-static inline int skb_put_padto(struct sk_buff *skb, unsigned int len)
+static inline int __must_check skb_put_padto(struct sk_buff *skb, unsigned int len)
{
return __skb_put_padto(skb, len, true);
}
int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len);
int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len);
__wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to,
- int len, __wsum csum);
+ int len);
int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
struct pipe_inode_info *pipe, unsigned int len,
unsigned int flags);
#define __it(x, op) (x -= sizeof(u##op))
#define __it_diff(a, b, op) (*(u##op *)__it(a, op)) ^ (*(u##op *)__it(b, op))
case 32: diffs |= __it_diff(a, b, 64);
- /* fall through */
+ fallthrough;
case 24: diffs |= __it_diff(a, b, 64);
- /* fall through */
+ fallthrough;
case 16: diffs |= __it_diff(a, b, 64);
- /* fall through */
+ fallthrough;
case 8: diffs |= __it_diff(a, b, 64);
break;
case 28: diffs |= __it_diff(a, b, 64);
- /* fall through */
+ fallthrough;
case 20: diffs |= __it_diff(a, b, 64);
- /* fall through */
+ fallthrough;
case 12: diffs |= __it_diff(a, b, 64);
- /* fall through */
+ fallthrough;
case 4: diffs |= __it_diff(a, b, 32);
break;
}
static __wsum csum_and_memcpy(void *to, const void *from, size_t len,
__wsum sum, size_t off)
{
- __wsum next = csum_partial_copy_nocheck(from, to, len, 0);
+ __wsum next = csum_partial_copy_nocheck(from, to, len);
return csum_block_add(sum, next, off);
}
}
EXPORT_SYMBOL(_copy_to_iter);
-#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
-static int copyout_mcsafe(void __user *to, const void *from, size_t n)
+#ifdef CONFIG_ARCH_HAS_COPY_MC
+static int copyout_mc(void __user *to, const void *from, size_t n)
{
if (access_ok(to, n)) {
instrument_copy_to_user(to, from, n);
- n = copy_to_user_mcsafe((__force void *) to, from, n);
+ n = copy_mc_to_user((__force void *) to, from, n);
}
return n;
}
-static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset,
+static unsigned long copy_mc_to_page(struct page *page, size_t offset,
const char *from, size_t len)
{
unsigned long ret;
char *to;
to = kmap_atomic(page);
- ret = memcpy_mcsafe(to + offset, from, len);
+ ret = copy_mc_to_kernel(to + offset, from, len);
kunmap_atomic(to);
return ret;
}
-static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
+static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
struct iov_iter *i)
{
struct pipe_inode_info *pipe = i->pipe;
size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
unsigned long rem;
- rem = memcpy_mcsafe_to_page(pipe->bufs[i_head & p_mask].page,
+ rem = copy_mc_to_page(pipe->bufs[i_head & p_mask].page,
off, addr, chunk);
i->head = i_head;
i->iov_offset = off + chunk - rem;
}
/**
- * _copy_to_iter_mcsafe - copy to user with source-read error exception handling
+ * _copy_mc_to_iter - copy to iter with source memory error exception handling
* @addr: source kernel address
* @bytes: total transfer length
* @iter: destination iterator
*
- * The pmem driver arranges for filesystem-dax to use this facility via
- * dax_copy_to_iter() for protecting read/write to persistent memory.
- * Unless / until an architecture can guarantee identical performance
- * between _copy_to_iter_mcsafe() and _copy_to_iter() it would be a
- * performance regression to switch more users to the mcsafe version.
+ * The pmem driver deploys this for the dax operation
+ * (dax_copy_to_iter()) for dax reads (bypass page-cache and the
+ * block-layer). Upon #MC read(2) aborts and returns EIO or the bytes
+ * successfully copied.
*
- * Otherwise, the main differences between this and typical _copy_to_iter().
+ * The main differences between this and typical _copy_to_iter().
*
* * Typical tail/residue handling after a fault retries the copy
* byte-by-byte until the fault happens again. Re-triggering machine
* * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
* Compare to copy_to_iter() where only ITER_IOVEC attempts might return
* a short copy.
- *
- * See MCSAFE_TEST for self-test.
*/
-size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
+size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
const char *from = addr;
unsigned long rem, curr_addr, s_addr = (unsigned long) addr;
if (unlikely(iov_iter_is_pipe(i)))
- return copy_pipe_to_iter_mcsafe(addr, bytes, i);
+ return copy_mc_pipe_to_iter(addr, bytes, i);
if (iter_is_iovec(i))
might_fault();
iterate_and_advance(i, bytes, v,
- copyout_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
+ copyout_mc(v.iov_base, (from += v.iov_len) - v.iov_len,
+ v.iov_len),
({
- rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset,
- (from += v.bv_len) - v.bv_len, v.bv_len);
+ rem = copy_mc_to_page(v.bv_page, v.bv_offset,
+ (from += v.bv_len) - v.bv_len, v.bv_len);
if (rem) {
curr_addr = (unsigned long) from;
bytes = curr_addr - s_addr - rem;
}
}),
({
- rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len,
- v.iov_len);
+ rem = copy_mc_to_kernel(v.iov_base, (from += v.iov_len)
+ - v.iov_len, v.iov_len);
if (rem) {
curr_addr = (unsigned long) from;
bytes = curr_addr - s_addr - rem;
return bytes;
}
-EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe);
-#endif /* CONFIG_ARCH_HAS_UACCESS_MCSAFE */
+EXPORT_SYMBOL_GPL(_copy_mc_to_iter);
+#endif /* CONFIG_ARCH_HAS_COPY_MC */
size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
return 0;
}
iterate_and_advance(i, bytes, v, ({
- int err = 0;
next = csum_and_copy_from_user(v.iov_base,
(to += v.iov_len) - v.iov_len,
- v.iov_len, 0, &err);
- if (!err) {
+ v.iov_len);
+ if (next) {
sum = csum_block_add(sum, next, off);
off += v.iov_len;
}
- err ? v.iov_len : 0;
+ next ? 0 : v.iov_len;
}), ({
char *p = kmap_atomic(v.bv_page);
sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
if (unlikely(i->count < bytes))
return false;
iterate_all_kinds(i, bytes, v, ({
- int err = 0;
next = csum_and_copy_from_user(v.iov_base,
(to += v.iov_len) - v.iov_len,
- v.iov_len, 0, &err);
- if (err)
+ v.iov_len);
+ if (!next)
return false;
sum = csum_block_add(sum, next, off);
off += v.iov_len;
return 0;
}
iterate_and_advance(i, bytes, v, ({
- int err = 0;
next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
v.iov_base,
- v.iov_len, 0, &err);
- if (!err) {
+ v.iov_len);
+ if (next) {
sum = csum_block_add(sum, next, off);
off += v.iov_len;
}
- err ? v.iov_len : 0;
+ next ? 0 : v.iov_len;
}), ({
char *p = kmap_atomic(v.bv_page);
sum = csum_and_memcpy(p + v.bv_offset,
}
EXPORT_SYMBOL(skb_tx_error);
+#ifdef CONFIG_TRACEPOINTS
/**
* consume_skb - free an skbuff
* @skb: buffer to free
__kfree_skb(skb);
}
EXPORT_SYMBOL(consume_skb);
+#endif
/**
* consume_stateless_skb - free an skbuff, assuming it is stateless
/* Both of above in one bottle. */
__wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
- u8 *to, int len, __wsum csum)
+ u8 *to, int len)
{
int start = skb_headlen(skb);
int i, copy = start - offset;
struct sk_buff *frag_iter;
int pos = 0;
+ __wsum csum = 0;
/* Copy header. */
if (copy > 0) {
if (copy > len)
copy = len;
csum = csum_partial_copy_nocheck(skb->data + offset, to,
- copy, csum);
+ copy);
if ((len -= copy) == 0)
return csum;
offset += copy;
vaddr = kmap_atomic(p);
csum2 = csum_partial_copy_nocheck(vaddr + p_off,
to + copied,
- p_len, 0);
+ p_len);
kunmap_atomic(vaddr);
csum = csum_block_add(csum, csum2, pos);
pos += p_len;
copy = len;
csum2 = skb_copy_and_csum_bits(frag_iter,
offset - start,
- to, copy, 0);
+ to, copy);
csum = csum_block_add(csum, csum2, pos);
if ((len -= copy) == 0)
return csum;
csum = 0;
if (csstart != skb->len)
csum = skb_copy_and_csum_bits(skb, csstart, to + csstart,
- skb->len - csstart, 0);
+ skb->len - csstart);
if (skb->ip_summed == CHECKSUM_PARTIAL) {
long csstuff = csstart + skb->csum_offset;
skb_copy_and_csum_bits(head_skb, offset,
skb_put(nskb,
len),
- len, 0);
+ len);
SKB_GSO_CB(nskb)->csum_start =
skb_headroom(nskb) + doffset;
} else {
skb = skb_share_check(skb, GFP_ATOMIC);
if (unlikely(!skb))
goto err_free;
-
- if (unlikely(!pskb_may_pull(skb, VLAN_HLEN)))
+ /* We may access the two bytes after vlan_hdr in vlan_set_encap_proto(). */
+ if (unlikely(!pskb_may_pull(skb, VLAN_HLEN + sizeof(unsigned short))))
goto err_free;
vhdr = (struct vlan_hdr *)skb->data;
lse->label_stack_entry = mpls_lse;
skb_postpush_rcsum(skb, lse, MPLS_HLEN);
- if (ethernet)
+ if (ethernet && mac_len >= ETH_HLEN)
skb_mod_eth_type(skb, eth_hdr(skb), mpls_proto);
skb->protocol = mpls_proto;
skb_reset_mac_header(skb);
skb_set_network_header(skb, mac_len);
- if (ethernet) {
+ if (ethernet && mac_len >= ETH_HLEN) {
struct ethhdr *hdr;
/* use mpls_hdr() to get ethertype to account for VLANs. */
if (skb_has_frag_list(skb))
skb_clone_fraglist(skb);
- if (k == 0) {
- /* split line is in frag list */
- pskb_carve_frag_list(skb, shinfo, off - pos, gfp_mask);
+ /* split line is in frag list */
+ if (k == 0 && pskb_carve_frag_list(skb, shinfo, off - pos, gfp_mask)) {
+ /* skb_frag_unref() is not needed here as shinfo->nr_frags = 0. */
+ if (skb_has_frag_list(skb))
+ kfree_skb_list(skb_shinfo(skb)->frag_list);
+ kfree(data);
+ return -ENOMEM;
}
skb_release_data(skb);
#include <net/icmp.h>
#include <net/checksum.h>
#include <net/inetpeer.h>
+#include <net/inet_ecn.h>
#include <net/lwtunnel.h>
#include <linux/bpf-cgroup.h>
#include <linux/igmp.h>
if (fraggap) {
skb->csum = skb_copy_and_csum_bits(
skb_prev, maxfraglen,
- data + transhdrlen, fraggap, 0);
+ data + transhdrlen, fraggap);
skb_prev->csum = csum_sub(skb_prev->csum,
skb->csum);
data += fraggap;
skb->csum = skb_copy_and_csum_bits(skb_prev,
maxfraglen,
skb_transport_header(skb),
- fraggap, 0);
+ fraggap);
skb_prev->csum = csum_sub(skb_prev->csum,
skb->csum);
pskb_trim_unique(skb_prev, maxfraglen);
{
__wsum csum;
- csum = csum_partial_copy_nocheck(dptr+offset, to, len, 0);
+ csum = csum_partial_copy_nocheck(dptr+offset, to, len);
skb->csum = csum_block_add(skb->csum, csum, odd);
return 0;
}
if (IS_ERR(rt))
return;
- inet_sk(sk)->tos = arg->tos;
+ inet_sk(sk)->tos = arg->tos & ~INET_ECN_MASK;
sk->sk_protocol = ip_hdr(skb)->protocol;
sk->sk_bound_dev_if = arg->bound_dev_if;
skb->csum = csum_block_add(
skb->csum,
csum_partial_copy_nocheck(rfv->hdr.c + offset,
- to, copy, 0),
+ to, copy),
odd);
odd = 0;
} else if (!ipc.oif) {
ipc.oif = inet->uc_index;
} else if (ipv4_is_lbcast(daddr) && inet->uc_index) {
- /* oif is set, packet is to local broadcast and
+ /* oif is set, packet is to local broadcast
* and uc_index is set. oif is most likely set
* by sk_bound_dev_if. If uc_index != oif check if the
* oif is an L3 master and uc_index is an L3 slave.