powerpc: Remove duplicate cacheable_memcpy/memzero functions
author Kyle Moffett <Kyle.D.Moffett@boeing.com>
Tue, 15 Nov 2011 02:32:10 +0000 (21:32 -0500)
committer Benjamin Herrenschmidt <benh@kernel.crashing.org>
Tue, 17 Mar 2015 00:25:50 +0000 (11:25 +1100)
These functions are only used from one place each.  If the cacheable_*
versions really are more efficient, then those changes should be
migrated into the common code instead.

NOTE: The old routines are just flat buggy on kernels that support
      hardware with different cacheline sizes.

Signed-off-by: Kyle Moffett <Kyle.D.Moffett@boeing.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
arch/powerpc/include/asm/cache.h
arch/powerpc/lib/copy_32.S
arch/powerpc/lib/ppc_ksyms.c
arch/powerpc/mm/ppc_mmu_32.c
drivers/net/ethernet/ibm/emac/core.c

index 34a05a1..0dc42c5 100644 (file)
@@ -76,9 +76,6 @@ extern void _set_L3CR(unsigned long);
 #define _set_L3CR(val) do { } while(0)
 #endif
 
-extern void cacheable_memzero(void *p, unsigned int nb);
-extern void *cacheable_memcpy(void *, const void *, unsigned int);
-
 #endif /* !__ASSEMBLY__ */
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_CACHE_H */
index 55f19f9..6813f80 100644 (file)
@@ -69,54 +69,6 @@ CACHELINE_BYTES = L1_CACHE_BYTES
 LG_CACHELINE_BYTES = L1_CACHE_SHIFT
 CACHELINE_MASK = (L1_CACHE_BYTES-1)
 
-/*
- * Use dcbz on the complete cache lines in the destination
- * to set them to zero.  This requires that the destination
- * area is cacheable.  -- paulus
- */
-_GLOBAL(cacheable_memzero)
-       mr      r5,r4
-       li      r4,0
-       addi    r6,r3,-4
-       cmplwi  0,r5,4
-       blt     7f
-       stwu    r4,4(r6)
-       beqlr
-       andi.   r0,r6,3
-       add     r5,r0,r5
-       subf    r6,r0,r6
-       clrlwi  r7,r6,32-LG_CACHELINE_BYTES
-       add     r8,r7,r5
-       srwi    r9,r8,LG_CACHELINE_BYTES
-       addic.  r9,r9,-1        /* total number of complete cachelines */
-       ble     2f
-       xori    r0,r7,CACHELINE_MASK & ~3
-       srwi.   r0,r0,2
-       beq     3f
-       mtctr   r0
-4:     stwu    r4,4(r6)
-       bdnz    4b
-3:     mtctr   r9
-       li      r7,4
-10:    dcbz    r7,r6
-       addi    r6,r6,CACHELINE_BYTES
-       bdnz    10b
-       clrlwi  r5,r8,32-LG_CACHELINE_BYTES
-       addi    r5,r5,4
-2:     srwi    r0,r5,2
-       mtctr   r0
-       bdz     6f
-1:     stwu    r4,4(r6)
-       bdnz    1b
-6:     andi.   r5,r5,3
-7:     cmpwi   0,r5,0
-       beqlr
-       mtctr   r5
-       addi    r6,r6,3
-8:     stbu    r4,1(r6)
-       bdnz    8b
-       blr
-
 _GLOBAL(memset)
        rlwimi  r4,r4,8,16,23
        rlwimi  r4,r4,16,0,15
@@ -142,85 +94,6 @@ _GLOBAL(memset)
        bdnz    8b
        blr
 
-/*
- * This version uses dcbz on the complete cache lines in the
- * destination area to reduce memory traffic.  This requires that
- * the destination area is cacheable.
- * We only use this version if the source and dest don't overlap.
- * -- paulus.
- */
-_GLOBAL(cacheable_memcpy)
-       add     r7,r3,r5                /* test if the src & dst overlap */
-       add     r8,r4,r5
-       cmplw   0,r4,r7
-       cmplw   1,r3,r8
-       crand   0,0,4                   /* cr0.lt &= cr1.lt */
-       blt     memcpy                  /* if regions overlap */
-
-       addi    r4,r4,-4
-       addi    r6,r3,-4
-       neg     r0,r3
-       andi.   r0,r0,CACHELINE_MASK    /* # bytes to start of cache line */
-       beq     58f
-
-       cmplw   0,r5,r0                 /* is this more than total to do? */
-       blt     63f                     /* if not much to do */
-       andi.   r8,r0,3                 /* get it word-aligned first */
-       subf    r5,r0,r5
-       mtctr   r8
-       beq+    61f
-70:    lbz     r9,4(r4)                /* do some bytes */
-       stb     r9,4(r6)
-       addi    r4,r4,1
-       addi    r6,r6,1
-       bdnz    70b
-61:    srwi.   r0,r0,2
-       mtctr   r0
-       beq     58f
-72:    lwzu    r9,4(r4)                /* do some words */
-       stwu    r9,4(r6)
-       bdnz    72b
-
-58:    srwi.   r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
-       clrlwi  r5,r5,32-LG_CACHELINE_BYTES
-       li      r11,4
-       mtctr   r0
-       beq     63f
-53:
-       dcbz    r11,r6
-       COPY_16_BYTES
-#if L1_CACHE_BYTES >= 32
-       COPY_16_BYTES
-#if L1_CACHE_BYTES >= 64
-       COPY_16_BYTES
-       COPY_16_BYTES
-#if L1_CACHE_BYTES >= 128
-       COPY_16_BYTES
-       COPY_16_BYTES
-       COPY_16_BYTES
-       COPY_16_BYTES
-#endif
-#endif
-#endif
-       bdnz    53b
-
-63:    srwi.   r0,r5,2
-       mtctr   r0
-       beq     64f
-30:    lwzu    r0,4(r4)
-       stwu    r0,4(r6)
-       bdnz    30b
-
-64:    andi.   r0,r5,3
-       mtctr   r0
-       beq+    65f
-40:    lbz     r0,4(r4)
-       stb     r0,4(r6)
-       addi    r4,r4,1
-       addi    r6,r6,1
-       bdnz    40b
-65:    blr
-
 _GLOBAL(memmove)
        cmplw   0,r3,r4
        bgt     backwards_memcpy
index f993959..c7f8e95 100644 (file)
@@ -8,10 +8,6 @@ EXPORT_SYMBOL(memset);
 EXPORT_SYMBOL(memmove);
 EXPORT_SYMBOL(memcmp);
 EXPORT_SYMBOL(memchr);
-#ifdef CONFIG_PPC32
-EXPORT_SYMBOL(cacheable_memcpy);
-EXPORT_SYMBOL(cacheable_memzero);
-#endif
 
 EXPORT_SYMBOL(strcpy);
 EXPORT_SYMBOL(strncpy);
index 5029dc1..eb0e489 100644 (file)
@@ -224,7 +224,7 @@ void __init MMU_init_hw(void)
         */
        if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322);
        Hash = __va(memblock_alloc(Hash_size, Hash_size));
-       cacheable_memzero(Hash, Hash_size);
+       memset(Hash, 0, Hash_size);
        _SDR1 = __pa(Hash) | SDR1_LOW_BITS;
 
        Hash_end = (struct hash_pte *) ((unsigned long)Hash + Hash_size);
index 162762d..220bae6 100644 (file)
@@ -79,13 +79,6 @@ MODULE_AUTHOR
     ("Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>");
 MODULE_LICENSE("GPL");
 
-/*
- * PPC64 doesn't (yet) have a cacheable_memcpy
- */
-#ifdef CONFIG_PPC64
-#define cacheable_memcpy(d,s,n) memcpy((d),(s),(n))
-#endif
-
 /* minimum number of free TX descriptors required to wake up TX process */
 #define EMAC_TX_WAKEUP_THRESH          (NUM_TX_BUFF / 4)
 
@@ -1673,7 +1666,7 @@ static inline int emac_rx_sg_append(struct emac_instance *dev, int slot)
                        dev_kfree_skb(dev->rx_sg_skb);
                        dev->rx_sg_skb = NULL;
                } else {
-                       cacheable_memcpy(skb_tail_pointer(dev->rx_sg_skb),
+                       memcpy(skb_tail_pointer(dev->rx_sg_skb),
                                         dev->rx_skb[slot]->data, len);
                        skb_put(dev->rx_sg_skb, len);
                        emac_recycle_rx_skb(dev, slot, len);
@@ -1730,8 +1723,7 @@ static int emac_poll_rx(void *param, int budget)
                                goto oom;
 
                        skb_reserve(copy_skb, EMAC_RX_SKB_HEADROOM + 2);
-                       cacheable_memcpy(copy_skb->data - 2, skb->data - 2,
-                                        len + 2);
+                       memcpy(copy_skb->data - 2, skb->data - 2, len + 2);
                        emac_recycle_rx_skb(dev, slot, len);
                        skb = copy_skb;
                } else if (unlikely(emac_alloc_rx_skb(dev, slot, GFP_ATOMIC)))