A number of our chips like loads and stores to be paired. A small kernel
module testcase shows the improvement of pairing loads and stores in
copy_4k_page:
POWER6: +9%
POWER7: +1.5%
#include <linux/module.h>
#include <linux/mm.h>
#define ITERATIONS
10000000
static int __init copypage_init(void)
{
struct timespec before, after;
unsigned long i;
struct page *destpage, *srcpage;
char *dest, *src;
destpage = alloc_page(GFP_KERNEL);
srcpage = alloc_page(GFP_KERNEL);
dest = page_address(destpage);
src = page_address(srcpage);
getnstimeofday(&before);
for (i = 0; i < ITERATIONS; i++)
copy_4K_page(dest, src);
getnstimeofday(&after);
free_page((unsigned long)dest);
free_page((unsigned long)src);
printk(KERN_DEBUG "copy_4K_page loop took %lu ns\n",
(after.tv_sec - before.tv_sec) * NSEC_PER_SEC +
(after.tv_nsec - before.tv_nsec));
return 0;
}
static void __exit copypage_exit(void)
{
}
module_init(copypage_init)
module_exit(copypage_exit)
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Anton Blanchard");
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>