// Copies `count` bytes by blocks of `kBlockSize` bytes.
// Copies at the start and end of the buffer are unaligned.
-// Copies in the middle of the buffer are aligned to `kBlockSize`.
+// Copies in the middle of the buffer are aligned to `kAlignment`.
//
// e.g. with
// [12345678123456781234567812345678]
-// [__XXXXXXXXXXXXXXXXXXXXXXXXXXX___]
-// [__XXXXXXXX______________________]
-// [________XXXXXXXX________________]
-// [________________XXXXXXXX________]
-// [_____________________XXXXXXXX___]
+// [__XXXXXXXXXXXXXXXXXXXXXXXXXXXX__]
+// [__XXXX__________________________]
+// [____XXXXXXXX____________________]
+// [____________XXXXXXXX____________]
+// [____________________XXXXXXXX____]
+// [______________________XXXXXXXX__]
//
-// Precondition: `count > 2 * kBlockSize` for efficiency.
-// `count >= kBlockSize` for correctness.
-template <size_t kBlockSize>
+// Precondition: `kAlignment <= kBlockSize`
+// `count > 2 * kBlockSize` for efficiency.
+// `count >= kAlignment` for correctness.
+template <size_t kBlockSize, size_t kAlignment = kBlockSize>
static void CopyAlignedBlocks(char *__restrict dst, const char *__restrict src,
size_t count) {
- CopyBlock<kBlockSize>(dst, src); // Copy first block
+ static_assert(is_power2(kAlignment), "kAlignment must be a power of two");
+ static_assert(is_power2(kBlockSize), "kBlockSize must be a power of two");
+ static_assert(kAlignment <= kBlockSize,
+ "kAlignment must be less or equal to block size");
+ CopyBlock<kAlignment>(dst, src); // Copy first block
// Copy aligned blocks
- const size_t ofla = offset_from_last_aligned<kBlockSize>(src);
+ const size_t ofla = offset_from_last_aligned<kAlignment>(src);
const size_t limit = count + ofla - kBlockSize;
- for (size_t offset = kBlockSize; offset < limit; offset += kBlockSize)
- CopyBlock<kBlockSize>(dst - ofla + offset, src - ofla + offset);
+ for (size_t offset = kAlignment; offset < limit; offset += kBlockSize)
+ CopyBlock<kBlockSize>(dst - ofla + offset,
+ assume_aligned<kAlignment>(src - ofla + offset));
CopyLastBlock<kBlockSize>(dst, src, count); // Copy last block
}
EXPECT_STREQ(trace.Read(), "011121111111");
}
-TEST(MemcpyUtilsTest, MaxReloads) {
+TEST(MemcpyUtilsTest, CopyAlignedBlocksWithAlignment) {
+ auto &trace = GetTrace();
+ // Source is aligned and multiple of alignment.
+ // "11111111"
+ trace.Clear();
+ CopyAlignedBlocks<8, 4>(I(0), I(0), 8);
+ EXPECT_STREQ(trace.Write(), "22221111");
+ EXPECT_STREQ(trace.Read(), "22221111");
+
+  // Source is aligned but count is not a multiple of the alignment.
+ // "111111111"
+ trace.Clear();
+ CopyAlignedBlocks<8, 4>(I(0), I(0), 9);
+ EXPECT_STREQ(trace.Write(), "122211111");
+ EXPECT_STREQ(trace.Read(), "122211111");
+}
+
+TEST(MemcpyUtilsTest, CopyAlignedBlocksMaxReloads) {
auto &trace = GetTrace();
for (size_t alignment = 0; alignment < 32; ++alignment) {
for (size_t count = 64; count < 768; ++count) {
}
}
+TEST(MemcpyUtilsTest, CopyAlignedBlocksWithAlignmentMaxReloads) {
+ auto &trace = GetTrace();
+ for (size_t alignment = 0; alignment < 32; ++alignment) {
+ for (size_t count = 64; count < 768; ++count) {
+ trace.Clear();
+ // We should never reload more than twice when copying from count = 2x32.
+ CopyAlignedBlocks<32, 16>(I(alignment), I(0), count);
+ const char *const written = trace.Write();
+ // First bytes are untouched.
+ for (size_t i = 0; i < alignment; ++i)
+ EXPECT_EQ(written[i], '0');
+ // Next bytes are loaded once or twice but no more.
+ for (size_t i = alignment; i < count; ++i) {
+ EXPECT_GE(written[i], '1');
+ EXPECT_LE(written[i], '2');
+ }
+ }
+ }
+}
+
} // namespace __llvm_libc