Merge branch 'master' of git://git.denx.de/u-boot-sunxi
author Tom Rini <trini@konsulko.com>
Tue, 10 Jan 2017 13:19:21 +0000 (08:19 -0500)
committer Tom Rini <trini@konsulko.com>
Tue, 10 Jan 2017 13:19:21 +0000 (08:19 -0500)
30 files changed:
arch/arm/cpu/armv8/Makefile
arch/arm/cpu/armv8/cpu.c
arch/arm/cpu/armv8/lowlevel_init.S [new file with mode: 0644]
arch/arm/cpu/armv8/start.S
arch/arm/include/asm/arch-bcm235xx/boot0.h
arch/arm/include/asm/arch-bcm281xx/boot0.h
arch/arm/include/asm/arch-sunxi/boot0.h
arch/arm/include/asm/arch-sunxi/clock_sun6i.h
arch/arm/include/asm/arch-sunxi/cpu.h
arch/arm/include/asm/arch-sunxi/dram.h
arch/arm/include/asm/arch-sunxi/dram_sun8i_h3.h
arch/arm/include/asm/armv8/mmu.h
arch/arm/lib/Makefile
arch/arm/lib/vectors.S
arch/arm/mach-omap2/boot-common.c
arch/arm/mach-sunxi/Makefile
arch/arm/mach-sunxi/board.c
arch/arm/mach-sunxi/clock_sun6i.c
arch/arm/mach-sunxi/dram_sun8i_h3.c
arch/arm/mach-sunxi/rmr_switch.S [new file with mode: 0644]
arch/arm/mach-tegra/spl.c
board/sunxi/Kconfig
common/spl/spl.c
common/spl/spl_mmc.c
configs/pine64_plus_defconfig
drivers/mtd/spi/sunxi_spi_spl.c
include/common.h
include/configs/sunxi-common.h
include/spl.h
lib/tiny-printf.c

index 28ba786..e780afc 100644 (file)
@@ -26,3 +26,4 @@ obj-$(CONFIG_S32V234) += s32v234/
 obj-$(CONFIG_ARCH_ZYNQMP) += zynqmp/
 obj-$(CONFIG_TARGET_HIKEY) += hisilicon/
 obj-$(CONFIG_ARMV8_PSCI) += psci.o
+obj-$(CONFIG_ARCH_SUNXI) += lowlevel_init.o
index 5dcb5e2..28a27f7 100644 (file)
 #include <asm/secure.h>
 #include <linux/compiler.h>
 
+/*
+ * sdelay() - simple spin loop.
+ *
+ * Will delay execution by roughly (@loops * 2) cycles.
+ * This is needed for delays before the timers are accessible.
+ *
+ * A value of "0" will result in 2^64 loops.
+ */
+void sdelay(unsigned long loops)
+{
+       __asm__ volatile ("1:\n" "subs %0, %0, #1\n"
+                         "b.ne 1b" : "=r" (loops) : "0"(loops) : "cc");
+}
+
 int cleanup_before_linux(void)
 {
        /*
diff --git a/arch/arm/cpu/armv8/lowlevel_init.S b/arch/arm/cpu/armv8/lowlevel_init.S
new file mode 100644 (file)
index 0000000..189e35f
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * A lowlevel_init function that sets up the stack to call a C function to
+ * perform further init.
+ *
+ * SPDX-License-Identifier:    GPL-2.0+
+ */
+
+#include <asm-offsets.h>
+#include <config.h>
+#include <linux/linkage.h>
+
+ENTRY(lowlevel_init)
+       /*
+        * Set up a temporary stack. Global data is not available yet.
+        * SPL builds use CONFIG_SPL_STACK when it is defined; everything
+        * else uses CONFIG_SYS_INIT_SP_ADDR.
+        */
+#if defined(CONFIG_SPL_BUILD) && defined(CONFIG_SPL_STACK)
+       ldr     w0, =CONFIG_SPL_STACK
+#else
+       ldr     w0, =CONFIG_SYS_INIT_SP_ADDR
+#endif
+       bic     sp, x0, #0xf    /* 16-byte alignment for ABI compliance */
+
+       /*
+        * Save the old LR (passed in x29) and the current LR to the stack,
+        * since the call to s_init below overwrites x30.
+        */
+       stp     x29, x30, [sp, #-16]!
+
+       /*
+        * Call the very early init function. This should do only the
+        * absolute bare minimum to get started. It should not:
+        *
+        * - set up DRAM
+        * - use global_data
+        * - clear BSS
+        * - try to start a console
+        *
+        * For boards with SPL this should be empty since SPL can do all of
+        * this init in the SPL board_init_f() function which is called
+        * immediately after this.
+        */
+       bl      s_init
+       /* Reload the saved pair; sp itself is not adjusted back here. */
+       ldp     x29, x30, [sp]
+       ret
+ENDPROC(lowlevel_init)
index 4f5f6d8..140609d 100644 (file)
@@ -19,8 +19,6 @@
 
 .globl _start
 _start:
-       b       reset
-
 #ifdef CONFIG_ENABLE_ARM_SOC_BOOT0_HOOK
 /*
  * Various SoCs need something special and SoC-specific up front in
@@ -28,7 +26,8 @@ _start:
  * use it here.
  */
 #include <asm/arch/boot0.h>
-ARM_SOC_BOOT0_HOOK
+#else
+       b       reset
 #endif
 
        .align 3
index 7e72882..a747bd3 100644 (file)
@@ -4,12 +4,6 @@
  * SPDX-License-Identifier:    GPL-2.0+
  */
 
-#ifndef __BOOT0_H
-#define __BOOT0_H
-
 /* BOOT0 header information */
-#define ARM_SOC_BOOT0_HOOK     \
-       .word   0xbabeface;     \
+       .word   0xbabeface
        .word   _end - _start
-
-#endif /* __BOOT0_H */
index 7e72882..a747bd3 100644 (file)
@@ -4,12 +4,6 @@
  * SPDX-License-Identifier:    GPL-2.0+
  */
 
-#ifndef __BOOT0_H
-#define __BOOT0_H
-
 /* BOOT0 header information */
-#define ARM_SOC_BOOT0_HOOK     \
-       .word   0xbabeface;     \
+       .word   0xbabeface
        .word   _end - _start
-
-#endif /* __BOOT0_H */
index ea5675e..9c6d82d 100644 (file)
@@ -4,11 +4,36 @@
  * SPDX-License-Identifier:    GPL-2.0+
  */
 
-#ifndef __BOOT0_H
-#define __BOOT0_H
-
+#if defined(CONFIG_RESERVE_ALLWINNER_BOOT0_HEADER) && !defined(CONFIG_SPL_BUILD)
 /* reserve space for BOOT0 header information */
-#define ARM_SOC_BOOT0_HOOK     \
+       b       reset
        .space  1532
-
-#endif /* __BOOT0_H */
+#elif defined(CONFIG_ARM_BOOT_HOOK_RMR)
+/*
+ * Switch into AArch64 if needed.
+ * Refer to arch/arm/mach-sunxi/rmr_switch.S for the original source.
+ */
+       tst     x0, x0                  // this is "b #0x84" in ARM
+       b       reset
+       .space  0x7c
+       .word   0xe59f1024      // ldr     r1, [pc, #36] ; 0x170000a0
+       .word   0xe59f0024      // ldr     r0, [pc, #36] ; CONFIG_*_TEXT_BASE
+       .word   0xe5810000      // str     r0, [r1]
+       .word   0xf57ff04f      // dsb     sy
+       .word   0xf57ff06f      // isb     sy
+       .word   0xee1c0f50      // mrc     15, 0, r0, cr12, cr0, {2} ; RMR
+       .word   0xe3800003      // orr     r0, r0, #3
+       .word   0xee0c0f50      // mcr     15, 0, r0, cr12, cr0, {2} ; RMR
+       .word   0xf57ff06f      // isb     sy
+       .word   0xe320f003      // wfi
+       .word   0xeafffffd      // b       @wfi
+       .word   0x017000a0      // writeable RVBAR mapping address
+#ifdef CONFIG_SPL_BUILD
+       .word   CONFIG_SPL_TEXT_BASE
+#else
+       .word   CONFIG_SYS_TEXT_BASE
+#endif
+#else
+/* normal execution */
+       b       reset
+#endif
index be9fcfd..3f87672 100644 (file)
@@ -322,6 +322,7 @@ struct sunxi_ccm_reg {
 #define CCM_DRAMCLK_CFG_DIV0_MASK      (0xf << 8)
 #define CCM_DRAMCLK_CFG_SRC_PLL5       (0x0 << 20)
 #define CCM_DRAMCLK_CFG_SRC_PLL6x2     (0x1 << 20)
+#define CCM_DRAMCLK_CFG_SRC_PLL11      (0x1 << 20) /* A64 only */
 #define CCM_DRAMCLK_CFG_SRC_MASK       (0x3 << 20)
 #define CCM_DRAMCLK_CFG_UPD            (0x1 << 16)
 #define CCM_DRAMCLK_CFG_RST            (0x1 << 31)
index 73583ed..6f96a97 100644 (file)
@@ -13,4 +13,7 @@
 #include <asm/arch/cpu_sun4i.h>
 #endif
 
+#define SOCID_A64      0x1689
+#define SOCID_H3       0x1680
+
 #endif /* _SUNXI_CPU_H */
index e0be744..53e6d47 100644 (file)
@@ -24,7 +24,7 @@
 #include <asm/arch/dram_sun8i_a33.h>
 #elif defined(CONFIG_MACH_SUN8I_A83T)
 #include <asm/arch/dram_sun8i_a83t.h>
-#elif defined(CONFIG_MACH_SUN8I_H3)
+#elif defined(CONFIG_MACH_SUN8I_H3) || defined(CONFIG_MACH_SUN50I)
 #include <asm/arch/dram_sun8i_h3.h>
 #elif defined(CONFIG_MACH_SUN9I)
 #include <asm/arch/dram_sun9i.h>
index d0f2b8a..25d07d9 100644 (file)
@@ -15,7 +15,8 @@
 
 struct sunxi_mctl_com_reg {
        u32 cr;                 /* 0x00 control register */
-       u8 res0[0xc];           /* 0x04 */
+       u8 res0[0x8];           /* 0x04 */
+       u32 tmr;                /* 0x0c (unused on H3) */
        u32 mcr[16][2];         /* 0x10 */
        u32 bwcr;               /* 0x90 bandwidth control register */
        u32 maer;               /* 0x94 master enable register */
@@ -32,7 +33,9 @@ struct sunxi_mctl_com_reg {
        u32 swoffr;             /* 0xc4 */
        u8 res2[0x8];           /* 0xc8 */
        u32 cccr;               /* 0xd0 */
-       u8 res3[0x72c];         /* 0xd4 */
+       u8 res3[0x54];          /* 0xd4 */
+       u32 mdfs_bwlr[3];       /* 0x128 (unused on H3) */
+       u8 res4[0x6cc];         /* 0x134 */
        u32 protect;            /* 0x800 */
 };
 
@@ -81,7 +84,8 @@ struct sunxi_mctl_ctl_reg {
        u32 rfshtmg;            /* 0x90 refresh timing */
        u32 rfshctl1;           /* 0x94 */
        u32 pwrtmg;             /* 0x98 */
-       u8  res3[0x20];         /* 0x9c */
+       u8 res3[0x1c];          /* 0x9c */
+       u32 vtfcr;              /* 0xb8 (unused on H3) */
        u32 dqsgmr;             /* 0xbc */
        u32 dtcr;               /* 0xc0 */
        u32 dtar[4];            /* 0xc4 */
@@ -106,20 +110,23 @@ struct sunxi_mctl_ctl_reg {
        u32 perfhpr[2];         /* 0x1c4 */
        u32 perflpr[2];         /* 0x1cc */
        u32 perfwr[2];          /* 0x1d4 */
-       u8 res8[0x2c];          /* 0x1dc */
-       u32 aciocr;             /* 0x208 */
-       u8 res9[0xf4];          /* 0x20c */
+       u8 res8[0x24];          /* 0x1dc */
+       u32 acmdlr;             /* 0x200 AC master delay line register */
+       u32 aclcdlr;            /* 0x204 AC local calibrated delay line register */
+       u32 aciocr;             /* 0x208 AC I/O configuration register */
+       u8 res9[0x4];           /* 0x20c */
+       u32 acbdlr[31];         /* 0x210 AC bit delay line registers */
+       u8 res10[0x74];         /* 0x28c */
        struct {                /* 0x300 DATX8 modules*/
-               u32 mdlr;               /* 0x00 */
-               u32 lcdlr[3];           /* 0x04 */
-               u32 iocr[11];           /* 0x10 IO configuration register */
-               u32 bdlr6;              /* 0x3c */
-               u32 gtr;                /* 0x40 */
-               u32 gcr;                /* 0x44 */
-               u32 gsr[3];             /* 0x48 */
+               u32 mdlr;               /* 0x00 master delay line register */
+               u32 lcdlr[3];           /* 0x04 local calibrated delay line registers */
+               u32 bdlr[12];           /* 0x10 bit delay line registers */
+               u32 gtr;                /* 0x40 general timing register */
+               u32 gcr;                /* 0x44 general configuration register */
+               u32 gsr[3];             /* 0x48 general status registers */
                u8 res0[0x2c];          /* 0x54 */
-       } datx[4];
-       u8 res10[0x388];        /* 0x500 */
+       } dx[4];
+       u8 res11[0x388];        /* 0x500 */
        u32 upd2;               /* 0x888 */
 };
 
@@ -172,14 +179,16 @@ struct sunxi_mctl_ctl_reg {
 
 #define PGSR_INIT_DONE (0x1 << 0)      /* PHY init done */
 
-#define ZQCR_PWRDOWN   (0x1 << 31)     /* ZQ power down */
+#define ZQCR_PWRDOWN   (1U << 31)      /* ZQ power down */
 
-#define DATX_IOCR_DQ(x)        (x)             /* DQ0-7 IOCR index */
-#define DATX_IOCR_DM   (8)             /* DM IOCR index */
-#define DATX_IOCR_DQS  (9)             /* DQS IOCR index */
-#define DATX_IOCR_DQSN (10)            /* DQSN IOCR index */
+#define ACBDLR_WRITE_DELAY(x)  ((x) << 8)
 
-#define DATX_IOCR_WRITE_DELAY(x)       ((x) << 8)
-#define DATX_IOCR_READ_DELAY(x)                ((x) << 0)
+#define DXBDLR_DQ(x)   (x)             /* DQ0-7 BDLR index */
+#define DXBDLR_DM      8               /* DM BDLR index */
+#define DXBDLR_DQS     9               /* DQS BDLR index */
+#define DXBDLR_DQSN    10              /* DQSN BDLR index */
+
+#define DXBDLR_WRITE_DELAY(x)  ((x) << 8)
+#define DXBDLR_READ_DELAY(x)   ((x) << 0)
 
 #endif /* _SUNXI_DRAM_SUN8I_H3_H */
index aa0f3c4..e9b4cdb 100644 (file)
@@ -8,14 +8,6 @@
 #ifndef _ASM_ARMV8_MMU_H_
 #define _ASM_ARMV8_MMU_H_
 
-#ifdef __ASSEMBLY__
-#define _AC(X, Y)      X
-#else
-#define _AC(X, Y)      (X##Y)
-#endif
-
-#define UL(x)          _AC(x, UL)
-
 /***************************************************************/
 /*
  * The following definitions are related each other, shoud be
index 0051f76..024139d 100644 (file)
@@ -77,8 +77,10 @@ ifndef CONFIG_HAS_THUMB2
 
 # for C files, just apend -marm, which will override previous -mthumb*
 
+ifndef CONFIG_ARM64
 CFLAGS_cache.o := -marm
 CFLAGS_cache-cp15.o := -marm
+endif
 
 # For .S, drop -mthumb* and other thumb-related options.
 # CFLAGS_REMOVE_* would not have an effet, so AFLAGS_REMOVE_*
index 5cc132b..9fe7415 100644 (file)
@@ -67,7 +67,6 @@ _start:
  * use it here.
  */
 #include <asm/arch/boot0.h>
-ARM_SOC_BOOT0_HOOK
 #endif
 
 /*
index 385310b..7ae3d80 100644 (file)
@@ -228,7 +228,7 @@ void __noreturn jump_to_image_no_args(struct spl_image_info *spl_image)
 
        u32 boot_params = *((u32 *)OMAP_SRAM_SCRATCH_BOOT_PARAMS);
 
-       debug("image entry point: 0x%X\n", spl_image->entry_point);
+       debug("image entry point: 0x%lX\n", spl_image->entry_point);
        /* Pass the saved boot_params from rom code */
        image_entry((u32 *)boot_params);
 }
index e73114e..7daba11 100644 (file)
@@ -50,4 +50,5 @@ obj-$(CONFIG_MACH_SUN8I_A33)  += dram_sun8i_a33.o
 obj-$(CONFIG_MACH_SUN8I_A83T)  += dram_sun8i_a83t.o
 obj-$(CONFIG_MACH_SUN8I_H3)    += dram_sun8i_h3.o
 obj-$(CONFIG_MACH_SUN9I)       += dram_sun9i.o
+obj-$(CONFIG_MACH_SUN50I)      += dram_sun8i_h3.o
 endif
index aa11493..52be5b0 100644 (file)
@@ -133,7 +133,7 @@ static int gpio_init(void)
        return 0;
 }
 
-#ifdef CONFIG_SPL_BUILD
+#if defined(CONFIG_SPL_BOARD_LOAD_IMAGE) && defined(CONFIG_SPL_BUILD)
 static int spl_board_load_image(struct spl_image_info *spl_image,
                                struct spl_boot_device *bootdev)
 {
index ed8cd9b..d123b3a 100644 (file)
@@ -21,6 +21,8 @@ void clock_init_safe(void)
 {
        struct sunxi_ccm_reg * const ccm =
                (struct sunxi_ccm_reg *)SUNXI_CCM_BASE;
+
+#if !defined(CONFIG_MACH_SUN8I_H3) && !defined(CONFIG_MACH_SUN50I)
        struct sunxi_prcm_reg * const prcm =
                (struct sunxi_prcm_reg *)SUNXI_PRCM_BASE;
 
@@ -31,6 +33,7 @@ void clock_init_safe(void)
                PRCM_PLL_CTRL_LDO_DIGITAL_EN | PRCM_PLL_CTRL_LDO_ANALOG_EN |
                PRCM_PLL_CTRL_EXT_OSC_EN | PRCM_PLL_CTRL_LDO_OUT_L(1140));
        clrbits_le32(&prcm->pll_ctrl1, PRCM_PLL_CTRL_LDO_KEY_MASK);
+#endif
 
        clock_set_pll1(408000000);
 
@@ -41,7 +44,8 @@ void clock_init_safe(void)
        writel(AHB1_ABP1_DIV_DEFAULT, &ccm->ahb1_apb1_div);
 
        writel(MBUS_CLK_DEFAULT, &ccm->mbus0_clk_cfg);
-       writel(MBUS_CLK_DEFAULT, &ccm->mbus1_clk_cfg);
+       if (IS_ENABLED(CONFIG_MACH_SUN6I))
+               writel(MBUS_CLK_DEFAULT, &ccm->mbus1_clk_cfg);
 }
 #endif
 
@@ -213,14 +217,14 @@ done:
 }
 #endif
 
-#ifdef CONFIG_MACH_SUN8I_A33
+#if defined(CONFIG_MACH_SUN8I_A33) || defined(CONFIG_MACH_SUN50I)
 void clock_set_pll11(unsigned int clk, bool sigma_delta_enable)
 {
        struct sunxi_ccm_reg * const ccm =
                (struct sunxi_ccm_reg *)SUNXI_CCM_BASE;
 
        if (sigma_delta_enable)
-               writel(CCM_PLL11_PATTERN, &ccm->pll5_pattern_cfg);
+               writel(CCM_PLL11_PATTERN, &ccm->pll11_pattern_cfg0);
 
        writel(CCM_PLL11_CTRL_EN | CCM_PLL11_CTRL_UPD |
               (sigma_delta_enable ? CCM_PLL11_CTRL_SIGMA_DELTA_EN : 0) |
index b08b8e6..9f7cc7f 100644 (file)
 #include <asm/io.h>
 #include <asm/arch/clock.h>
 #include <asm/arch/dram.h>
+#include <asm/arch/cpu.h>
 #include <linux/kconfig.h>
 
+/*
+ * The delay parameters below allow to allegedly specify delay times of some
+ * unknown unit for each individual bit trace in each of the four data bytes
+ * the 32-bit wide access consists of. Also three control signals can be
+ * adjusted individually.
+ */
+#define BITS_PER_BYTE          8
+#define NR_OF_BYTE_LANES       (32 / BITS_PER_BYTE)
+/* The eight data lines (DQn) plus DM, DQS and DQSN */
+#define LINES_PER_BYTE_LANE    (BITS_PER_BYTE + 3)
 struct dram_para {
-       u32 read_delays;
-       u32 write_delays;
        u16 page_size;
        u8 bus_width;
        u8 dual_rank;
        u8 row_bits;
+       const u8 dx_read_delays[NR_OF_BYTE_LANES][LINES_PER_BYTE_LANE];
+       const u8 dx_write_delays[NR_OF_BYTE_LANES][LINES_PER_BYTE_LANE];
+       const u8 ac_delays[31];
 };
 
 static inline int ns_to_t(int nanoseconds)
@@ -31,30 +43,6 @@ static inline int ns_to_t(int nanoseconds)
        return DIV_ROUND_UP(ctrl_freq * nanoseconds, 1000);
 }
 
-static u32 bin_to_mgray(int val)
-{
-       static const u8 lookup_table[32] = {
-               0x00, 0x01, 0x02, 0x03, 0x06, 0x07, 0x04, 0x05,
-               0x0c, 0x0d, 0x0e, 0x0f, 0x0a, 0x0b, 0x08, 0x09,
-               0x18, 0x19, 0x1a, 0x1b, 0x1e, 0x1f, 0x1c, 0x1d,
-               0x14, 0x15, 0x16, 0x17, 0x12, 0x13, 0x10, 0x11,
-       };
-
-       return lookup_table[clamp(val, 0, 31)];
-}
-
-static int mgray_to_bin(u32 val)
-{
-       static const u8 lookup_table[32] = {
-               0x00, 0x01, 0x02, 0x03, 0x06, 0x07, 0x04, 0x05,
-               0x0e, 0x0f, 0x0c, 0x0d, 0x08, 0x09, 0x0a, 0x0b,
-               0x1e, 0x1f, 0x1c, 0x1d, 0x18, 0x19, 0x1a, 0x1b,
-               0x10, 0x11, 0x12, 0x13, 0x16, 0x17, 0x14, 0x15,
-       };
-
-       return lookup_table[val & 0x1f];
-}
-
 static void mctl_phy_init(u32 val)
 {
        struct sunxi_mctl_ctl_reg * const mctl_ctl =
@@ -64,74 +52,144 @@ static void mctl_phy_init(u32 val)
        mctl_await_completion(&mctl_ctl->pgsr[0], PGSR_INIT_DONE, 0x1);
 }
 
-static void mctl_dq_delay(u32 read, u32 write)
+static void mctl_set_bit_delays(struct dram_para *para)
 {
        struct sunxi_mctl_ctl_reg * const mctl_ctl =
                        (struct sunxi_mctl_ctl_reg *)SUNXI_DRAM_CTL0_BASE;
        int i, j;
-       u32 val;
-
-       for (i = 0; i < 4; i++) {
-               val = DATX_IOCR_WRITE_DELAY((write >> (i * 4)) & 0xf) |
-                     DATX_IOCR_READ_DELAY(((read >> (i * 4)) & 0xf) * 2);
-
-               for (j = DATX_IOCR_DQ(0); j <= DATX_IOCR_DM; j++)
-                       writel(val, &mctl_ctl->datx[i].iocr[j]);
-       }
 
        clrbits_le32(&mctl_ctl->pgcr[0], 1 << 26);
 
-       for (i = 0; i < 4; i++) {
-               val = DATX_IOCR_WRITE_DELAY((write >> (16 + i * 4)) & 0xf) |
-                     DATX_IOCR_READ_DELAY((read >> (16 + i * 4)) & 0xf);
+       for (i = 0; i < NR_OF_BYTE_LANES; i++)
+               for (j = 0; j < LINES_PER_BYTE_LANE; j++)
+                       writel(DXBDLR_WRITE_DELAY(para->dx_write_delays[i][j]) |
+                              DXBDLR_READ_DELAY(para->dx_read_delays[i][j]),
+                              &mctl_ctl->dx[i].bdlr[j]);
 
-               writel(val, &mctl_ctl->datx[i].iocr[DATX_IOCR_DQS]);
-               writel(val, &mctl_ctl->datx[i].iocr[DATX_IOCR_DQSN]);
-       }
+       for (i = 0; i < 31; i++)
+               writel(ACBDLR_WRITE_DELAY(para->ac_delays[i]),
+                      &mctl_ctl->acbdlr[i]);
 
        setbits_le32(&mctl_ctl->pgcr[0], 1 << 26);
+}
 
-       udelay(1);
+/*
+ * MBUS master port numbers. mbus_configure_port() uses the value as the
+ * first index into mctl_com->mcr[][].
+ */
+enum {
+       MBUS_PORT_CPU           = 0,
+       MBUS_PORT_GPU           = 1,
+       MBUS_PORT_UNUSED        = 2,
+       MBUS_PORT_DMA           = 3,
+       MBUS_PORT_VE            = 4,
+       MBUS_PORT_CSI           = 5,
+       MBUS_PORT_NAND          = 6,
+       MBUS_PORT_SS            = 7,
+       MBUS_PORT_TS            = 8,
+       MBUS_PORT_DI            = 9,
+       MBUS_PORT_DE            = 10,
+       MBUS_PORT_DE_CFD        = 11,
+};
+
+/*
+ * QoS levels for an MBUS port (0 .. 3). mbus_configure_port() places the
+ * low two bits of the value into bits [3:2] of the first MCR word.
+ */
+enum {
+       MBUS_QOS_LOWEST = 0,
+       MBUS_QOS_LOW,
+       MBUS_QOS_HIGH,
+       MBUS_QOS_HIGHEST
+};
+
+/*
+ * mbus_configure_port() - program the two MCR words of one MBUS port.
+ *
+ * Packs the arguments into two 32-bit words and writes them to
+ * mctl_com->mcr[port][0] and mctl_com->mcr[port][1]:
+ *
+ * cfg0: bit 0 bwlimit, bit 1 priority, bits [3:2] qos, bits [7:4] waittime,
+ *       bits [15:8] acs, bits [31:16] bwl0
+ * cfg1: bits [15:0] bwl1, bits [31:16] bwl2
+ */
+inline void mbus_configure_port(u8 port,
+                               bool bwlimit,
+                               bool priority,
+                               u8 qos,         /* MBUS_QOS_LOWEST .. MBUS_QOS_HIGHEST */
+                               u8 waittime,    /* 0 .. 0xf */
+                               u8 acs,         /* 0 .. 0xff */
+                               u16 bwl0,       /* 0 .. 0xffff, bandwidth limit in MB/s */
+                               u16 bwl1,
+                               u16 bwl2)
+{
+       struct sunxi_mctl_com_reg * const mctl_com =
+                       (struct sunxi_mctl_com_reg *)SUNXI_DRAM_COM_BASE;
+
+       /*
+        * bwl0 is widened to u32 before shifting, like bwl2 below: a u16
+        * promotes to (signed) int, and shifting a value >= 0x8000 left by
+        * 16 would overflow it.
+        */
+       const u32 cfg0 = ( (bwlimit ? (1 << 0) : 0)
+                          | (priority ? (1 << 1) : 0)
+                          | ((qos & 0x3) << 2)
+                          | ((waittime & 0xf) << 4)
+                          | ((acs & 0xff) << 8)
+                          | ((u32)bwl0 << 16) );
+       const u32 cfg1 = ((u32)bwl2 << 16) | (bwl1 & 0xffff);
+
+       debug("MBUS port %d cfg0 %08x cfg1 %08x\n", port, cfg0, cfg1);
+       writel(cfg0, &mctl_com->mcr[port][0]);
+       writel(cfg1, &mctl_com->mcr[port][1]);
+}
 
-static void mctl_set_master_priority(void)
+/*
+ * Shorthand for mbus_configure_port(): @port and @qos are token-pasted onto
+ * the MBUS_PORT_ and MBUS_QOS_ prefixes, priority is always passed as false
+ * and waittime as 0.
+ */
+#define MBUS_CONF(port, bwlimit, qos, acs, bwl0, bwl1, bwl2)   \
+       mbus_configure_port(MBUS_PORT_ ## port, bwlimit, false, \
+                           MBUS_QOS_ ## qos, 0, acs, bwl0, bwl1, bwl2)
+
+static void mctl_set_master_priority_h3(void)
 {
        struct sunxi_mctl_com_reg * const mctl_com =
                        (struct sunxi_mctl_com_reg *)SUNXI_DRAM_COM_BASE;
 
        /* enable bandwidth limit windows and set windows size 1us */
-       writel(0x00010190, &mctl_com->bwcr);
+       writel((1 << 16) | (400 << 0), &mctl_com->bwcr);
 
        /* set cpu high priority */
        writel(0x00000001, &mctl_com->mapr);
 
-       writel(0x0200000d, &mctl_com->mcr[0][0]);
-       writel(0x00800100, &mctl_com->mcr[0][1]);
-       writel(0x06000009, &mctl_com->mcr[1][0]);
-       writel(0x01000400, &mctl_com->mcr[1][1]);
-       writel(0x0200000d, &mctl_com->mcr[2][0]);
-       writel(0x00600100, &mctl_com->mcr[2][1]);
-       writel(0x0100000d, &mctl_com->mcr[3][0]);
-       writel(0x00200080, &mctl_com->mcr[3][1]);
-       writel(0x07000009, &mctl_com->mcr[4][0]);
-       writel(0x01000640, &mctl_com->mcr[4][1]);
-       writel(0x0100000d, &mctl_com->mcr[5][0]);
-       writel(0x00200080, &mctl_com->mcr[5][1]);
-       writel(0x01000009, &mctl_com->mcr[6][0]);
-       writel(0x00400080, &mctl_com->mcr[6][1]);
-       writel(0x0100000d, &mctl_com->mcr[7][0]);
-       writel(0x00400080, &mctl_com->mcr[7][1]);
-       writel(0x0100000d, &mctl_com->mcr[8][0]);
-       writel(0x00400080, &mctl_com->mcr[8][1]);
-       writel(0x04000009, &mctl_com->mcr[9][0]);
-       writel(0x00400100, &mctl_com->mcr[9][1]);
-       writel(0x2000030d, &mctl_com->mcr[10][0]);
-       writel(0x04001800, &mctl_com->mcr[10][1]);
-       writel(0x04000009, &mctl_com->mcr[11][0]);
-       writel(0x00400120, &mctl_com->mcr[11][1]);
+       MBUS_CONF(   CPU,  true, HIGHEST, 0,  512,  256,  128);
+       MBUS_CONF(   GPU,  true,    HIGH, 0, 1536, 1024,  256);
+       MBUS_CONF(UNUSED,  true, HIGHEST, 0,  512,  256,   96);
+       MBUS_CONF(   DMA,  true, HIGHEST, 0,  256,  128,   32);
+       MBUS_CONF(    VE,  true,    HIGH, 0, 1792, 1600,  256);
+       MBUS_CONF(   CSI,  true, HIGHEST, 0,  256,  128,   32);
+       MBUS_CONF(  NAND,  true,    HIGH, 0,  256,  128,   64);
+       MBUS_CONF(    SS,  true, HIGHEST, 0,  256,  128,   64);
+       MBUS_CONF(    TS,  true, HIGHEST, 0,  256,  128,   64);
+       MBUS_CONF(    DI,  true,    HIGH, 0, 1024,  256,   64);
+       MBUS_CONF(    DE,  true, HIGHEST, 3, 8192, 6120, 1024);
+       MBUS_CONF(DE_CFD,  true,    HIGH, 0, 1024,  288,   64);
 }
 
-static void mctl_set_timing_params(struct dram_para *para)
+/*
+ * A64 variant of the MBUS setup: writes tmr and bwcr, configures the twelve
+ * MBUS ports through MBUS_CONF() and finally writes mdfs_bwlr[2].
+ */
+static void mctl_set_master_priority_a64(void)
+{
+       struct sunxi_mctl_com_reg * const mctl_com =
+                       (struct sunxi_mctl_com_reg *)SUNXI_DRAM_COM_BASE;
+
+       /* enable bandwidth limit windows and set windows size 1us */
+       writel(399, &mctl_com->tmr);
+       writel((1 << 16), &mctl_com->bwcr);
+
+       /* Port 2 is reserved per Allwinner's linux-3.10 source, yet they
+        * initialise it */
+       MBUS_CONF(   CPU,  true, HIGHEST, 0,  160,  100,   80);
+       MBUS_CONF(   GPU, false,    HIGH, 0, 1536, 1400,  256);
+       MBUS_CONF(UNUSED,  true, HIGHEST, 0,  512,  256,   96);
+       MBUS_CONF(   DMA,  true,    HIGH, 0,  256,   80,  100);
+       MBUS_CONF(    VE,  true,    HIGH, 0, 1792, 1600,  256);
+       MBUS_CONF(   CSI,  true,    HIGH, 0,  256,  128,    0);
+       MBUS_CONF(  NAND,  true,    HIGH, 0,  256,  128,   64);
+       MBUS_CONF(    SS,  true, HIGHEST, 0,  256,  128,   64);
+       MBUS_CONF(    TS,  true, HIGHEST, 0,  256,  128,   64);
+       MBUS_CONF(    DI,  true,    HIGH, 0, 1024,  256,   64);
+       MBUS_CONF(    DE,  true,    HIGH, 2, 8192, 6144, 2048);
+       MBUS_CONF(DE_CFD,  true,    HIGH, 0, 1280,  144,   64);
+
+       /* NOTE(review): the meaning of 0x81000004 is not documented here */
+       writel(0x81000004, &mctl_com->mdfs_bwlr[2]);
+}
+
+/*
+ * Dispatch to the SoC-specific MBUS priority setup selected by @socid.
+ * Any id other than SOCID_H3 or SOCID_A64 leaves the hardware untouched.
+ */
+static void mctl_set_master_priority(uint16_t socid)
+{
+       if (socid == SOCID_H3)
+               mctl_set_master_priority_h3();
+       else if (socid == SOCID_A64)
+               mctl_set_master_priority_a64();
+}
+
+static void mctl_set_timing_params(uint16_t socid, struct dram_para *para)
 {
        struct sunxi_mctl_ctl_reg * const mctl_ctl =
                        (struct sunxi_mctl_ctl_reg *)SUNXI_DRAM_CTL0_BASE;
@@ -212,7 +270,31 @@ static void mctl_set_timing_params(struct dram_para *para)
        writel(RFSHTMG_TREFI(trefi) | RFSHTMG_TRFC(trfc), &mctl_ctl->rfshtmg);
 }
 
-static void mctl_zq_calibration(struct dram_para *para)
+/*
+ * Translate a binary value into its "mgray" encoding via a 32-entry lookup
+ * table. @val is clamped to 0 .. 31 before the lookup.
+ */
+static u32 bin_to_mgray(int val)
+{
+       static const u8 lookup_table[32] = {
+               0x00, 0x01, 0x02, 0x03, 0x06, 0x07, 0x04, 0x05,
+               0x0c, 0x0d, 0x0e, 0x0f, 0x0a, 0x0b, 0x08, 0x09,
+               0x18, 0x19, 0x1a, 0x1b, 0x1e, 0x1f, 0x1c, 0x1d,
+               0x14, 0x15, 0x16, 0x17, 0x12, 0x13, 0x10, 0x11,
+       };
+
+       return lookup_table[clamp(val, 0, 31)];
+}
+
+/*
+ * Translate an "mgray" encoded value back to binary via a 32-entry lookup
+ * table; the table is the inverse of the one in bin_to_mgray(). Only the
+ * low five bits of @val are used.
+ */
+static int mgray_to_bin(u32 val)
+{
+       static const u8 lookup_table[32] = {
+               0x00, 0x01, 0x02, 0x03, 0x06, 0x07, 0x04, 0x05,
+               0x0e, 0x0f, 0x0c, 0x0d, 0x08, 0x09, 0x0a, 0x0b,
+               0x1e, 0x1f, 0x1c, 0x1d, 0x18, 0x19, 0x1a, 0x1b,
+               0x10, 0x11, 0x12, 0x13, 0x16, 0x17, 0x14, 0x15,
+       };
+
+       return lookup_table[val & 0x1f];
+}
+
+static void mctl_h3_zq_calibration_quirk(struct dram_para *para)
 {
        struct sunxi_mctl_ctl_reg * const mctl_ctl =
                        (struct sunxi_mctl_ctl_reg *)SUNXI_DRAM_CTL0_BASE;
@@ -282,7 +364,7 @@ static void mctl_set_cr(struct dram_para *para)
               MCTL_CR_ROW_BITS(para->row_bits), &mctl_com->cr);
 }
 
-static void mctl_sys_init(struct dram_para *para)
+static void mctl_sys_init(uint16_t socid, struct dram_para *para)
 {
        struct sunxi_ccm_reg * const ccm =
                        (struct sunxi_ccm_reg *)SUNXI_CCM_BASE;
@@ -294,16 +376,30 @@ static void mctl_sys_init(struct dram_para *para)
        clrbits_le32(&ccm->ahb_gate0, 1 << AHB_GATE_OFFSET_MCTL);
        clrbits_le32(&ccm->ahb_reset0_cfg, 1 << AHB_RESET_OFFSET_MCTL);
        clrbits_le32(&ccm->pll5_cfg, CCM_PLL5_CTRL_EN);
+       if (socid == SOCID_A64)
+               clrbits_le32(&ccm->pll11_cfg, CCM_PLL11_CTRL_EN);
        udelay(10);
 
        clrbits_le32(&ccm->dram_clk_cfg, CCM_DRAMCLK_CFG_RST);
        udelay(1000);
 
-       clock_set_pll5(CONFIG_DRAM_CLK * 2 * 1000000, false);
-       clrsetbits_le32(&ccm->dram_clk_cfg,
-                       CCM_DRAMCLK_CFG_DIV_MASK | CCM_DRAMCLK_CFG_SRC_MASK,
-                       CCM_DRAMCLK_CFG_DIV(1) | CCM_DRAMCLK_CFG_SRC_PLL5 |
-                       CCM_DRAMCLK_CFG_UPD);
+       if (socid == SOCID_A64) {
+               clock_set_pll11(CONFIG_DRAM_CLK * 2 * 1000000, false);
+               clrsetbits_le32(&ccm->dram_clk_cfg,
+                               CCM_DRAMCLK_CFG_DIV_MASK |
+                               CCM_DRAMCLK_CFG_SRC_MASK,
+                               CCM_DRAMCLK_CFG_DIV(1) |
+                               CCM_DRAMCLK_CFG_SRC_PLL11 |
+                               CCM_DRAMCLK_CFG_UPD);
+       } else if (socid == SOCID_H3) {
+               clock_set_pll5(CONFIG_DRAM_CLK * 2 * 1000000, false);
+               clrsetbits_le32(&ccm->dram_clk_cfg,
+                               CCM_DRAMCLK_CFG_DIV_MASK |
+                               CCM_DRAMCLK_CFG_SRC_MASK,
+                               CCM_DRAMCLK_CFG_DIV(1) |
+                               CCM_DRAMCLK_CFG_SRC_PLL5 |
+                               CCM_DRAMCLK_CFG_UPD);
+       }
        mctl_await_completion(&ccm->dram_clk_cfg, CCM_DRAMCLK_CFG_UPD, 0);
 
        setbits_le32(&ccm->ahb_reset0_cfg, 1 << AHB_RESET_OFFSET_MCTL);
@@ -318,7 +414,12 @@ static void mctl_sys_init(struct dram_para *para)
        udelay(500);
 }
 
-static int mctl_channel_init(struct dram_para *para)
+/* These are more guessed based on some Allwinner code. */
+#define DX_GCR_ODT_DYNAMIC     (0x0 << 4)
+#define DX_GCR_ODT_ALWAYS_ON   (0x1 << 4)
+#define DX_GCR_ODT_OFF         (0x2 << 4)
+
+static int mctl_channel_init(uint16_t socid, struct dram_para *para)
 {
        struct sunxi_mctl_com_reg * const mctl_com =
                        (struct sunxi_mctl_com_reg *)SUNXI_DRAM_COM_BASE;
@@ -328,8 +429,8 @@ static int mctl_channel_init(struct dram_para *para)
        unsigned int i;
 
        mctl_set_cr(para);
-       mctl_set_timing_params(para);
-       mctl_set_master_priority();
+       mctl_set_timing_params(socid, para);
+       mctl_set_master_priority(socid);
 
        /* setting VTC, default disable all VT */
        clrbits_le32(&mctl_ctl->pgcr[0], (1 << 30) | 0x3f);
@@ -344,10 +445,11 @@ static int mctl_channel_init(struct dram_para *para)
 
        /* set dramc odt */
        for (i = 0; i < 4; i++)
-               clrsetbits_le32(&mctl_ctl->datx[i].gcr, (0x3 << 4) |
+               clrsetbits_le32(&mctl_ctl->dx[i].gcr, (0x3 << 4) |
                                (0x1 << 1) | (0x3 << 2) | (0x3 << 12) |
                                (0x3 << 14),
-                               IS_ENABLED(CONFIG_DRAM_ODT_EN) ? 0x0 : 0x2);
+                               IS_ENABLED(CONFIG_DRAM_ODT_EN) ?
+                                       DX_GCR_ODT_DYNAMIC : DX_GCR_ODT_OFF);
 
        /* AC PDR should always ON */
        setbits_le32(&mctl_ctl->aciocr, 0x1 << 1);
@@ -355,48 +457,58 @@ static int mctl_channel_init(struct dram_para *para)
        /* set DQS auto gating PD mode */
        setbits_le32(&mctl_ctl->pgcr[2], 0x3 << 6);
 
-       /* dx ddr_clk & hdr_clk dynamic mode */
-       clrbits_le32(&mctl_ctl->pgcr[0], (0x3 << 14) | (0x3 << 12));
-
-       /* dphy & aphy phase select 270 degree */
-       clrsetbits_le32(&mctl_ctl->pgcr[2], (0x3 << 10) | (0x3 << 8),
-                       (0x1 << 10) | (0x2 << 8));
+       if (socid == SOCID_H3) {
+               /* dx ddr_clk & hdr_clk dynamic mode */
+               clrbits_le32(&mctl_ctl->pgcr[0], (0x3 << 14) | (0x3 << 12));
+
+               /* dphy & aphy phase select 270 degree */
+               clrsetbits_le32(&mctl_ctl->pgcr[2], (0x3 << 10) | (0x3 << 8),
+                               (0x1 << 10) | (0x2 << 8));
+       } else if (socid == SOCID_A64) {
+               /* dphy & aphy phase select ? */
+               clrsetbits_le32(&mctl_ctl->pgcr[2], (0x3 << 10) | (0x3 << 8),
+                               (0x0 << 10) | (0x3 << 8));
+       }
 
        /* set half DQ */
        if (para->bus_width != 32) {
-               writel(0x0, &mctl_ctl->datx[2].gcr);
-               writel(0x0, &mctl_ctl->datx[3].gcr);
+               writel(0x0, &mctl_ctl->dx[2].gcr);
+               writel(0x0, &mctl_ctl->dx[3].gcr);
        }
 
        /* data training configuration */
        clrsetbits_le32(&mctl_ctl->dtcr, 0xf << 24,
                        (para->dual_rank ? 0x3 : 0x1) << 24);
 
+       mctl_set_bit_delays(para);
+       udelay(50);
 
-       if (para->read_delays || para->write_delays) {
-               mctl_dq_delay(para->read_delays, para->write_delays);
-               udelay(50);
-       }
+       if (socid == SOCID_H3) {
+               mctl_h3_zq_calibration_quirk(para);
 
-       mctl_zq_calibration(para);
+               mctl_phy_init(PIR_PLLINIT | PIR_DCAL | PIR_PHYRST |
+                             PIR_DRAMRST | PIR_DRAMINIT | PIR_QSGATE);
+       } else if (socid == SOCID_A64) {
+               clrsetbits_le32(&mctl_ctl->zqcr, 0xffffff, CONFIG_DRAM_ZQ);
 
-       mctl_phy_init(PIR_PLLINIT | PIR_DCAL | PIR_PHYRST | PIR_DRAMRST |
-                     PIR_DRAMINIT | PIR_QSGATE);
+               mctl_phy_init(PIR_ZCAL | PIR_PLLINIT | PIR_DCAL | PIR_PHYRST |
+                             PIR_DRAMRST | PIR_DRAMINIT | PIR_QSGATE);
+       }
 
        /* detect ranks and bus width */
        if (readl(&mctl_ctl->pgsr[0]) & (0xfe << 20)) {
                /* only one rank */
-               if (((readl(&mctl_ctl->datx[0].gsr[0]) >> 24) & 0x2) ||
-                   ((readl(&mctl_ctl->datx[1].gsr[0]) >> 24) & 0x2)) {
+               if (((readl(&mctl_ctl->dx[0].gsr[0]) >> 24) & 0x2) ||
+                   ((readl(&mctl_ctl->dx[1].gsr[0]) >> 24) & 0x2)) {
                        clrsetbits_le32(&mctl_ctl->dtcr, 0xf << 24, 0x1 << 24);
                        para->dual_rank = 0;
                }
 
                /* only half DQ width */
-               if (((readl(&mctl_ctl->datx[2].gsr[0]) >> 24) & 0x1) ||
-                   ((readl(&mctl_ctl->datx[3].gsr[0]) >> 24) & 0x1)) {
-                       writel(0x0, &mctl_ctl->datx[2].gcr);
-                       writel(0x0, &mctl_ctl->datx[3].gcr);
+               if (((readl(&mctl_ctl->dx[2].gsr[0]) >> 24) & 0x1) ||
+                   ((readl(&mctl_ctl->dx[3].gsr[0]) >> 24) & 0x1)) {
+                       writel(0x0, &mctl_ctl->dx[2].gcr);
+                       writel(0x0, &mctl_ctl->dx[3].gcr);
                        para->bus_width = 16;
                }
 
@@ -419,7 +531,10 @@ static int mctl_channel_init(struct dram_para *para)
        udelay(10);
 
        /* set PGCR3, CKE polarity */
-       writel(0x00aa0060, &mctl_ctl->pgcr[3]);
+       if (socid == SOCID_H3)
+               writel(0x00aa0060, &mctl_ctl->pgcr[3]);
+       else if (socid == SOCID_A64)
+               writel(0xc0aa0060, &mctl_ctl->pgcr[3]);
 
        /* power down zq calibration module for power save */
        setbits_le32(&mctl_ctl->zqcr, ZQCR_PWRDOWN);
@@ -450,6 +565,45 @@ static void mctl_auto_detect_dram_size(struct dram_para *para)
                        break;
 }
 
+/*
+ * The actual values used here are taken from Allwinner provided boot0
+ * binaries, though they are probably board specific, so would likely benefit
+ * from individual tuning for each board. Apparently a lot of boards copy from
+ * some Allwinner reference design, so we go with those generic values for now
+ * in the hope that they are reasonable for most (all?) boards.
+ */
+#define SUN8I_H3_DX_READ_DELAYS                                        \
+       {{ 18, 18, 18, 18, 18, 18, 18, 18, 18,  0,  0 },        \
+        { 14, 14, 14, 14, 14, 14, 14, 14, 14,  0,  0 },        \
+        { 18, 18, 18, 18, 18, 18, 18, 18, 18,  0,  0 },        \
+        { 14, 14, 14, 14, 14, 14, 14, 14, 14,  0,  0 }}
+#define SUN8I_H3_DX_WRITE_DELAYS                               \
+       {{  0,  0,  0,  0,  0,  0,  0,  0,  0, 10, 10 },        \
+        {  0,  0,  0,  0,  0,  0,  0,  0,  0, 10, 10 },        \
+        {  0,  0,  0,  0,  0,  0,  0,  0,  0, 10, 10 },        \
+        {  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  6 }}
+#define SUN8I_H3_AC_DELAYS                                     \
+       {  0,  0,  0,  0,  0,  0,  0,  0,                       \
+          0,  0,  0,  0,  0,  0,  0,  0,                       \
+          0,  0,  0,  0,  0,  0,  0,  0,                       \
+          0,  0,  0,  0,  0,  0,  0      }
+
+#define SUN50I_A64_DX_READ_DELAYS                              \
+       {{ 16, 16, 16, 16, 17, 16, 16, 17, 16,  1,  0 },        \
+        { 17, 17, 17, 17, 17, 17, 17, 17, 17,  1,  0 },        \
+        { 16, 17, 17, 16, 16, 16, 16, 16, 16,  0,  0 },        \
+        { 17, 17, 17, 17, 17, 17, 17, 17, 17,  1,  0 }}
+#define SUN50I_A64_DX_WRITE_DELAYS                             \
+       {{  0,  0,  0,  0,  0,  0,  0,  0,  0, 15, 15 },        \
+        {  0,  0,  0,  0,  1,  1,  1,  1,  0, 10, 10 },        \
+        {  1,  0,  1,  1,  1,  1,  1,  1,  0, 11, 11 },        \
+        {  1,  0,  0,  1,  1,  1,  1,  1,  0, 12, 12 }}
+#define SUN50I_A64_AC_DELAYS                                   \
+       {  5,  5, 13, 10,  2,  5,  3,  3,                       \
+          0,  3,  3,  3,  1,  0,  0,  0,                       \
+          3,  4,  0,  3,  4,  1,  4,  0,                       \
+          1,  1,  0,  1, 13,  5,  4      }
+
 unsigned long sunxi_dram_init(void)
 {
        struct sunxi_mctl_com_reg * const mctl_com =
@@ -458,16 +612,34 @@ unsigned long sunxi_dram_init(void)
                        (struct sunxi_mctl_ctl_reg *)SUNXI_DRAM_CTL0_BASE;
 
        struct dram_para para = {
-               .read_delays = 0x00007979,      /* dram_tpr12 */
-               .write_delays = 0x6aaa0000,     /* dram_tpr11 */
                .dual_rank = 0,
                .bus_width = 32,
                .row_bits = 15,
                .page_size = 4096,
-       };
 
-       mctl_sys_init(&para);
-       if (mctl_channel_init(&para))
+#if defined(CONFIG_MACH_SUN8I_H3)
+               .dx_read_delays  = SUN8I_H3_DX_READ_DELAYS,
+               .dx_write_delays = SUN8I_H3_DX_WRITE_DELAYS,
+               .ac_delays       = SUN8I_H3_AC_DELAYS,
+#elif defined(CONFIG_MACH_SUN50I)
+               .dx_read_delays  = SUN50I_A64_DX_READ_DELAYS,
+               .dx_write_delays = SUN50I_A64_DX_WRITE_DELAYS,
+               .ac_delays       = SUN50I_A64_AC_DELAYS,
+#endif
+       };
+/*
+ * Let the compiler optimize alternatives away by passing this value into
+ * the static functions. This saves us #ifdefs, but still keeps the binary
+ * small.
+ */
+#if defined(CONFIG_MACH_SUN8I_H3)
+       uint16_t socid = SOCID_H3;
+#elif defined(CONFIG_MACH_SUN50I)
+       uint16_t socid = SOCID_A64;
+#endif
+
+       mctl_sys_init(socid, &para);
+       if (mctl_channel_init(socid, &para))
                return 0;
 
        if (para.dual_rank)
@@ -477,7 +649,13 @@ unsigned long sunxi_dram_init(void)
        udelay(1);
 
        /* odt delay */
-       writel(0x0c000400, &mctl_ctl->odtcfg);
+       if (socid == SOCID_H3)
+               writel(0x0c000400, &mctl_ctl->odtcfg);
+
+       if (socid == SOCID_A64) {
+               setbits_le32(&mctl_ctl->vtfcr, 2 << 8);
+               clrbits_le32(&mctl_ctl->pgcr[2], (1 << 13));
+       }
 
        /* clear credit value */
        setbits_le32(&mctl_com->cccr, 1 << 31);
@@ -486,6 +664,6 @@ unsigned long sunxi_dram_init(void)
        mctl_auto_detect_dram_size(&para);
        mctl_set_cr(&para);
 
-       return (1 << (para.row_bits + 3)) * para.page_size *
+       return (1UL << (para.row_bits + 3)) * para.page_size *
                                                (para.dual_rank ? 2 : 1);
 }
diff --git a/arch/arm/mach-sunxi/rmr_switch.S b/arch/arm/mach-sunxi/rmr_switch.S
new file mode 100644 (file)
index 0000000..cefa930
--- /dev/null
@@ -0,0 +1,41 @@
+@
+@ ARMv8 RMR reset sequence on Allwinner SoCs.
+@
+@ All 64-bit capable Allwinner SoCs reset in AArch32 (and continue to
+@ execute the Boot ROM in this state), so we need to switch to AArch64
+@ at some point.
+@ Section G6.2.133 of the ARMv8 ARM describes the Reset Management Register
+@ (RMR), which triggers a warm-reset of a core and can request to switch
+@ into a different execution state (AArch32 or AArch64).
+@ The address at which execution starts after the reset is held in the
+@ RVBAR system register, which is architecturally read-only.
+@ Allwinner provides a writable alias of this register in MMIO space, so
+@ we can easily set the start address of AArch64 code.
+@ This code below switches to AArch64 and starts execution at the specified
+@ start address. It needs to be assembled by an ARM(32) assembler and
+@ the machine code must be inserted as verbatim .word statements into the
+@ beginning of the AArch64 U-Boot code.
+@ To get the encoded bytes, use:
+@ ${CROSS_COMPILE}gcc -c -o rmr_switch.o rmr_switch.S
+@ ${CROSS_COMPILE}objdump -d rmr_switch.o
+@
+@ The resulting words should be inserted into the U-Boot file at
+@ arch/arm/include/asm/arch-sunxi/boot0.h.
+@
+@ This file is not built by the U-Boot build system, but provided only as a
+@ reference and to be able to regenerate a (probably fixed) version of this
+@ code found in encoded form in boot0.h.
+
+.text
+
+       ldr     r1, =0x017000a0         @ MMIO mapped RVBAR[0] register
+       ldr     r0, =0x57aA7add         @ start address, to be replaced
+       str     r0, [r1]
+       dsb     sy
+       isb     sy
+       mrc     15, 0, r0, cr12, cr0, 2 @ read RMR register
+       orr     r0, r0, #3              @ request reset in AArch64
+       mcr     15, 0, r0, cr12, cr0, 2 @ write RMR register
+       isb     sy
+1:     wfi
+       b       1b
index e0f9d5b..41c88cb 100644 (file)
@@ -42,7 +42,7 @@ u32 spl_boot_device(void)
 
 void __noreturn jump_to_image_no_args(struct spl_image_info *spl_image)
 {
-       debug("image entry point: 0x%X\n", spl_image->entry_point);
+       debug("image entry point: 0x%lX\n", spl_image->entry_point);
 
        start_cpu((u32)spl_image->entry_point);
        halt_avp();
index e1d4ab1..0001133 100644 (file)
@@ -125,6 +125,7 @@ config MACH_SUN50I
        bool "sun50i (Allwinner A64)"
        select ARM64
        select SUNXI_GEN_SUN6I
+       select SUPPORT_SPL
 
 endchoice
 
@@ -133,6 +134,29 @@ config MACH_SUN8I
        bool
        default y if MACH_SUN8I_A23 || MACH_SUN8I_A33 || MACH_SUN8I_H3 || MACH_SUN8I_A83T
 
+config RESERVE_ALLWINNER_BOOT0_HEADER
+       bool "reserve space for Allwinner boot0 header"
+       select ENABLE_ARM_SOC_BOOT0_HOOK
+       ---help---
+       Prepend a 1536 byte (empty) header to the U-Boot image file, to be
+       filled with magic values post build. The Allwinner provided boot0
+       blob relies on this information to load and execute U-Boot.
+       Only needed on 64-bit Allwinner boards so far when using boot0.
+
+config ARM_BOOT_HOOK_RMR
+       bool
+       depends on ARM64
+       default y
+       select ENABLE_ARM_SOC_BOOT0_HOOK
+       ---help---
+       Insert some ARM32 code at the very beginning of the U-Boot binary
+       which uses an RMR register write to bring the core into AArch64 mode.
+       The very first instruction acts as a switch, since it's carefully
+       chosen to be a NOP in one mode and a branch in the other, so the
+       code would only be executed if not already in AArch64.
+       This allows both the SPL and the U-Boot proper to be entered in
+       either mode and switch to AArch64 if needed.
+
 config DRAM_TYPE
        int "sunxi dram type"
        depends on MACH_SUN8I_A83T
@@ -145,6 +169,7 @@ config DRAM_CLK
        default 792 if MACH_SUN9I
        default 312 if MACH_SUN6I || MACH_SUN8I
        default 360 if MACH_SUN4I || MACH_SUN5I || MACH_SUN7I
+       default 672 if MACH_SUN50I
        ---help---
        Set the dram clock speed, valid range 240 - 480 (prior to sun9i),
        must be a multiple of 24. For the sun9i (A80), the tested values
@@ -164,6 +189,7 @@ config DRAM_ZQ
        default 123 if MACH_SUN4I || MACH_SUN5I || MACH_SUN6I || MACH_SUN8I
        default 127 if MACH_SUN7I
        default 4145117 if MACH_SUN9I
+       default 3881915 if MACH_SUN50I
        ---help---
        Set the dram zq value.
 
@@ -171,6 +197,7 @@ config DRAM_ODT_EN
        bool "sunxi dram odt enable"
        default n if !MACH_SUN8I_A23
        default y if MACH_SUN8I_A23
+       default y if MACH_SUN50I
        ---help---
        Select this to enable dram odt (on die termination).
 
index f7df834..a76ea3a 100644 (file)
@@ -115,7 +115,7 @@ int spl_parse_image_header(struct spl_image_info *spl_image,
                }
                spl_image->os = image_get_os(header);
                spl_image->name = image_get_name(header);
-               debug("spl: payload image: %.*s load addr: 0x%x size: %d\n",
+               debug("spl: payload image: %.*s load addr: 0x%lx size: %d\n",
                        (int)sizeof(spl_image->name), spl_image->name,
                        spl_image->load_addr, spl_image->size);
        } else {
@@ -140,7 +140,7 @@ int spl_parse_image_header(struct spl_image_info *spl_image,
                        spl_image->load_addr = CONFIG_SYS_LOAD_ADDR;
                        spl_image->entry_point = CONFIG_SYS_LOAD_ADDR;
                        spl_image->size = end - start;
-                       debug("spl: payload zImage, load addr: 0x%x size: %d\n",
+                       debug("spl: payload zImage, load addr: 0x%lx size: %d\n",
                              spl_image->load_addr, spl_image->size);
                        return 0;
                }
@@ -164,9 +164,9 @@ __weak void __noreturn jump_to_image_no_args(struct spl_image_info *spl_image)
        typedef void __noreturn (*image_entry_noargs_t)(void);
 
        image_entry_noargs_t image_entry =
-               (image_entry_noargs_t)(unsigned long)spl_image->entry_point;
+               (image_entry_noargs_t)spl_image->entry_point;
 
-       debug("image entry point: 0x%X\n", spl_image->entry_point);
+       debug("image entry point: 0x%lX\n", spl_image->entry_point);
        image_entry();
 }
 
index 85e3de8..0cd355c 100644 (file)
@@ -36,7 +36,7 @@ static int mmc_load_legacy(struct spl_image_info *spl_image, struct mmc *mmc,
        /* Read the header too to avoid extra memcpy */
        count = blk_dread(mmc_get_blk_desc(mmc), sector, image_size_sectors,
                          (void *)(ulong)spl_image->load_addr);
-       debug("read %x sectors to %x\n", image_size_sectors,
+       debug("read %x sectors to %lx\n", image_size_sectors,
              spl_image->load_addr);
        if (count != image_size_sectors)
                return -EIO;
index 6d0198f..2374170 100644 (file)
@@ -1,12 +1,11 @@
 CONFIG_ARM=y
-CONFIG_ENABLE_ARM_SOC_BOOT0_HOOK=y
+CONFIG_RESERVE_ALLWINNER_BOOT0_HEADER=y
 CONFIG_ARCH_SUNXI=y
 CONFIG_MACH_SUN50I=y
-CONFIG_DRAM_CLK=672
-CONFIG_DRAM_ZQ=3881915
 CONFIG_DEFAULT_DEVICE_TREE="sun50i-a64-pine64-plus"
 # CONFIG_SYS_MALLOC_CLEAR_ON_INIT is not set
 CONFIG_CONSOLE_MUX=y
+CONFIG_SPL=y
 # CONFIG_CMD_IMLS is not set
 # CONFIG_CMD_FLASH is not set
 # CONFIG_CMD_FPGA is not set
index e70064c..a24c115 100644 (file)
@@ -284,4 +284,4 @@ static int spl_spi_load_image(struct spl_image_info *spl_image,
        return 0;
 }
 /* Use priorty 0 to override the default if it happens to be linked in */
-SPL_LOAD_IMAGE_METHOD("sunxi SPI" 0, BOOT_DEVICE_SPI, spl_spi_load_image);
+SPL_LOAD_IMAGE_METHOD("sunxi SPI", 0, BOOT_DEVICE_SPI, spl_spi_load_image);
index a8d833b..ee0436b 100644 (file)
@@ -15,6 +15,9 @@ typedef volatile unsigned long        vu_long;
 typedef volatile unsigned short vu_short;
 typedef volatile unsigned char vu_char;
 
+/* Allow sharing constants with type modifiers between C and assembly. */
+#define _AC(X, Y)       (X##Y)
+
 #include <config.h>
 #include <errno.h>
 #include <asm-offsets.h>
@@ -936,7 +939,12 @@ int cpu_disable(int nr);
 int cpu_release(int nr, int argc, char * const argv[]);
 #endif
 
-#endif /* __ASSEMBLY__ */
+#else  /* __ASSEMBLY__ */
+
+/* Drop a C type modifier (like in 3UL) for constants used in assembly. */
+#define _AC(X, Y)       X
+
+#endif /* __ASSEMBLY__ */
 
 #ifdef CONFIG_PPC
 /*
@@ -948,6 +956,9 @@ int cpu_release(int nr, int argc, char * const argv[]);
 
 /* Put only stuff here that the assembler can digest */
 
+/* Declare an unsigned long constant digestible both by C and an assembler. */
+#define UL(x)           _AC(x, UL)
+
 #ifdef CONFIG_POST
 #define CONFIG_HAS_POST
 #ifndef CONFIG_POST_ALT_LIST
index b0bfc0d..ab2d33f 100644 (file)
@@ -35,7 +35,7 @@
 /*
  * High Level Configuration Options
  */
-#ifdef CONFIG_SPL_BUILD
+#if defined(CONFIG_SPL_BUILD) && !defined(CONFIG_ARM64)
 #define CONFIG_SYS_THUMB_BUILD /* Thumbs mode to save space in SPL */
 #endif
 
 
 #define CONFIG_SPL_FRAMEWORK
 
+#ifndef CONFIG_ARM64           /* AArch64 FEL support is not ready yet */
 #define CONFIG_SPL_BOARD_LOAD_IMAGE
+#endif
 
 #if defined(CONFIG_MACH_SUN9I)
 #define CONFIG_SPL_TEXT_BASE           0x10040         /* sram start+header */
index 6e746b2..bde4437 100644 (file)
@@ -23,8 +23,8 @@
 struct spl_image_info {
        const char *name;
        u8 os;
-       u32 load_addr;
-       u32 entry_point;
+       ulong load_addr;
+       ulong entry_point;
        u32 size;
        u32 flags;
 };
index 30ac759..dfa8432 100644 (file)
@@ -38,8 +38,8 @@ static void out_dgt(struct printf_info *info, char dgt)
        info->zs = 1;
 }
 
-static void div_out(struct printf_info *info, unsigned int *num,
-                   unsigned int div)
+static void div_out(struct printf_info *info, unsigned long *num,
+                   unsigned long div)
 {
        unsigned char dgt = 0;
 
@@ -56,9 +56,9 @@ int _vprintf(struct printf_info *info, const char *fmt, va_list va)
 {
        char ch;
        char *p;
-       unsigned int num;
+       unsigned long num;
        char buf[12];
-       unsigned int div;
+       unsigned long div;
 
        while ((ch = *(fmt++))) {
                if (ch != '%') {
@@ -66,8 +66,12 @@ int _vprintf(struct printf_info *info, const char *fmt, va_list va)
                } else {
                        bool lz = false;
                        int width = 0;
+                       bool islong = false;
 
                        ch = *(fmt++);
+                       if (ch == '-')
+                               ch = *(fmt++);
+
                        if (ch == '0') {
                                ch = *(fmt++);
                                lz = 1;
@@ -80,6 +84,11 @@ int _vprintf(struct printf_info *info, const char *fmt, va_list va)
                                        ch = *fmt++;
                                }
                        }
+                       if (ch == 'l') {
+                               ch = *(fmt++);
+                               islong = true;
+                       }
+
                        info->bf = buf;
                        p = info->bf;
                        info->zs = 0;
@@ -89,24 +98,43 @@ int _vprintf(struct printf_info *info, const char *fmt, va_list va)
                                goto abort;
                        case 'u':
                        case 'd':
-                               num = va_arg(va, unsigned int);
-                               if (ch == 'd' && (int)num < 0) {
-                                       num = -(int)num;
-                                       out(info, '-');
+                               div = 1000000000;
+                               if (islong) {
+                                       num = va_arg(va, unsigned long);
+                                       if (sizeof(long) > 4)
+                                               div *= div * 10;
+                               } else {
+                                       num = va_arg(va, unsigned int);
+                               }
+
+                               if (ch == 'd') {
+                                       if (islong && (long)num < 0) {
+                                               num = -(long)num;
+                                               out(info, '-');
+                                       } else if (!islong && (int)num < 0) {
+                                               num = -(int)num;
+                                               out(info, '-');
+                                       }
                                }
                                if (!num) {
                                        out_dgt(info, 0);
                                } else {
-                                       for (div = 1000000000; div; div /= 10)
+                                       for (; div; div /= 10)
                                                div_out(info, &num, div);
                                }
                                break;
                        case 'x':
-                               num = va_arg(va, unsigned int);
+                               if (islong) {
+                                       num = va_arg(va, unsigned long);
+                                       div = 1UL << (sizeof(long) * 8 - 4);
+                               } else {
+                                       num = va_arg(va, unsigned int);
+                                       div = 0x10000000;
+                               }
                                if (!num) {
                                        out_dgt(info, 0);
                                } else {
-                                       for (div = 0x10000000; div; div /= 0x10)
+                                       for (; div; div /= 0x10)
                                                div_out(info, &num, div);
                                }
                                break;